]> git.donarmstrong.com Git - mothur.git/blob - treegroupscommand.cpp
a21c47afcb1df45fc858c4877c43cc7fe9a586e2
[mothur.git] / treegroupscommand.cpp
1 /*
2  *  treegroupscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "treegroupscommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
21
22
23 //**********************************************************************************************************************
24
25 TreeGroupCommand::TreeGroupCommand(){
26         try {
27                 globaldata = GlobalData::getInstance();
28                 format = globaldata->getFormat();
29                 validCalculator = new ValidCalculators();
30                 
31                 if (format == "sharedfile") {
32                         int i;
33                         for (i=0; i<globaldata->Estimators.size(); i++) {
34                                 if (validCalculator->isValidCalculator("treegroup", globaldata->Estimators[i]) == true) { 
35                                         if (globaldata->Estimators[i] == "jabund") {    
36                                                 treeCalculators.push_back(new JAbund());
37                                         }else if (globaldata->Estimators[i] == "sorabund") { 
38                                                 treeCalculators.push_back(new SorAbund());
39                                         }else if (globaldata->Estimators[i] == "jclass") { 
40                                                 treeCalculators.push_back(new Jclass());
41                                         }else if (globaldata->Estimators[i] == "sorclass") { 
42                                                 treeCalculators.push_back(new SorClass());
43                                         }else if (globaldata->Estimators[i] == "jest") { 
44                                                 treeCalculators.push_back(new Jest());
45                                         }else if (globaldata->Estimators[i] == "sorest") { 
46                                                 treeCalculators.push_back(new SorEst());
47                                         }else if (globaldata->Estimators[i] == "thetayc") { 
48                                                 treeCalculators.push_back(new ThetaYC());
49                                         }else if (globaldata->Estimators[i] == "thetan") { 
50                                                 treeCalculators.push_back(new ThetaN());
51                                         }else if (globaldata->Estimators[i] == "morisitahorn") { 
52                                                 treeCalculators.push_back(new MorHorn());
53                                         }else if (globaldata->Estimators[i] == "braycurtis") { 
54                                                 treeCalculators.push_back(new BrayCurtis());
55                                         }
56                                 }
57                         }
58                 }
59                 
60                 //reset calc for next command
61                 globaldata->setCalc("");
62
63         }
64         catch(exception& e) {
65                 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function TreeGroupCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
66                 exit(1);
67         }
68         catch(...) {
69                 cout << "An unknown error has occurred in the TreeGroupCommand class function TreeGroupCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
70                 exit(1);
71         }       
72 }
73 //**********************************************************************************************************************
74
75 TreeGroupCommand::~TreeGroupCommand(){
76         delete input;
77         if (format == "sharedfile") {delete read;}
78         else { delete readMatrix;  delete matrix; delete list; }
79         delete tmap;
80         
81 }
82
83 //**********************************************************************************************************************
84
85 int TreeGroupCommand::execute(){
86         try {
87                 if (format == "sharedfile") {
88                         //if the users entered no valid calculators don't execute command
89                         if (treeCalculators.size() == 0) { cout << "You have given no valid calculators." << endl; return 0; }
90
91                         //you have groups
92                         read = new ReadOTUFile(globaldata->inputFileName);      
93                         read->read(&*globaldata); 
94                         
95                         input = globaldata->ginput;
96                         lookup = input->getSharedRAbundVectors();
97                         
98                         if (lookup.size() < 2) { cout << "You have not provided enough valid groups.  I cannot run the command." << endl; return 0; }
99                 
100                         //create tree file
101                         makeSimsShared();
102                 }else{
103                         //read in dist file
104                         filename = globaldata->inputFileName;
105                 
106                         if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }        
107                         else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
108                                 
109                         if(globaldata->getPrecision() != ""){
110                                 convert(globaldata->getPrecision(), precision); 
111                         }
112                 
113                         if(globaldata->getCutOff() != ""){
114                                 convert(globaldata->getCutOff(), cutoff);       
115                                 cutoff += (5 / (precision * 10.0));
116                         }
117                         readMatrix->setCutoff(cutoff);
118         
119                         if(globaldata->getNameFile() != ""){    
120                                 nameMap = new NameAssignment(globaldata->getNameFile());
121                                 nameMap->readMap(1,2);
122                         }
123                         else{
124                                 nameMap = NULL;
125                         }
126         
127                         readMatrix->read(nameMap);
128                         list = readMatrix->getListVector();
129                         matrix = readMatrix->getMatrix();
130
131                         //make treemap
132                         tmap = new TreeMap();
133                         tmap->makeSim(list);
134                         globaldata->gTreemap = tmap;
135                         
136                         globaldata->Groups = tmap->namesOfGroups;
137                 
138                         //clear globaldatas old tree names if any
139                         globaldata->Treenames.clear();
140                 
141                         //fills globaldatas tree names
142                         globaldata->Treenames = globaldata->Groups;
143
144                         makeSimsDist();
145
146                         //create a new filename
147                         outputFile = getRootName(globaldata->inputFileName) + "tre";    
148                                 
149                         createTree();
150                 }
151                                 
152                 //reset groups parameter
153                 globaldata->Groups.clear();  globaldata->setGroups("");
154
155                 return 0;
156         }
157         catch(exception& e) {
158                 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
159                 exit(1);
160         }
161         catch(...) {
162                 cout << "An unknown error has occurred in the TreeGroupCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
163                 exit(1);
164         }               
165 }
166 //**********************************************************************************************************************
167
168 void TreeGroupCommand::createTree(){
169         try {
170                 //create tree
171                 t = new Tree();
172                 
173                 //do merges and create tree structure by setting parents and children
174                 //there are numGroups - 1 merges to do
175                 for (int i = 0; i < (numGroups - 1); i++) {
176                         float largest = -1000.0;
177
178                         int row, column;
179                         //find largest value in sims matrix by searching lower triangle
180                         for (int j = 1; j < simMatrix.size(); j++) {
181                                 for (int k = 0; k < j; k++) {
182                                         if (simMatrix[j][k] > largest) {  largest = simMatrix[j][k]; row = j; column = k;  }
183                                 }
184                         }
185
186                         //set non-leaf node info and update leaves to know their parents
187                         //non-leaf
188                         t->tree[numGroups + i].setChildren(index[row], index[column]);
189                         
190                         //parents
191                         t->tree[index[row]].setParent(numGroups + i);
192                         t->tree[index[column]].setParent(numGroups + i);
193                         
194                         //blength = distance / 2;
195                         float blength = ((1.0 - largest) / 2);
196                         
197                         //branchlengths
198                         t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
199                         t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
200                         
201                         //set your length to leaves to your childs length plus branchlength
202                         t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
203                         
204                         
205                         //update index 
206                         index[row] = numGroups+i;
207                         index[column] = numGroups+i;
208                         
209                         //remove highest value that caused the merge.
210                         simMatrix[row][column] = -1000.0;
211                         simMatrix[column][row] = -1000.0;
212                         
213                         //merge values in simsMatrix
214                         for (int n = 0; n < simMatrix.size(); n++)      {
215                                 //row becomes merge of 2 groups
216                                 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
217                                 simMatrix[n][row] = simMatrix[row][n];
218                                 //delete column
219                                 simMatrix[column][n] = -1000.0;
220                                 simMatrix[n][column] = -1000.0;
221                         }
222                 }
223                 
224                 //adjust tree to make sure root to tip length is .5
225                 int root = t->findRoot();
226                 t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
227                 
228                 //assemble tree
229                 t->assembleTree();
230                 
231                 //print newick file
232                 t->createNewickFile(outputFile);
233                 
234                 //delete tree
235                 delete t;
236         
237         }
238         catch(exception& e) {
239                 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function createTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
240                 exit(1);
241         }
242         catch(...) {
243                 cout << "An unknown error has occurred in the TreeGroupCommand class function createTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
244                 exit(1);
245         }
246 }
247 /***********************************************************/
248 void TreeGroupCommand::printSims(ostream& out) {
249         try {
250                 
251                 //output column headers
252                 //out << '\t';
253                 //for (int i = 0; i < lookup.size(); i++) {     out << lookup[i]->getGroup() << '\t';           }
254                 //out << endl;
255                 
256                 
257                 for (int m = 0; m < simMatrix.size(); m++)      {
258                         //out << lookup[m]->getGroup() << '\t';
259                         for (int n = 0; n < simMatrix.size(); n++)      {
260                                 out << simMatrix[m][n] << '\t'; 
261                         }
262                         out << endl;
263                 }
264
265         }
266         catch(exception& e) {
267                 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
268                 exit(1);
269         }
270         catch(...) {
271                 cout << "An unknown error has occurred in the TreeGroupCommand class function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
272                 exit(1);
273         }               
274 }
275 /***********************************************************/
276 void TreeGroupCommand::makeSimsDist() {
277         try {
278                 numGroups = list->size();
279                 
280                 //initialize index
281                 index.clear();
282                 for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
283                 
284                 //initialize simMatrix
285                 simMatrix.clear();
286                 simMatrix.resize(numGroups);
287                 for (int m = 0; m < simMatrix.size(); m++)      {
288                         for (int j = 0; j < simMatrix.size(); j++)      {
289                                 simMatrix[m].push_back(0.0);
290                         }
291                 }
292                 
293                 //go through sparse matrix and fill sims
294                 //go through each cell in the sparsematrix
295                 for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
296                         //similairity = -(distance-1)
297                         simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0);   
298                         simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0);                           
299                 }
300
301
302         }
303         catch(exception& e) {
304                 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function makeSimsDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
305                 exit(1);
306         }
307         catch(...) {
308                 cout << "An unknown error has occurred in the TreeGroupCommand class function makeSimsDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
309                 exit(1);
310         }               
311 }
312
313 /***********************************************************/
314 void TreeGroupCommand::makeSimsShared() {
315         try {
316                 int count = 1;  
317                 EstOutput data;
318                 vector<SharedRAbundVector*> subset;
319         
320                 numGroups = globaldata->Groups.size();
321                 
322                 //clear globaldatas old tree names if any
323                 globaldata->Treenames.clear();
324                 
325                 //fills globaldatas tree names
326                 globaldata->Treenames = globaldata->Groups;
327                 
328                 //create treemap class from groupmap for tree class to use
329                 tmap = new TreeMap();
330                 tmap->makeSim(globaldata->gGroupmap);
331                 globaldata->gTreemap = tmap;
332                 
333                 while(lookup[0] != NULL){
334                 
335                         if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){                   
336                                 
337                                 cout << lookup[0]->getLabel() << '\t' << count << endl;
338                                 
339                                 //for each calculator                                                                                           
340                                 for(int i = 0 ; i < treeCalculators.size(); i++) {
341                                         //initialize simMatrix
342                                         simMatrix.clear();
343                                         simMatrix.resize(numGroups);
344                                         for (int m = 0; m < simMatrix.size(); m++)      {
345                                                 for (int j = 0; j < simMatrix.size(); j++)      {
346                                                         simMatrix[m].push_back(0.0);
347                                                 }
348                                         }
349                 
350                                         //initialize index
351                                         index.clear();
352                                         for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
353                 
354                                         //create a new filename
355                                         outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + lookup[0]->getLabel() + ".tre";                             
356                                                                                                 
357                                         for (int k = 0; k < lookup.size(); k++) { 
358                                                 for (int l = k; l < lookup.size(); l++) {
359                                                         if (k != l) { //we dont need to similiarity of a groups to itself
360                                                                 //get estimated similarity between 2 groups
361                                                                 
362                                                                 subset.clear(); //clear out old pair of sharedrabunds
363                                                                 //add new pair of sharedrabunds
364                                                                 subset.push_back(lookup[k]); subset.push_back(lookup[l]); 
365                                                                 
366                                                                 data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
367                                                                 //save values in similarity matrix
368                                                                 simMatrix[k][l] = data[0];
369                                                                 simMatrix[l][k] = data[0];
370                                                         }
371                                                 }
372                                         }
373                                         
374                                         //creates tree from similarity matrix and write out file
375                                         createTree();
376                                 }
377                         }
378                         
379                         //prevent memory leak
380                         for (int i = 0; i < lookup.size(); i++) {       delete lookup[i];       }
381                         
382                         //get next line to process
383                         lookup = input->getSharedRAbundVectors();
384                         count++;
385                 }
386                 
387                 for(int i = 0 ; i < treeCalculators.size(); i++) {  delete treeCalculators[i]; }
388         }
389         catch(exception& e) {
390                 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
391                 exit(1);
392         }
393         catch(...) {
394                 cout << "An unknown error has occurred in the TreeGroupCommand class function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
395                 exit(1);
396         }               
397 }
398
399 /***********************************************************/
400
401