]> git.donarmstrong.com Git - mothur.git/blob - treegroupscommand.cpp
added smart distance feature and optimized all commands using line by line processing
[mothur.git] / treegroupscommand.cpp
1 /*
2  *  treegroupscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "treegroupscommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
21
22
23 //**********************************************************************************************************************
24
25 TreeGroupCommand::TreeGroupCommand(){
26         try {
27                 globaldata = GlobalData::getInstance();
28                 format = globaldata->getFormat();
29                 validCalculator = new ValidCalculators();
30                 
31                 if (format == "sharedfile") {
32                         int i;
33                         for (i=0; i<globaldata->Estimators.size(); i++) {
34                                 if (validCalculator->isValidCalculator("treegroup", globaldata->Estimators[i]) == true) { 
35                                         if (globaldata->Estimators[i] == "jabund") {    
36                                                 treeCalculators.push_back(new JAbund());
37                                         }else if (globaldata->Estimators[i] == "sorabund") { 
38                                                 treeCalculators.push_back(new SorAbund());
39                                         }else if (globaldata->Estimators[i] == "jclass") { 
40                                                 treeCalculators.push_back(new Jclass());
41                                         }else if (globaldata->Estimators[i] == "sorclass") { 
42                                                 treeCalculators.push_back(new SorClass());
43                                         }else if (globaldata->Estimators[i] == "jest") { 
44                                                 treeCalculators.push_back(new Jest());
45                                         }else if (globaldata->Estimators[i] == "sorest") { 
46                                                 treeCalculators.push_back(new SorEst());
47                                         }else if (globaldata->Estimators[i] == "thetayc") { 
48                                                 treeCalculators.push_back(new ThetaYC());
49                                         }else if (globaldata->Estimators[i] == "thetan") { 
50                                                 treeCalculators.push_back(new ThetaN());
51                                         }else if (globaldata->Estimators[i] == "morisitahorn") { 
52                                                 treeCalculators.push_back(new MorHorn());
53                                         }else if (globaldata->Estimators[i] == "braycurtis") { 
54                                                 treeCalculators.push_back(new BrayCurtis());
55                                         }
56                                 }
57                         }
58                 }
59                 
60                 //reset calc for next command
61                 globaldata->setCalc("");
62
63         }
64         catch(exception& e) {
65                 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function TreeGroupCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
66                 exit(1);
67         }
68         catch(...) {
69                 cout << "An unknown error has occurred in the TreeGroupCommand class function TreeGroupCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
70                 exit(1);
71         }       
72 }
73 //**********************************************************************************************************************
74
75 TreeGroupCommand::~TreeGroupCommand(){
76         delete input;
77         if (format == "sharedfile") {delete read;}
78         else { delete readMatrix;  delete matrix; delete list; }
79         delete tmap;
80         
81 }
82
83 //**********************************************************************************************************************
84
85 int TreeGroupCommand::execute(){
86         try {
87                 if (format == "sharedfile") {
88                         //if the users entered no valid calculators don't execute command
89                         if (treeCalculators.size() == 0) { cout << "You have given no valid calculators." << endl; return 0; }
90
91                         //you have groups
92                         read = new ReadOTUFile(globaldata->inputFileName);      
93                         read->read(&*globaldata); 
94                         
95                         input = globaldata->ginput;
96                         lookup = input->getSharedRAbundVectors();
97                         lastLookup = lookup;
98                         
99                         if (lookup.size() < 2) { cout << "You have not provided enough valid groups.  I cannot run the command." << endl; return 0; }
100                 
101                         //create tree file
102                         makeSimsShared();
103                 }else{
104                         //read in dist file
105                         filename = globaldata->inputFileName;
106                 
107                         if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }        
108                         else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
109                                 
110                         if(globaldata->getPrecision() != ""){
111                                 convert(globaldata->getPrecision(), precision); 
112                         }
113                 
114                         if(globaldata->getCutOff() != ""){
115                                 convert(globaldata->getCutOff(), cutoff);       
116                                 cutoff += (5 / (precision * 10.0));
117                         }
118                         readMatrix->setCutoff(cutoff);
119         
120                         if(globaldata->getNameFile() != ""){    
121                                 nameMap = new NameAssignment(globaldata->getNameFile());
122                                 nameMap->readMap(1,2);
123                         }
124                         else{
125                                 nameMap = NULL;
126                         }
127         
128                         readMatrix->read(nameMap);
129                         list = readMatrix->getListVector();
130                         matrix = readMatrix->getMatrix();
131
132                         //make treemap
133                         tmap = new TreeMap();
134                         tmap->makeSim(list);
135                         globaldata->gTreemap = tmap;
136                         
137                         globaldata->Groups = tmap->namesOfGroups;
138                 
139                         //clear globaldatas old tree names if any
140                         globaldata->Treenames.clear();
141                 
142                         //fills globaldatas tree names
143                         globaldata->Treenames = globaldata->Groups;
144
145                         makeSimsDist();
146
147                         //create a new filename
148                         outputFile = getRootName(globaldata->inputFileName) + "tre";    
149                                 
150                         createTree();
151                 }
152                                 
153                 //reset groups parameter
154                 globaldata->Groups.clear();  globaldata->setGroups("");
155
156                 return 0;
157         }
158         catch(exception& e) {
159                 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
160                 exit(1);
161         }
162         catch(...) {
163                 cout << "An unknown error has occurred in the TreeGroupCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
164                 exit(1);
165         }               
166 }
167 //**********************************************************************************************************************
168
169 void TreeGroupCommand::createTree(){
170         try {
171                 //create tree
172                 t = new Tree();
173                 
174                 //do merges and create tree structure by setting parents and children
175                 //there are numGroups - 1 merges to do
176                 for (int i = 0; i < (numGroups - 1); i++) {
177                         float largest = -1000.0;
178
179                         int row, column;
180                         //find largest value in sims matrix by searching lower triangle
181                         for (int j = 1; j < simMatrix.size(); j++) {
182                                 for (int k = 0; k < j; k++) {
183                                         if (simMatrix[j][k] > largest) {  largest = simMatrix[j][k]; row = j; column = k;  }
184                                 }
185                         }
186
187                         //set non-leaf node info and update leaves to know their parents
188                         //non-leaf
189                         t->tree[numGroups + i].setChildren(index[row], index[column]);
190                         
191                         //parents
192                         t->tree[index[row]].setParent(numGroups + i);
193                         t->tree[index[column]].setParent(numGroups + i);
194                         
195                         //blength = distance / 2;
196                         float blength = ((1.0 - largest) / 2);
197                         
198                         //branchlengths
199                         t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
200                         t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
201                         
202                         //set your length to leaves to your childs length plus branchlength
203                         t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
204                         
205                         
206                         //update index 
207                         index[row] = numGroups+i;
208                         index[column] = numGroups+i;
209                         
210                         //remove highest value that caused the merge.
211                         simMatrix[row][column] = -1000.0;
212                         simMatrix[column][row] = -1000.0;
213                         
214                         //merge values in simsMatrix
215                         for (int n = 0; n < simMatrix.size(); n++)      {
216                                 //row becomes merge of 2 groups
217                                 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
218                                 simMatrix[n][row] = simMatrix[row][n];
219                                 //delete column
220                                 simMatrix[column][n] = -1000.0;
221                                 simMatrix[n][column] = -1000.0;
222                         }
223                 }
224                 
225                 //adjust tree to make sure root to tip length is .5
226                 int root = t->findRoot();
227                 t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
228                 
229                 //assemble tree
230                 t->assembleTree();
231                 
232                 //print newick file
233                 t->createNewickFile(outputFile);
234                 
235                 //delete tree
236                 delete t;
237         
238         }
239         catch(exception& e) {
240                 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function createTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
241                 exit(1);
242         }
243         catch(...) {
244                 cout << "An unknown error has occurred in the TreeGroupCommand class function createTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
245                 exit(1);
246         }
247 }
248 /***********************************************************/
249 void TreeGroupCommand::printSims(ostream& out) {
250         try {
251                 
252                 //output column headers
253                 //out << '\t';
254                 //for (int i = 0; i < lookup.size(); i++) {     out << lookup[i]->getGroup() << '\t';           }
255                 //out << endl;
256                 
257                 
258                 for (int m = 0; m < simMatrix.size(); m++)      {
259                         //out << lookup[m]->getGroup() << '\t';
260                         for (int n = 0; n < simMatrix.size(); n++)      {
261                                 out << simMatrix[m][n] << '\t'; 
262                         }
263                         out << endl;
264                 }
265
266         }
267         catch(exception& e) {
268                 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
269                 exit(1);
270         }
271         catch(...) {
272                 cout << "An unknown error has occurred in the TreeGroupCommand class function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
273                 exit(1);
274         }               
275 }
276 /***********************************************************/
277 void TreeGroupCommand::makeSimsDist() {
278         try {
279                 numGroups = list->size();
280                 
281                 //initialize index
282                 index.clear();
283                 for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
284                 
285                 //initialize simMatrix
286                 simMatrix.clear();
287                 simMatrix.resize(numGroups);
288                 for (int m = 0; m < simMatrix.size(); m++)      {
289                         for (int j = 0; j < simMatrix.size(); j++)      {
290                                 simMatrix[m].push_back(0.0);
291                         }
292                 }
293                 
294                 //go through sparse matrix and fill sims
295                 //go through each cell in the sparsematrix
296                 for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
297                         //similairity = -(distance-1)
298                         simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0);   
299                         simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0);                           
300                 }
301
302
303         }
304         catch(exception& e) {
305                 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function makeSimsDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
306                 exit(1);
307         }
308         catch(...) {
309                 cout << "An unknown error has occurred in the TreeGroupCommand class function makeSimsDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
310                 exit(1);
311         }               
312 }
313
314 /***********************************************************/
315 void TreeGroupCommand::makeSimsShared() {
316         try {
317                 int count = 1;  
318         
319                 //clear globaldatas old tree names if any
320                 globaldata->Treenames.clear();
321                 
322                 //fills globaldatas tree names
323                 globaldata->Treenames = globaldata->Groups;
324                 
325                 //create treemap class from groupmap for tree class to use
326                 tmap = new TreeMap();
327                 tmap->makeSim(globaldata->gGroupmap);
328                 globaldata->gTreemap = tmap;
329                 
330                 set<string> processedLabels;
331                 set<string> userLabels = globaldata->labels;
332
333                 //as long as you are not at the end of the file or done wih the lines you want
334                 while((lookup[0] != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
335                 
336                         if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){                   
337                                 cout << lookup[0]->getLabel() << '\t' << count << endl;
338                                 process(lookup);
339                                 
340                                 processedLabels.insert(lookup[0]->getLabel());
341                                 userLabels.erase(lookup[0]->getLabel());
342                         }
343                         
344                         if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) {
345                                 cout << lastLookup[0]->getLabel() << '\t' << count << endl;
346                                 process(lastLookup);
347                                         
348                                 processedLabels.insert(lastLookup[0]->getLabel());
349                                 userLabels.erase(lastLookup[0]->getLabel());
350                         }
351
352                         //prevent memory leak
353                         if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) {  delete lastLookup[i];  } }
354                         lastLookup = lookup;                    
355                         
356                         //get next line to process
357                         lookup = input->getSharedRAbundVectors();
358                         count++;
359                 }
360                 
361                 //output error messages about any remaining user labels
362                 set<string>::iterator it;
363                 bool needToRun = false;
364                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
365                         cout << "Your file does not include the label "<< *it; 
366                         if (processedLabels.count(lastLookup[0]->getLabel()) != 1) {
367                                 cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl;
368                                 needToRun = true;
369                         }else {
370                                 cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl;
371                         }
372                 }
373                 
374                 //run last line if you need to
375                 if (needToRun == true)  {
376                         cout << lastLookup[0]->getLabel() << '\t' << count << endl;
377                         process(lastLookup);
378                 }
379                 
380                 for (int i = 0; i < lastLookup.size(); i++) {  delete lastLookup[i];  }
381                 for(int i = 0 ; i < treeCalculators.size(); i++) {  delete treeCalculators[i]; }
382         }
383         catch(exception& e) {
384                 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
385                 exit(1);
386         }
387         catch(...) {
388                 cout << "An unknown error has occurred in the TreeGroupCommand class function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
389                 exit(1);
390         }               
391 }
392
393 /***********************************************************/
394 void TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
395         try{
396                                 EstOutput data;
397                                 vector<SharedRAbundVector*> subset;
398                                 numGroups = globaldata->Groups.size();
399                                 
400                                 //for each calculator                                                                                           
401                                 for(int i = 0 ; i < treeCalculators.size(); i++) {
402                                         //initialize simMatrix
403                                         simMatrix.clear();
404                                         simMatrix.resize(numGroups);
405                                         for (int m = 0; m < simMatrix.size(); m++)      {
406                                                 for (int j = 0; j < simMatrix.size(); j++)      {
407                                                         simMatrix[m].push_back(0.0);
408                                                 }
409                                         }
410                 
411                                         //initialize index
412                                         index.clear();
413                                         for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
414                 
415                                         //create a new filename
416                                         outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre";                         
417                                                                                                 
418                                         for (int k = 0; k < thisLookup.size(); k++) { 
419                                                 for (int l = k; l < thisLookup.size(); l++) {
420                                                         if (k != l) { //we dont need to similiarity of a groups to itself
421                                                                 //get estimated similarity between 2 groups
422                                                                 
423                                                                 subset.clear(); //clear out old pair of sharedrabunds
424                                                                 //add new pair of sharedrabunds
425                                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
426                                                                 
427                                                                 data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
428                                                                 //save values in similarity matrix
429                                                                 simMatrix[k][l] = data[0];
430                                                                 simMatrix[l][k] = data[0];
431                                                         }
432                                                 }
433                                         }
434                                         
435                                         //creates tree from similarity matrix and write out file
436                                         createTree();
437                                 }
438
439         }
440         catch(exception& e) {
441                 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
442                 exit(1);
443         }
444         catch(...) {
445                 cout << "An unknown error has occurred in the TreeGroupCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
446                 exit(1);
447         }               
448 }
449 /***********************************************************/
450
451         
452