]> git.donarmstrong.com Git - mothur.git/blob - treegroupscommand.cpp
d25c46adde9b46ce3044a115d1c816dd552ac146
[mothur.git] / treegroupscommand.cpp
1 /*
2  *  treegroupscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "treegroupscommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
21
22
23 //**********************************************************************************************************************
24
25 TreeGroupCommand::TreeGroupCommand(string option){
26         try {
27                 globaldata = GlobalData::getInstance();
28                 abort = false;
29                 allLines = 1;
30                 labels.clear();
31                 Groups.clear();
32                 Estimators.clear();
33                 
34                 //allow user to run help
35                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
36                 
37                 else {
38                         //valid paramters for this command
39                         string Array[] =  {"label","calc","groups", "phylip", "column", "name", "precision","cutoff"};
40                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
41                         
42                         OptionParser parser(option);
43                         map<string, string> parameters = parser. getParameters();
44                         
45                         ValidParameters validParameter;
46                 
47                         //check to make sure all parameters are valid for command
48                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
49                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
50                         }
51                         
52                         //required parameters
53                         phylipfile = validParameter.validFile(parameters, "phylip", true);
54                         if (phylipfile == "not open") { abort = true; }
55                         else if (phylipfile == "not found") { phylipfile = ""; }        
56                         else {  format = "phylip";      }
57                         
58                         columnfile = validParameter.validFile(parameters, "column", true);
59                         if (columnfile == "not open") { abort = true; } 
60                         else if (columnfile == "not found") { columnfile = ""; }
61                         else {  format = "column";      }
62                         
63                         namefile = validParameter.validFile(parameters, "name", true);
64                         if (namefile == "not open") { abort = true; }   
65                         else if (namefile == "not found") { namefile = ""; }
66                         else {  globaldata->setNameFile(namefile);      }
67                         
68                         format = globaldata->getFormat();
69                         
70                         //error checking on files                       
71                         if ((globaldata->getSharedFile() == "") && ((phylipfile == "") && (columnfile == "")))  { mothurOut("You must run the read.otu command or provide a distance file before running the tree.shared command."); mothurOutEndLine(); abort = true; }
72                         else if ((phylipfile != "") && (columnfile != "")) { mothurOut("When running the tree.shared command with a distance file you may not use both the column and the phylip parameters."); mothurOutEndLine(); abort = true; }
73                         
74                         if (columnfile != "") {
75                                 if (namefile == "") {  mothurOut("You need to provide a namefile if you are going to use the column format."); mothurOutEndLine(); abort = true; }
76                         }
77
78                         //check for optional parameter and set defaults
79                         // ...at some point should added some additional type checking...
80                         label = validParameter.validFile(parameters, "label", false);                   
81                         if (label == "not found") { label = ""; }
82                         else { 
83                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
84                                 else { allLines = 1;  }
85                         }
86                         
87                         //if the user has not specified any labels use the ones from read.otu
88                         if(label == "") {  
89                                 allLines = globaldata->allLines; 
90                                 labels = globaldata->labels; 
91                         }
92                                 
93                         groups = validParameter.validFile(parameters, "groups", false);                 
94                         if (groups == "not found") { groups = ""; }
95                         else { 
96                                 splitAtDash(groups, Groups);
97                                 globaldata->Groups = Groups;
98                         }
99                                 
100                         calc = validParameter.validFile(parameters, "calc", false);                     
101                         if (calc == "not found") { calc = "jclass-thetayc";  }
102                         else { 
103                                  if (calc == "default")  {  calc = "jclass-thetayc";  }
104                         }
105                         splitAtDash(calc, Estimators);
106
107                         string temp;
108                         temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
109                         convert(temp, precision); 
110                         
111                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10"; }
112                         convert(temp, cutoff); 
113                         cutoff += (5 / (precision * 10.0));
114
115                                 
116                         if (abort == false) {
117                         
118                                 validCalculator = new ValidCalculators();
119                                 
120                                 if (format == "sharedfile") {
121                                         int i;
122                                         for (i=0; i<Estimators.size(); i++) {
123                                                 if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) { 
124                                                         if (Estimators[i] == "jabund") {        
125                                                                 treeCalculators.push_back(new JAbund());
126                                                         }else if (Estimators[i] == "sorabund") { 
127                                                                 treeCalculators.push_back(new SorAbund());
128                                                         }else if (Estimators[i] == "jclass") { 
129                                                                 treeCalculators.push_back(new Jclass());
130                                                         }else if (Estimators[i] == "sorclass") { 
131                                                                 treeCalculators.push_back(new SorClass());
132                                                         }else if (Estimators[i] == "jest") { 
133                                                                 treeCalculators.push_back(new Jest());
134                                                         }else if (Estimators[i] == "sorest") { 
135                                                                 treeCalculators.push_back(new SorEst());
136                                                         }else if (Estimators[i] == "thetayc") { 
137                                                                 treeCalculators.push_back(new ThetaYC());
138                                                         }else if (Estimators[i] == "thetan") { 
139                                                                 treeCalculators.push_back(new ThetaN());
140                                                         }else if (Estimators[i] == "morisitahorn") { 
141                                                                 treeCalculators.push_back(new MorHorn());
142                                                         }else if (Estimators[i] == "braycurtis") { 
143                                                                 treeCalculators.push_back(new BrayCurtis());
144                                                         }
145                                                 }
146                                         }
147                                 }
148                         }       
149                 }
150
151         }
152         catch(exception& e) {
153                 errorOut(e, "TreeGroupCommand", "TreeGroupCommand");
154                 exit(1);
155         }
156 }
157
158 //**********************************************************************************************************************
159
160 void TreeGroupCommand::help(){
161         try {
162                 mothurOut("The tree.shared command creates a .tre to represent the similiarity between groups or sequences.\n");
163                 mothurOut("The tree.shared command can only be executed after a successful read.otu command or by providing a distance file.\n");
164                 mothurOut("The tree.shared command parameters are groups, calc, phylip, column, name, cutoff, precision and label.\n");
165                 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
166                 mothurOut("The group names are separated by dashes. The label allow you to select what distance levels you would like trees created for, and are also separated by dashes.\n");
167                 mothurOut("The phylip or column parameter are required if you do not run the read.otu command first, and only one may be used.  If you use a column file the name filename is required. \n");
168                 mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
169                 mothurOut("The tree.shared command should be in the following format: tree.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels).\n");
170                 mothurOut("Example tree.shared(groups=A-B-C, calc=jabund-sorabund).\n");
171                 mothurOut("The default value for groups is all the groups in your groupfile.\n");
172                 mothurOut("The default value for calc is jclass-thetayc.\n");
173                 mothurOut("The tree.shared command outputs a .tre file for each calculator you specify at each distance you choose.\n");
174                 validCalculator->printCalc("treegroup", cout);
175                 mothurOut("Or the tree.shared command can be in the following format: tree.shared(phylip=yourPhylipFile).\n");
176                 mothurOut("Example tree.shared(phylip=abrecovery.dist).\n");
177                 mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
178         }
179         catch(exception& e) {
180                 errorOut(e, "TreeGroupCommand", "help");
181                 exit(1);
182         }
183 }
184
185
186 //**********************************************************************************************************************
187
188 TreeGroupCommand::~TreeGroupCommand(){
189         if (abort == false) {
190                 
191                 if (format == "sharedfile") { delete read;  delete input; globaldata->ginput = NULL;}
192                 else { delete readMatrix;  delete matrix; delete list; }
193                 delete tmap;
194                 delete validCalculator;
195         }
196         
197 }
198
199 //**********************************************************************************************************************
200
201 int TreeGroupCommand::execute(){
202         try {
203         
204                 if (abort == true) { return 0; }
205                 
206                 if (format == "sharedfile") {
207                         //if the users entered no valid calculators don't execute command
208                         if (treeCalculators.size() == 0) { mothurOut("You have given no valid calculators."); mothurOutEndLine(); return 0; }
209
210                         //you have groups
211                         read = new ReadOTUFile(globaldata->inputFileName);      
212                         read->read(&*globaldata); 
213                         
214                         input = globaldata->ginput;
215                         lookup = input->getSharedRAbundVectors();
216                         lastLabel = lookup[0]->getLabel();
217                         
218                         if (lookup.size() < 2) { mothurOut("You have not provided enough valid groups.  I cannot run the command."); mothurOutEndLine(); return 0; }
219                         
220                         //used in tree constructor 
221                         globaldata->runParse = false;
222                         
223                         //create tree file
224                         makeSimsShared();
225                 }else{
226                         //read in dist file
227                         filename = globaldata->inputFileName;
228                 
229                         if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }        
230                         else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
231                                 
232                         readMatrix->setCutoff(cutoff);
233         
234                         if(namefile != ""){     
235                                 nameMap = new NameAssignment(namefile);
236                                 nameMap->readMap();
237                         }
238                         else{
239                                 nameMap = NULL;
240                         }
241         
242                         readMatrix->read(nameMap);
243                         list = readMatrix->getListVector();
244                         matrix = readMatrix->getMatrix();
245
246                         //make treemap
247                         tmap = new TreeMap();
248                         tmap->makeSim(list);
249                         globaldata->gTreemap = tmap;
250                         
251                         globaldata->Groups = tmap->namesOfGroups;
252                 
253                         //clear globaldatas old tree names if any
254                         globaldata->Treenames.clear();
255                 
256                         //fills globaldatas tree names
257                         globaldata->Treenames = globaldata->Groups;
258                         
259                         //used in tree constructor 
260                         globaldata->runParse = false;
261                         
262                         makeSimsDist();
263
264                         //create a new filename
265                         outputFile = getRootName(globaldata->inputFileName) + "tre";    
266                                 
267                         createTree();
268                         mothurOut("Tree complete. "); mothurOutEndLine();
269                 }
270                                 
271                 //reset groups parameter
272                 globaldata->Groups.clear();  
273
274                 return 0;
275         }
276         catch(exception& e) {
277                 errorOut(e, "TreeGroupCommand", "execute");
278                 exit(1);
279         }
280 }
281 //**********************************************************************************************************************
282
283 void TreeGroupCommand::createTree(){
284         try {
285                 //create tree
286                 t = new Tree();
287                 
288                 //do merges and create tree structure by setting parents and children
289                 //there are numGroups - 1 merges to do
290                 for (int i = 0; i < (numGroups - 1); i++) {
291                         float largest = -1000.0;
292
293                         int row, column;
294                         //find largest value in sims matrix by searching lower triangle
295                         for (int j = 1; j < simMatrix.size(); j++) {
296                                 for (int k = 0; k < j; k++) {
297                                         if (simMatrix[j][k] > largest) {  largest = simMatrix[j][k]; row = j; column = k;  }
298                                 }
299                         }
300
301                         //set non-leaf node info and update leaves to know their parents
302                         //non-leaf
303                         t->tree[numGroups + i].setChildren(index[row], index[column]);
304                         
305                         //parents
306                         t->tree[index[row]].setParent(numGroups + i);
307                         t->tree[index[column]].setParent(numGroups + i);
308                         
309                         //blength = distance / 2;
310                         float blength = ((1.0 - largest) / 2);
311                         
312                         //branchlengths
313                         t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
314                         t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
315                         
316                         //set your length to leaves to your childs length plus branchlength
317                         t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
318                         
319                         
320                         //update index 
321                         index[row] = numGroups+i;
322                         index[column] = numGroups+i;
323                         
324                         //remove highest value that caused the merge.
325                         simMatrix[row][column] = -1000.0;
326                         simMatrix[column][row] = -1000.0;
327                         
328                         //merge values in simsMatrix
329                         for (int n = 0; n < simMatrix.size(); n++)      {
330                                 //row becomes merge of 2 groups
331                                 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
332                                 simMatrix[n][row] = simMatrix[row][n];
333                                 //delete column
334                                 simMatrix[column][n] = -1000.0;
335                                 simMatrix[n][column] = -1000.0;
336                         }
337                 }
338                 
339                 //adjust tree to make sure root to tip length is .5
340                 int root = t->findRoot();
341                 t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
342                 
343                 //assemble tree
344                 t->assembleTree();
345                 
346                 //print newick file
347                 t->createNewickFile(outputFile);
348                 
349                 //delete tree
350                 delete t;
351         
352         }
353         catch(exception& e) {
354                 errorOut(e, "TreeGroupCommand", "createTree");
355                 exit(1);
356         }
357 }
358 /***********************************************************/
359 void TreeGroupCommand::printSims(ostream& out) {
360         try {
361                 
362                 //output column headers
363                 //out << '\t';
364                 //for (int i = 0; i < lookup.size(); i++) {     out << lookup[i]->getGroup() << '\t';           }
365                 //out << endl;
366                 
367                 
368                 for (int m = 0; m < simMatrix.size(); m++)      {
369                         //out << lookup[m]->getGroup() << '\t';
370                         for (int n = 0; n < simMatrix.size(); n++)      {
371                                 out << simMatrix[m][n] << '\t'; 
372                         }
373                         out << endl;
374                 }
375
376         }
377         catch(exception& e) {
378                 errorOut(e, "TreeGroupCommand", "printSims");
379                 exit(1);
380         }
381 }
382 /***********************************************************/
383 void TreeGroupCommand::makeSimsDist() {
384         try {
385                 numGroups = list->size();
386                 
387                 //initialize index
388                 index.clear();
389                 for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
390                 
391                 //initialize simMatrix
392                 simMatrix.clear();
393                 simMatrix.resize(numGroups);
394                 for (int m = 0; m < simMatrix.size(); m++)      {
395                         for (int j = 0; j < simMatrix.size(); j++)      {
396                                 simMatrix[m].push_back(0.0);
397                         }
398                 }
399                 
400                 //go through sparse matrix and fill sims
401                 //go through each cell in the sparsematrix
402                 for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
403                         //similairity = -(distance-1)
404                         simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0);   
405                         simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0);                           
406                 }
407
408
409         }
410         catch(exception& e) {
411                 errorOut(e, "TreeGroupCommand", "makeSimsDist");
412                 exit(1);
413         }
414 }
415
416 /***********************************************************/
417 void TreeGroupCommand::makeSimsShared() {
418         try {
419         
420                 //clear globaldatas old tree names if any
421                 globaldata->Treenames.clear();
422                 
423                 //fills globaldatas tree names
424                 globaldata->Treenames = globaldata->Groups;
425                 
426                 //create treemap class from groupmap for tree class to use
427                 tmap = new TreeMap();
428                 tmap->makeSim(globaldata->gGroupmap);
429                 globaldata->gTreemap = tmap;
430                 
431                 set<string> processedLabels;
432                 set<string> userLabels = labels;
433                 
434                 //as long as you are not at the end of the file or done wih the lines you want
435                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
436                 
437                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
438                                 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
439                                 process(lookup);
440                                 
441                                 processedLabels.insert(lookup[0]->getLabel());
442                                 userLabels.erase(lookup[0]->getLabel());
443                         }
444                         
445                         if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
446                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
447                                 lookup = input->getSharedRAbundVectors(lastLabel);
448
449                                 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
450                                 process(lookup);
451                                         
452                                 processedLabels.insert(lookup[0]->getLabel());
453                                 userLabels.erase(lookup[0]->getLabel());
454                         }
455
456                         lastLabel = lookup[0]->getLabel();                      
457                         
458                         //get next line to process
459                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
460                         lookup = input->getSharedRAbundVectors();
461                 }
462                 
463                 //output error messages about any remaining user labels
464                 set<string>::iterator it;
465                 bool needToRun = false;
466                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
467                         mothurOut("Your file does not include the label " + *it); 
468                         if (processedLabels.count(lastLabel) != 1) {
469                                 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
470                                 needToRun = true;
471                         }else {
472                                 mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
473                         }
474                 }
475                 
476                 //run last label if you need to
477                 if (needToRun == true)  {
478                         for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {             delete lookup[i]; }             } 
479                         lookup = input->getSharedRAbundVectors(lastLabel);
480
481                         mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
482                         process(lookup);
483                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }         
484                 }
485                 
486                 for(int i = 0 ; i < treeCalculators.size(); i++) {  delete treeCalculators[i]; }
487         }
488         catch(exception& e) {
489                 errorOut(e, "TreeGroupCommand", "makeSimsShared");
490                 exit(1);
491         }
492 }
493
494 /***********************************************************/
495 void TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
496         try{
497                                 EstOutput data;
498                                 vector<SharedRAbundVector*> subset;
499                                 numGroups = thisLookup.size();
500                                 
501                                 //for each calculator                                                                                           
502                                 for(int i = 0 ; i < treeCalculators.size(); i++) {
503                                         //initialize simMatrix
504                                         simMatrix.clear();
505                                         simMatrix.resize(numGroups);
506                                         for (int m = 0; m < simMatrix.size(); m++)      {
507                                                 for (int j = 0; j < simMatrix.size(); j++)      {
508                                                         simMatrix[m].push_back(0.0);
509                                                 }
510                                         }
511                 
512                                         //initialize index
513                                         index.clear();
514                                         for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
515                 
516                                         //create a new filename
517                                         outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre";                         
518                                                                                                 
519                                         for (int k = 0; k < thisLookup.size(); k++) { 
520                                                 for (int l = k; l < thisLookup.size(); l++) {
521                                                         if (k != l) { //we dont need to similiarity of a groups to itself
522                                                                 //get estimated similarity between 2 groups
523                                                                 
524                                                                 subset.clear(); //clear out old pair of sharedrabunds
525                                                                 //add new pair of sharedrabunds
526                                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
527                                                                 
528                                                                 data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
529                                                                 //save values in similarity matrix
530                                                                 simMatrix[k][l] = data[0];
531                                                                 simMatrix[l][k] = data[0];
532                                                         }
533                                                 }
534                                         }
535                                 
536                                         //creates tree from similarity matrix and write out file
537                                         createTree();
538                                 }
539
540         }
541         catch(exception& e) {
542                 errorOut(e, "TreeGroupCommand", "process");
543                 exit(1);
544         }
545 }
546 /***********************************************************/
547
548         
549