]> git.donarmstrong.com Git - mothur.git/blob - treegroupscommand.cpp
added logfile feature
[mothur.git] / treegroupscommand.cpp
1 /*
2  *  treegroupscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "treegroupscommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
21
22
23 //**********************************************************************************************************************
24
25 TreeGroupCommand::TreeGroupCommand(string option){
26         try {
27                 globaldata = GlobalData::getInstance();
28                 abort = false;
29                 allLines = 1;
30                 lines.clear();
31                 labels.clear();
32                 Groups.clear();
33                 Estimators.clear();
34                 
35                 //allow user to run help
36                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
37                 
38                 else {
39                         //valid paramters for this command
40                         string Array[] =  {"line","label","calc","groups", "phylip", "column", "name", "precision","cutoff"};
41                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
42                         
43                         OptionParser parser(option);
44                         map<string, string> parameters = parser. getParameters();
45                         
46                         ValidParameters validParameter;
47                 
48                         //check to make sure all parameters are valid for command
49                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
50                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
51                         }
52                         
53                         //required parameters
54                         phylipfile = validParameter.validFile(parameters, "phylip", true);
55                         if (phylipfile == "not open") { abort = true; }
56                         else if (phylipfile == "not found") { phylipfile = ""; }        
57                         else {  format = "phylip";      }
58                         
59                         columnfile = validParameter.validFile(parameters, "column", true);
60                         if (columnfile == "not open") { abort = true; } 
61                         else if (columnfile == "not found") { columnfile = ""; }
62                         else {  format = "column";      }
63                         
64                         namefile = validParameter.validFile(parameters, "name", true);
65                         if (namefile == "not open") { abort = true; }   
66                         else if (namefile == "not found") { namefile = ""; }
67                         else {  globaldata->setNameFile(namefile);      }
68                         
69                         format = globaldata->getFormat();
70                         
71                         //error checking on files                       
72                         if ((globaldata->getSharedFile() == "") && ((phylipfile == "") && (columnfile == "")))  { mothurOut("You must run the read.otu command or provide a distance file before running the tree.shared command."); mothurOutEndLine(); abort = true; }
73                         else if ((phylipfile != "") && (columnfile != "")) { mothurOut("When running the tree.shared command with a distance file you may not use both the column and the phylip parameters."); mothurOutEndLine(); abort = true; }
74                         
75                         if (columnfile != "") {
76                                 if (namefile == "") {  mothurOut("You need to provide a namefile if you are going to use the column format."); mothurOutEndLine(); abort = true; }
77                         }
78
79                         //check for optional parameter and set defaults
80                         // ...at some point should added some additional type checking...
81                         line = validParameter.validFile(parameters, "line", false);                             
82                         if (line == "not found") { line = "";  }
83                         else { 
84                                 if(line != "all") {  splitAtDash(line, lines);  allLines = 0;  }
85                                 else { allLines = 1;  }
86                         }
87                         
88                         label = validParameter.validFile(parameters, "label", false);                   
89                         if (label == "not found") { label = ""; }
90                         else { 
91                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
92                                 else { allLines = 1;  }
93                         }
94                         
95                         //make sure user did not use both the line and label parameters
96                         if ((line != "") && (label != "")) { mothurOut("You cannot use both the line and label parameters at the same time. "); mothurOutEndLine(); abort = true; }
97                         //if the user has not specified any line or labels use the ones from read.otu
98                         else if((line == "") && (label == "")) {  
99                                 allLines = globaldata->allLines; 
100                                 labels = globaldata->labels; 
101                                 lines = globaldata->lines;
102                         }
103                                 
104                         groups = validParameter.validFile(parameters, "groups", false);                 
105                         if (groups == "not found") { groups = ""; }
106                         else { 
107                                 splitAtDash(groups, Groups);
108                                 globaldata->Groups = Groups;
109                         }
110                                 
111                         calc = validParameter.validFile(parameters, "calc", false);                     
112                         if (calc == "not found") { calc = "jclass-thetayc";  }
113                         else { 
114                                  if (calc == "default")  {  calc = "jclass-thetayc";  }
115                         }
116                         splitAtDash(calc, Estimators);
117
118                         string temp;
119                         temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
120                         convert(temp, precision); 
121                         
122                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10"; }
123                         convert(temp, cutoff); 
124                         cutoff += (5 / (precision * 10.0));
125
126                                 
127                         if (abort == false) {
128                         
129                                 validCalculator = new ValidCalculators();
130                                 
131                                 if (format == "sharedfile") {
132                                         int i;
133                                         for (i=0; i<Estimators.size(); i++) {
134                                                 if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) { 
135                                                         if (Estimators[i] == "jabund") {        
136                                                                 treeCalculators.push_back(new JAbund());
137                                                         }else if (Estimators[i] == "sorabund") { 
138                                                                 treeCalculators.push_back(new SorAbund());
139                                                         }else if (Estimators[i] == "jclass") { 
140                                                                 treeCalculators.push_back(new Jclass());
141                                                         }else if (Estimators[i] == "sorclass") { 
142                                                                 treeCalculators.push_back(new SorClass());
143                                                         }else if (Estimators[i] == "jest") { 
144                                                                 treeCalculators.push_back(new Jest());
145                                                         }else if (Estimators[i] == "sorest") { 
146                                                                 treeCalculators.push_back(new SorEst());
147                                                         }else if (Estimators[i] == "thetayc") { 
148                                                                 treeCalculators.push_back(new ThetaYC());
149                                                         }else if (Estimators[i] == "thetan") { 
150                                                                 treeCalculators.push_back(new ThetaN());
151                                                         }else if (Estimators[i] == "morisitahorn") { 
152                                                                 treeCalculators.push_back(new MorHorn());
153                                                         }else if (Estimators[i] == "braycurtis") { 
154                                                                 treeCalculators.push_back(new BrayCurtis());
155                                                         }
156                                                 }
157                                         }
158                                 }
159                         }       
160                 }
161
162         }
163         catch(exception& e) {
164                 errorOut(e, "TreeGroupCommand", "TreeGroupCommand");
165                 exit(1);
166         }
167 }
168
169 //**********************************************************************************************************************
170
171 void TreeGroupCommand::help(){
172         try {
173                 mothurOut("The tree.shared command creates a .tre to represent the similiarity between groups or sequences.\n");
174                 mothurOut("The tree.shared command can only be executed after a successful read.otu command or by providing a distance file.\n");
175                 mothurOut("The tree.shared command parameters are groups, calc, phylip, column, name, cutoff, precision, line and label.  You may not use line and label at the same time.\n");
176                 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
177                 mothurOut("The group names are separated by dashes. The line and label allow you to select what distance levels you would like trees created for, and are also separated by dashes.\n");
178                 mothurOut("The phylip or column parameter are required if you do not run the read.otu command first, and only one may be used.  If you use a column file the name filename is required. \n");
179                 mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
180                 mothurOut("The tree.shared command should be in the following format: tree.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels).\n");
181                 mothurOut("Example tree.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund).\n");
182                 mothurOut("The default value for groups is all the groups in your groupfile.\n");
183                 mothurOut("The default value for calc is jclass-thetayc.\n");
184                 mothurOut("The tree.shared command outputs a .tre file for each calculator you specify at each distance you choose.\n");
185                 validCalculator->printCalc("treegroup", cout);
186                 mothurOut("Or the tree.shared command can be in the following format: tree.shared(phylip=yourPhylipFile).\n");
187                 mothurOut("Example tree.shared(phylip=abrecovery.dist).\n");
188                 mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
189         }
190         catch(exception& e) {
191                 errorOut(e, "TreeGroupCommand", "help");
192                 exit(1);
193         }
194 }
195
196
197 //**********************************************************************************************************************
198
199 TreeGroupCommand::~TreeGroupCommand(){
200         if (abort == false) {
201                 
202                 if (format == "sharedfile") { delete read;  delete input; globaldata->ginput = NULL;}
203                 else { delete readMatrix;  delete matrix; delete list; }
204                 delete tmap;
205                 delete validCalculator;
206         }
207         
208 }
209
210 //**********************************************************************************************************************
211
212 int TreeGroupCommand::execute(){
213         try {
214         
215                 if (abort == true) { return 0; }
216                 
217                 if (format == "sharedfile") {
218                         //if the users entered no valid calculators don't execute command
219                         if (treeCalculators.size() == 0) { mothurOut("You have given no valid calculators."); mothurOutEndLine(); return 0; }
220
221                         //you have groups
222                         read = new ReadOTUFile(globaldata->inputFileName);      
223                         read->read(&*globaldata); 
224                         
225                         input = globaldata->ginput;
226                         lookup = input->getSharedRAbundVectors();
227                         lastLabel = lookup[0]->getLabel();
228                         
229                         if (lookup.size() < 2) { mothurOut("You have not provided enough valid groups.  I cannot run the command."); mothurOutEndLine(); return 0; }
230                 
231                         globaldata->runParse = false;
232                         
233                         //create tree file
234                         makeSimsShared();
235                 }else{
236                         //read in dist file
237                         filename = globaldata->inputFileName;
238                 
239                         if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }        
240                         else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
241                                 
242                         readMatrix->setCutoff(cutoff);
243         
244                         if(namefile != ""){     
245                                 nameMap = new NameAssignment(namefile);
246                                 nameMap->readMap();
247                         }
248                         else{
249                                 nameMap = NULL;
250                         }
251         
252                         readMatrix->read(nameMap);
253                         list = readMatrix->getListVector();
254                         matrix = readMatrix->getMatrix();
255
256                         //make treemap
257                         tmap = new TreeMap();
258                         tmap->makeSim(list);
259                         globaldata->gTreemap = tmap;
260                         
261                         globaldata->Groups = tmap->namesOfGroups;
262                 
263                         //clear globaldatas old tree names if any
264                         globaldata->Treenames.clear();
265                 
266                         //fills globaldatas tree names
267                         globaldata->Treenames = globaldata->Groups;
268                         
269                         globaldata->runParse = false;
270                         
271                         makeSimsDist();
272
273                         //create a new filename
274                         outputFile = getRootName(globaldata->inputFileName) + "tre";    
275                                 
276                         createTree();
277                         mothurOut("Tree complete. "); mothurOutEndLine();
278                 }
279                                 
280                 //reset groups parameter
281                 globaldata->Groups.clear();  
282
283                 return 0;
284         }
285         catch(exception& e) {
286                 errorOut(e, "TreeGroupCommand", "execute");
287                 exit(1);
288         }
289 }
290 //**********************************************************************************************************************
291
292 void TreeGroupCommand::createTree(){
293         try {
294                 //create tree
295                 t = new Tree();
296                 
297                 //do merges and create tree structure by setting parents and children
298                 //there are numGroups - 1 merges to do
299                 for (int i = 0; i < (numGroups - 1); i++) {
300                         float largest = -1000.0;
301
302                         int row, column;
303                         //find largest value in sims matrix by searching lower triangle
304                         for (int j = 1; j < simMatrix.size(); j++) {
305                                 for (int k = 0; k < j; k++) {
306                                         if (simMatrix[j][k] > largest) {  largest = simMatrix[j][k]; row = j; column = k;  }
307                                 }
308                         }
309
310                         //set non-leaf node info and update leaves to know their parents
311                         //non-leaf
312                         t->tree[numGroups + i].setChildren(index[row], index[column]);
313                         
314                         //parents
315                         t->tree[index[row]].setParent(numGroups + i);
316                         t->tree[index[column]].setParent(numGroups + i);
317                         
318                         //blength = distance / 2;
319                         float blength = ((1.0 - largest) / 2);
320                         
321                         //branchlengths
322                         t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
323                         t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
324                         
325                         //set your length to leaves to your childs length plus branchlength
326                         t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
327                         
328                         
329                         //update index 
330                         index[row] = numGroups+i;
331                         index[column] = numGroups+i;
332                         
333                         //remove highest value that caused the merge.
334                         simMatrix[row][column] = -1000.0;
335                         simMatrix[column][row] = -1000.0;
336                         
337                         //merge values in simsMatrix
338                         for (int n = 0; n < simMatrix.size(); n++)      {
339                                 //row becomes merge of 2 groups
340                                 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
341                                 simMatrix[n][row] = simMatrix[row][n];
342                                 //delete column
343                                 simMatrix[column][n] = -1000.0;
344                                 simMatrix[n][column] = -1000.0;
345                         }
346                 }
347                 
348                 //adjust tree to make sure root to tip length is .5
349                 int root = t->findRoot();
350                 t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
351                 
352                 //assemble tree
353                 t->assembleTree();
354                 
355                 //print newick file
356                 t->createNewickFile(outputFile);
357                 
358                 //delete tree
359                 delete t;
360         
361         }
362         catch(exception& e) {
363                 errorOut(e, "TreeGroupCommand", "createTree");
364                 exit(1);
365         }
366 }
367 /***********************************************************/
368 void TreeGroupCommand::printSims(ostream& out) {
369         try {
370                 
371                 //output column headers
372                 //out << '\t';
373                 //for (int i = 0; i < lookup.size(); i++) {     out << lookup[i]->getGroup() << '\t';           }
374                 //out << endl;
375                 
376                 
377                 for (int m = 0; m < simMatrix.size(); m++)      {
378                         //out << lookup[m]->getGroup() << '\t';
379                         for (int n = 0; n < simMatrix.size(); n++)      {
380                                 out << simMatrix[m][n] << '\t'; 
381                         }
382                         out << endl;
383                 }
384
385         }
386         catch(exception& e) {
387                 errorOut(e, "TreeGroupCommand", "printSims");
388                 exit(1);
389         }
390 }
391 /***********************************************************/
392 void TreeGroupCommand::makeSimsDist() {
393         try {
394                 numGroups = list->size();
395                 
396                 //initialize index
397                 index.clear();
398                 for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
399                 
400                 //initialize simMatrix
401                 simMatrix.clear();
402                 simMatrix.resize(numGroups);
403                 for (int m = 0; m < simMatrix.size(); m++)      {
404                         for (int j = 0; j < simMatrix.size(); j++)      {
405                                 simMatrix[m].push_back(0.0);
406                         }
407                 }
408                 
409                 //go through sparse matrix and fill sims
410                 //go through each cell in the sparsematrix
411                 for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
412                         //similairity = -(distance-1)
413                         simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0);   
414                         simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0);                           
415                 }
416
417
418         }
419         catch(exception& e) {
420                 errorOut(e, "TreeGroupCommand", "makeSimsDist");
421                 exit(1);
422         }
423 }
424
425 /***********************************************************/
426 void TreeGroupCommand::makeSimsShared() {
427         try {
428                 int count = 1;  
429         
430                 //clear globaldatas old tree names if any
431                 globaldata->Treenames.clear();
432                 
433                 //fills globaldatas tree names
434                 globaldata->Treenames = globaldata->Groups;
435                 
436                 //create treemap class from groupmap for tree class to use
437                 tmap = new TreeMap();
438                 tmap->makeSim(globaldata->gGroupmap);
439                 globaldata->gTreemap = tmap;
440                 
441                 set<string> processedLabels;
442                 set<string> userLabels = labels;
443                 set<int> userLines = lines;
444
445                 //as long as you are not at the end of the file or done wih the lines you want
446                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
447                 
448                         if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){                       
449                                 mothurOut(lookup[0]->getLabel() + "\t" + toString(count)); mothurOutEndLine();
450                                 process(lookup);
451                                 
452                                 processedLabels.insert(lookup[0]->getLabel());
453                                 userLabels.erase(lookup[0]->getLabel());
454                                 userLines.erase(count);
455                         }
456                         
457                         if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
458                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
459                                 lookup = input->getSharedRAbundVectors(lastLabel);
460
461                                 mothurOut(lookup[0]->getLabel() + "\t" + toString(count)); mothurOutEndLine();
462                                 process(lookup);
463                                         
464                                 processedLabels.insert(lookup[0]->getLabel());
465                                 userLabels.erase(lookup[0]->getLabel());
466                         }
467
468                         lastLabel = lookup[0]->getLabel();                      
469                         
470                         //get next line to process
471                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
472                         lookup = input->getSharedRAbundVectors();
473                         count++;
474                 }
475                 
476                 //output error messages about any remaining user labels
477                 set<string>::iterator it;
478                 bool needToRun = false;
479                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
480                         mothurOut("Your file does not include the label " + *it); 
481                         if (processedLabels.count(lastLabel) != 1) {
482                                 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
483                                 needToRun = true;
484                         }else {
485                                 mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
486                         }
487                 }
488                 
489                 //run last line if you need to
490                 if (needToRun == true)  {
491                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
492                         lookup = input->getSharedRAbundVectors(lastLabel);
493
494                         mothurOut(lookup[0]->getLabel() + "\t" + toString(count)); mothurOutEndLine();
495                         process(lookup);
496                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }         
497                 }
498                 
499                 for(int i = 0 ; i < treeCalculators.size(); i++) {  delete treeCalculators[i]; }
500         }
501         catch(exception& e) {
502                 errorOut(e, "TreeGroupCommand", "makeSimsShared");
503                 exit(1);
504         }
505 }
506
507 /***********************************************************/
508 void TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
509         try{
510                                 EstOutput data;
511                                 vector<SharedRAbundVector*> subset;
512                                 numGroups = thisLookup.size();
513                                 
514                                 //for each calculator                                                                                           
515                                 for(int i = 0 ; i < treeCalculators.size(); i++) {
516                                         //initialize simMatrix
517                                         simMatrix.clear();
518                                         simMatrix.resize(numGroups);
519                                         for (int m = 0; m < simMatrix.size(); m++)      {
520                                                 for (int j = 0; j < simMatrix.size(); j++)      {
521                                                         simMatrix[m].push_back(0.0);
522                                                 }
523                                         }
524                 
525                                         //initialize index
526                                         index.clear();
527                                         for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
528                 
529                                         //create a new filename
530                                         outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre";                         
531                                                                                                 
532                                         for (int k = 0; k < thisLookup.size(); k++) { 
533                                                 for (int l = k; l < thisLookup.size(); l++) {
534                                                         if (k != l) { //we dont need to similiarity of a groups to itself
535                                                                 //get estimated similarity between 2 groups
536                                                                 
537                                                                 subset.clear(); //clear out old pair of sharedrabunds
538                                                                 //add new pair of sharedrabunds
539                                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
540                                                                 
541                                                                 data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
542                                                                 //save values in similarity matrix
543                                                                 simMatrix[k][l] = data[0];
544                                                                 simMatrix[l][k] = data[0];
545                                                         }
546                                                 }
547                                         }
548                                 
549                                         //creates tree from similarity matrix and write out file
550                                         createTree();
551                                 }
552
553         }
554         catch(exception& e) {
555                 errorOut(e, "TreeGroupCommand", "process");
556                 exit(1);
557         }
558 }
559 /***********************************************************/
560
561         
562