]> git.donarmstrong.com Git - mothur.git/blob - treegroupscommand.cpp
fixed some issues while testing 1.6
[mothur.git] / treegroupscommand.cpp
1 /*
2  *  treegroupscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "treegroupscommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
21
22
23 //**********************************************************************************************************************
24
25 TreeGroupCommand::TreeGroupCommand(string option){
26         try {
27                 globaldata = GlobalData::getInstance();
28                 abort = false;
29                 allLines = 1;
30                 lines.clear();
31                 labels.clear();
32                 Groups.clear();
33                 Estimators.clear();
34                 
35                 //allow user to run help
36                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
37                 
38                 else {
39                         //valid paramters for this command
40                         string Array[] =  {"line","label","calc","groups", "phylip", "column", "name", "precision","cutoff"};
41                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
42                         
43                         OptionParser parser(option);
44                         map<string, string> parameters = parser. getParameters();
45                         
46                         ValidParameters validParameter;
47                 
48                         //check to make sure all parameters are valid for command
49                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
50                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
51                         }
52                         
53                         //required parameters
54                         phylipfile = validParameter.validFile(parameters, "phylip", true);
55                         if (phylipfile == "not open") { abort = true; }
56                         else if (phylipfile == "not found") { phylipfile = ""; }        
57                         else {  format = "phylip";      }
58                         
59                         columnfile = validParameter.validFile(parameters, "column", true);
60                         if (columnfile == "not open") { abort = true; } 
61                         else if (columnfile == "not found") { columnfile = ""; }
62                         else {  format = "column";      }
63                         
64                         namefile = validParameter.validFile(parameters, "name", true);
65                         if (namefile == "not open") { abort = true; }   
66                         else if (namefile == "not found") { namefile = ""; }
67                         else {  globaldata->setNameFile(namefile);      }
68                         
69                         format = globaldata->getFormat();
70                         
71                         //error checking on files                       
72                         if ((globaldata->getSharedFile() == "") && ((phylipfile == "") && (columnfile == "")))  { mothurOut("You must run the read.otu command or provide a distance file before running the tree.shared command."); mothurOutEndLine(); abort = true; }
73                         else if ((phylipfile != "") && (columnfile != "")) { mothurOut("When running the tree.shared command with a distance file you may not use both the column and the phylip parameters."); mothurOutEndLine(); abort = true; }
74                         
75                         if (columnfile != "") {
76                                 if (namefile == "") {  mothurOut("You need to provide a namefile if you are going to use the column format."); mothurOutEndLine(); abort = true; }
77                         }
78
79                         //check for optional parameter and set defaults
80                         // ...at some point should added some additional type checking...
81                         line = validParameter.validFile(parameters, "line", false);                             
82                         if (line == "not found") { line = "";  }
83                         else { 
84                                 if(line != "all") {  splitAtDash(line, lines);  allLines = 0;  }
85                                 else { allLines = 1;  }
86                         }
87                         
88                         label = validParameter.validFile(parameters, "label", false);                   
89                         if (label == "not found") { label = ""; }
90                         else { 
91                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
92                                 else { allLines = 1;  }
93                         }
94                         
95                         //make sure user did not use both the line and label parameters
96                         if ((line != "") && (label != "")) { mothurOut("You cannot use both the line and label parameters at the same time. "); mothurOutEndLine(); abort = true; }
97                         //if the user has not specified any line or labels use the ones from read.otu
98                         else if((line == "") && (label == "")) {  
99                                 allLines = globaldata->allLines; 
100                                 labels = globaldata->labels; 
101                                 lines = globaldata->lines;
102                         }
103                                 
104                         groups = validParameter.validFile(parameters, "groups", false);                 
105                         if (groups == "not found") { groups = ""; }
106                         else { 
107                                 splitAtDash(groups, Groups);
108                                 globaldata->Groups = Groups;
109                         }
110                                 
111                         calc = validParameter.validFile(parameters, "calc", false);                     
112                         if (calc == "not found") { calc = "jclass-thetayc";  }
113                         else { 
114                                  if (calc == "default")  {  calc = "jclass-thetayc";  }
115                         }
116                         splitAtDash(calc, Estimators);
117
118                         string temp;
119                         temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
120                         convert(temp, precision); 
121                         
122                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10"; }
123                         convert(temp, cutoff); 
124                         cutoff += (5 / (precision * 10.0));
125
126                                 
127                         if (abort == false) {
128                         
129                                 validCalculator = new ValidCalculators();
130                                 
131                                 if (format == "sharedfile") {
132                                         int i;
133                                         for (i=0; i<Estimators.size(); i++) {
134                                                 if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) { 
135                                                         if (Estimators[i] == "jabund") {        
136                                                                 treeCalculators.push_back(new JAbund());
137                                                         }else if (Estimators[i] == "sorabund") { 
138                                                                 treeCalculators.push_back(new SorAbund());
139                                                         }else if (Estimators[i] == "jclass") { 
140                                                                 treeCalculators.push_back(new Jclass());
141                                                         }else if (Estimators[i] == "sorclass") { 
142                                                                 treeCalculators.push_back(new SorClass());
143                                                         }else if (Estimators[i] == "jest") { 
144                                                                 treeCalculators.push_back(new Jest());
145                                                         }else if (Estimators[i] == "sorest") { 
146                                                                 treeCalculators.push_back(new SorEst());
147                                                         }else if (Estimators[i] == "thetayc") { 
148                                                                 treeCalculators.push_back(new ThetaYC());
149                                                         }else if (Estimators[i] == "thetan") { 
150                                                                 treeCalculators.push_back(new ThetaN());
151                                                         }else if (Estimators[i] == "morisitahorn") { 
152                                                                 treeCalculators.push_back(new MorHorn());
153                                                         }else if (Estimators[i] == "braycurtis") { 
154                                                                 treeCalculators.push_back(new BrayCurtis());
155                                                         }
156                                                 }
157                                         }
158                                 }
159                         }       
160                 }
161
162         }
163         catch(exception& e) {
164                 errorOut(e, "TreeGroupCommand", "TreeGroupCommand");
165                 exit(1);
166         }
167 }
168
169 //**********************************************************************************************************************
170
171 void TreeGroupCommand::help(){
172         try {
173                 mothurOut("The tree.shared command creates a .tre to represent the similiarity between groups or sequences.\n");
174                 mothurOut("The tree.shared command can only be executed after a successful read.otu command or by providing a distance file.\n");
175                 mothurOut("The tree.shared command parameters are groups, calc, phylip, column, name, cutoff, precision, line and label.  You may not use line and label at the same time.\n");
176                 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
177                 mothurOut("The group names are separated by dashes. The line and label allow you to select what distance levels you would like trees created for, and are also separated by dashes.\n");
178                 mothurOut("The phylip or column parameter are required if you do not run the read.otu command first, and only one may be used.  If you use a column file the name filename is required. \n");
179                 mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
180                 mothurOut("The tree.shared command should be in the following format: tree.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels).\n");
181                 mothurOut("Example tree.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund).\n");
182                 mothurOut("The default value for groups is all the groups in your groupfile.\n");
183                 mothurOut("The default value for calc is jclass-thetayc.\n");
184                 mothurOut("The tree.shared command outputs a .tre file for each calculator you specify at each distance you choose.\n");
185                 validCalculator->printCalc("treegroup", cout);
186                 mothurOut("Or the tree.shared command can be in the following format: tree.shared(phylip=yourPhylipFile).\n");
187                 mothurOut("Example tree.shared(phylip=abrecovery.dist).\n");
188                 mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
189         }
190         catch(exception& e) {
191                 errorOut(e, "TreeGroupCommand", "help");
192                 exit(1);
193         }
194 }
195
196
197 //**********************************************************************************************************************
198
199 TreeGroupCommand::~TreeGroupCommand(){
200         if (abort == false) {
201                 
202                 if (format == "sharedfile") { delete read;  delete input; globaldata->ginput = NULL;}
203                 else { delete readMatrix;  delete matrix; delete list; }
204                 delete tmap;
205                 delete validCalculator;
206         }
207         
208 }
209
210 //**********************************************************************************************************************
211
212 int TreeGroupCommand::execute(){
213         try {
214         
215                 if (abort == true) { return 0; }
216                 
217                 if (format == "sharedfile") {
218                         //if the users entered no valid calculators don't execute command
219                         if (treeCalculators.size() == 0) { mothurOut("You have given no valid calculators."); mothurOutEndLine(); return 0; }
220
221                         //you have groups
222                         read = new ReadOTUFile(globaldata->inputFileName);      
223                         read->read(&*globaldata); 
224                         
225                         input = globaldata->ginput;
226                         lookup = input->getSharedRAbundVectors();
227                         lastLabel = lookup[0]->getLabel();
228                         
229                         if (lookup.size() < 2) { mothurOut("You have not provided enough valid groups.  I cannot run the command."); mothurOutEndLine(); return 0; }
230                         
231                         //used in tree constructor 
232                         globaldata->runParse = false;
233                         
234                         //create tree file
235                         makeSimsShared();
236                 }else{
237                         //read in dist file
238                         filename = globaldata->inputFileName;
239                 
240                         if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }        
241                         else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
242                                 
243                         readMatrix->setCutoff(cutoff);
244         
245                         if(namefile != ""){     
246                                 nameMap = new NameAssignment(namefile);
247                                 nameMap->readMap();
248                         }
249                         else{
250                                 nameMap = NULL;
251                         }
252         
253                         readMatrix->read(nameMap);
254                         list = readMatrix->getListVector();
255                         matrix = readMatrix->getMatrix();
256
257                         //make treemap
258                         tmap = new TreeMap();
259                         tmap->makeSim(list);
260                         globaldata->gTreemap = tmap;
261                         
262                         globaldata->Groups = tmap->namesOfGroups;
263                 
264                         //clear globaldatas old tree names if any
265                         globaldata->Treenames.clear();
266                 
267                         //fills globaldatas tree names
268                         globaldata->Treenames = globaldata->Groups;
269                         
270                         //used in tree constructor 
271                         globaldata->runParse = false;
272                         
273                         makeSimsDist();
274
275                         //create a new filename
276                         outputFile = getRootName(globaldata->inputFileName) + "tre";    
277                                 
278                         createTree();
279                         mothurOut("Tree complete. "); mothurOutEndLine();
280                 }
281                                 
282                 //reset groups parameter
283                 globaldata->Groups.clear();  
284
285                 return 0;
286         }
287         catch(exception& e) {
288                 errorOut(e, "TreeGroupCommand", "execute");
289                 exit(1);
290         }
291 }
292 //**********************************************************************************************************************
293
294 void TreeGroupCommand::createTree(){
295         try {
296                 //create tree
297                 t = new Tree();
298                 
299                 //do merges and create tree structure by setting parents and children
300                 //there are numGroups - 1 merges to do
301                 for (int i = 0; i < (numGroups - 1); i++) {
302                         float largest = -1000.0;
303
304                         int row, column;
305                         //find largest value in sims matrix by searching lower triangle
306                         for (int j = 1; j < simMatrix.size(); j++) {
307                                 for (int k = 0; k < j; k++) {
308                                         if (simMatrix[j][k] > largest) {  largest = simMatrix[j][k]; row = j; column = k;  }
309                                 }
310                         }
311
312                         //set non-leaf node info and update leaves to know their parents
313                         //non-leaf
314                         t->tree[numGroups + i].setChildren(index[row], index[column]);
315                         
316                         //parents
317                         t->tree[index[row]].setParent(numGroups + i);
318                         t->tree[index[column]].setParent(numGroups + i);
319                         
320                         //blength = distance / 2;
321                         float blength = ((1.0 - largest) / 2);
322                         
323                         //branchlengths
324                         t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
325                         t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
326                         
327                         //set your length to leaves to your childs length plus branchlength
328                         t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
329                         
330                         
331                         //update index 
332                         index[row] = numGroups+i;
333                         index[column] = numGroups+i;
334                         
335                         //remove highest value that caused the merge.
336                         simMatrix[row][column] = -1000.0;
337                         simMatrix[column][row] = -1000.0;
338                         
339                         //merge values in simsMatrix
340                         for (int n = 0; n < simMatrix.size(); n++)      {
341                                 //row becomes merge of 2 groups
342                                 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
343                                 simMatrix[n][row] = simMatrix[row][n];
344                                 //delete column
345                                 simMatrix[column][n] = -1000.0;
346                                 simMatrix[n][column] = -1000.0;
347                         }
348                 }
349                 
350                 //adjust tree to make sure root to tip length is .5
351                 int root = t->findRoot();
352                 t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
353                 
354                 //assemble tree
355                 t->assembleTree();
356                 
357                 //print newick file
358                 t->createNewickFile(outputFile);
359                 
360                 //delete tree
361                 delete t;
362         
363         }
364         catch(exception& e) {
365                 errorOut(e, "TreeGroupCommand", "createTree");
366                 exit(1);
367         }
368 }
369 /***********************************************************/
370 void TreeGroupCommand::printSims(ostream& out) {
371         try {
372                 
373                 //output column headers
374                 //out << '\t';
375                 //for (int i = 0; i < lookup.size(); i++) {     out << lookup[i]->getGroup() << '\t';           }
376                 //out << endl;
377                 
378                 
379                 for (int m = 0; m < simMatrix.size(); m++)      {
380                         //out << lookup[m]->getGroup() << '\t';
381                         for (int n = 0; n < simMatrix.size(); n++)      {
382                                 out << simMatrix[m][n] << '\t'; 
383                         }
384                         out << endl;
385                 }
386
387         }
388         catch(exception& e) {
389                 errorOut(e, "TreeGroupCommand", "printSims");
390                 exit(1);
391         }
392 }
393 /***********************************************************/
394 void TreeGroupCommand::makeSimsDist() {
395         try {
396                 numGroups = list->size();
397                 
398                 //initialize index
399                 index.clear();
400                 for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
401                 
402                 //initialize simMatrix
403                 simMatrix.clear();
404                 simMatrix.resize(numGroups);
405                 for (int m = 0; m < simMatrix.size(); m++)      {
406                         for (int j = 0; j < simMatrix.size(); j++)      {
407                                 simMatrix[m].push_back(0.0);
408                         }
409                 }
410                 
411                 //go through sparse matrix and fill sims
412                 //go through each cell in the sparsematrix
413                 for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
414                         //similairity = -(distance-1)
415                         simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0);   
416                         simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0);                           
417                 }
418
419
420         }
421         catch(exception& e) {
422                 errorOut(e, "TreeGroupCommand", "makeSimsDist");
423                 exit(1);
424         }
425 }
426
427 /***********************************************************/
428 void TreeGroupCommand::makeSimsShared() {
429         try {
430                 int count = 1;  
431         
432                 //clear globaldatas old tree names if any
433                 globaldata->Treenames.clear();
434                 
435                 //fills globaldatas tree names
436                 globaldata->Treenames = globaldata->Groups;
437                 
438                 //create treemap class from groupmap for tree class to use
439                 tmap = new TreeMap();
440                 tmap->makeSim(globaldata->gGroupmap);
441                 globaldata->gTreemap = tmap;
442                 
443                 set<string> processedLabels;
444                 set<string> userLabels = labels;
445                 set<int> userLines = lines;
446
447                 //as long as you are not at the end of the file or done wih the lines you want
448                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
449                 
450                         if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){                       
451                                 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
452                                 process(lookup);
453                                 
454                                 processedLabels.insert(lookup[0]->getLabel());
455                                 userLabels.erase(lookup[0]->getLabel());
456                                 userLines.erase(count);
457                         }
458                         
459                         if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
460                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
461                                 lookup = input->getSharedRAbundVectors(lastLabel);
462
463                                 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
464                                 process(lookup);
465                                         
466                                 processedLabels.insert(lookup[0]->getLabel());
467                                 userLabels.erase(lookup[0]->getLabel());
468                         }
469
470                         lastLabel = lookup[0]->getLabel();                      
471                         
472                         //get next line to process
473                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
474                         lookup = input->getSharedRAbundVectors();
475                         count++;
476                 }
477                 
478                 //output error messages about any remaining user labels
479                 set<string>::iterator it;
480                 bool needToRun = false;
481                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
482                         mothurOut("Your file does not include the label " + *it); 
483                         if (processedLabels.count(lastLabel) != 1) {
484                                 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
485                                 needToRun = true;
486                         }else {
487                                 mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
488                         }
489                 }
490                 
491                 //run last line if you need to
492                 if (needToRun == true)  {
493                         for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {             delete lookup[i]; }             } 
494                         lookup = input->getSharedRAbundVectors(lastLabel);
495
496                         mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
497                         process(lookup);
498                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }         
499                 }
500                 
501                 for(int i = 0 ; i < treeCalculators.size(); i++) {  delete treeCalculators[i]; }
502         }
503         catch(exception& e) {
504                 errorOut(e, "TreeGroupCommand", "makeSimsShared");
505                 exit(1);
506         }
507 }
508
509 /***********************************************************/
510 void TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
511         try{
512                                 EstOutput data;
513                                 vector<SharedRAbundVector*> subset;
514                                 numGroups = thisLookup.size();
515                                 
516                                 //for each calculator                                                                                           
517                                 for(int i = 0 ; i < treeCalculators.size(); i++) {
518                                         //initialize simMatrix
519                                         simMatrix.clear();
520                                         simMatrix.resize(numGroups);
521                                         for (int m = 0; m < simMatrix.size(); m++)      {
522                                                 for (int j = 0; j < simMatrix.size(); j++)      {
523                                                         simMatrix[m].push_back(0.0);
524                                                 }
525                                         }
526                 
527                                         //initialize index
528                                         index.clear();
529                                         for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
530                 
531                                         //create a new filename
532                                         outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre";                         
533                                                                                                 
534                                         for (int k = 0; k < thisLookup.size(); k++) { 
535                                                 for (int l = k; l < thisLookup.size(); l++) {
536                                                         if (k != l) { //we dont need to similiarity of a groups to itself
537                                                                 //get estimated similarity between 2 groups
538                                                                 
539                                                                 subset.clear(); //clear out old pair of sharedrabunds
540                                                                 //add new pair of sharedrabunds
541                                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
542                                                                 
543                                                                 data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
544                                                                 //save values in similarity matrix
545                                                                 simMatrix[k][l] = data[0];
546                                                                 simMatrix[l][k] = data[0];
547                                                         }
548                                                 }
549                                         }
550                                 
551                                         //creates tree from similarity matrix and write out file
552                                         createTree();
553                                 }
554
555         }
556         catch(exception& e) {
557                 errorOut(e, "TreeGroupCommand", "process");
558                 exit(1);
559         }
560 }
561 /***********************************************************/
562
563         
564