]> git.donarmstrong.com Git - mothur.git/blob - bootstrapsharedcommand.cpp
0f758c15a0eafc98d620d2fb37980be6f221a767
[mothur.git] / bootstrapsharedcommand.cpp
1 /*
2  *  bootstrapsharedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/16/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "bootstrapsharedcommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
21
22
23 //**********************************************************************************************************************
24
25 BootSharedCommand::BootSharedCommand(string option){
26         try {
27                 globaldata = GlobalData::getInstance();
28                 abort = false;
29                 allLines = 1;
30                 labels.clear();
31                 Groups.clear();
32                 Estimators.clear();
33                 
34                 //allow user to run help
35                 if(option == "help") { help(); abort = true; }
36                 
37                 else {
38                         //valid paramters for this command
39                         string Array[] =  {"label","calc","groups","iters"};
40                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
41                         
42                         OptionParser parser(option);
43                         map<string,string> parameters = parser.getParameters();
44                         
45                         ValidParameters validParameter;
46                 
47                         //check to make sure all parameters are valid for command
48                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
49                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
50                         }
51                         
52                         //make sure the user has already run the read.otu command
53                         if (globaldata->getSharedFile() == "") {
54                                 if (globaldata->getListFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the bootstrap.shared command."); mothurOutEndLine(); abort = true; }
55                                 else if (globaldata->getGroupFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the bootstrap.shared command."); mothurOutEndLine(); abort = true; }
56                         }
57                         
58                         //check for optional parameter and set defaults
59                         // ...at some point should added some additional type checking...
60                         label = validParameter.validFile(parameters, "label", false);                   
61                         if (label == "not found") { label = ""; }
62                         else { 
63                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
64                                 else { allLines = 1;  }
65                         }
66                         
67                         //if the user has not specified any labels use the ones from read.otu
68                         if(label == "") {  
69                                 allLines = globaldata->allLines; 
70                                 labels = globaldata->labels; 
71                         }
72                                 
73                         groups = validParameter.validFile(parameters, "groups", false);                 
74                         if (groups == "not found") { groups = ""; }
75                         else { 
76                                 splitAtDash(groups, Groups);
77                                 globaldata->Groups = Groups;
78                         }
79                                 
80                         calc = validParameter.validFile(parameters, "calc", false);                     
81                         if (calc == "not found") { calc = "jclass-thetayc";  }
82                         else { 
83                                  if (calc == "default")  {  calc = "jclass-thetayc";  }
84                         }
85                         splitAtDash(calc, Estimators);
86
87                         string temp;
88                         temp = validParameter.validFile(parameters, "iters", false);  if (temp == "not found") { temp = "1000"; }
89                         convert(temp, iters); 
90                                 
91                         if (abort == false) {
92                         
93                                 //used in tree constructor 
94                                 globaldata->runParse = false;
95                         
96                                 validCalculator = new ValidCalculators();
97                                 
98                                 int i;
99                                 for (i=0; i<Estimators.size(); i++) {
100                                         if (validCalculator->isValidCalculator("boot", Estimators[i]) == true) { 
101                                                 if (Estimators[i] == "jabund") {        
102                                                         treeCalculators.push_back(new JAbund());
103                                                 }else if (Estimators[i] == "sorabund") { 
104                                                         treeCalculators.push_back(new SorAbund());
105                                                 }else if (Estimators[i] == "jclass") { 
106                                                         treeCalculators.push_back(new Jclass());
107                                                 }else if (Estimators[i] == "sorclass") { 
108                                                         treeCalculators.push_back(new SorClass());
109                                                 }else if (Estimators[i] == "jest") { 
110                                                         treeCalculators.push_back(new Jest());
111                                                 }else if (Estimators[i] == "sorest") { 
112                                                         treeCalculators.push_back(new SorEst());
113                                                 }else if (Estimators[i] == "thetayc") { 
114                                                         treeCalculators.push_back(new ThetaYC());
115                                                 }else if (Estimators[i] == "thetan") { 
116                                                         treeCalculators.push_back(new ThetaN());
117                                                 }else if (Estimators[i] == "morisitahorn") { 
118                                                         treeCalculators.push_back(new MorHorn());
119                                                 }else if (Estimators[i] == "braycurtis") { 
120                                                         treeCalculators.push_back(new BrayCurtis());
121                                                 }
122                                         }
123                                 }
124                                 
125                                 delete validCalculator;
126                                 
127                                 ofstream* tempo;
128                                 for (int i=0; i < treeCalculators.size(); i++) {
129                                         tempo = new ofstream;
130                                         out.push_back(tempo);
131                                 }
132                                 
133                                 //make a vector of tree* for each calculator
134                                 trees.resize(treeCalculators.size());
135                         }
136                 }
137
138         }
139         catch(exception& e) {
140                 errorOut(e, "BootSharedCommand", "BootSharedCommand");
141                 exit(1);
142         }
143 }
144
145 //**********************************************************************************************************************
146
147 void BootSharedCommand::help(){
148         try {
149                 mothurOut("The bootstrap.shared command can only be executed after a successful read.otu command.\n");
150                 mothurOut("The bootstrap.shared command parameters are groups, calc, iters and label.\n");
151                 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
152                 mothurOut("The group names are separated by dashes. The label parameter allows you to select what distance levels you would like trees created for, and is also separated by dashes.\n");
153                 mothurOut("The bootstrap.shared command should be in the following format: bootstrap.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels, iters=yourIters).\n");
154                 mothurOut("Example bootstrap.shared(groups=A-B-C, calc=jabund-sorabund, iters=100).\n");
155                 mothurOut("The default value for groups is all the groups in your groupfile.\n");
156                 mothurOut("The default value for calc is jclass-thetayc. The default for iters is 1000.\n");
157         }
158         catch(exception& e) {
159                 errorOut(e, "BootSharedCommand", "help");
160                 exit(1);
161         }
162 }
163
164 //**********************************************************************************************************************
165
166 BootSharedCommand::~BootSharedCommand(){
167         //made new in execute
168         if (abort == false) {
169                 delete input; globaldata->ginput = NULL;
170                 delete read;
171                 delete util;
172                 globaldata->gorder = NULL;
173         }
174 }
175
176 //**********************************************************************************************************************
177
178 int BootSharedCommand::execute(){
179         try {
180         
181                 if (abort == true) {    return 0;       }
182         
183                 util = new SharedUtil();        
184         
185                 //read first line
186                 read = new ReadOTUFile(globaldata->inputFileName);      
187                 read->read(&*globaldata); 
188                 input = globaldata->ginput;
189                 order = input->getSharedOrderVector();
190                 string lastLabel = order->getLabel();
191                 
192                 //if the users entered no valid calculators don't execute command
193                 if (treeCalculators.size() == 0) { return 0; }
194
195                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
196                 set<string> processedLabels;
197                 set<string> userLabels = labels;
198                                 
199                 //set users groups
200                 util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "treegroup");
201                 numGroups = globaldata->Groups.size();
202                 
203                 //clear globaldatas old tree names if any
204                 globaldata->Treenames.clear();
205                 
206                 //fills globaldatas tree names
207                 globaldata->Treenames = globaldata->Groups;
208                 
209                 //create treemap class from groupmap for tree class to use
210                 tmap = new TreeMap();
211                 tmap->makeSim(globaldata->gGroupmap);
212                 globaldata->gTreemap = tmap;
213                         
214                 while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
215                 
216                         if(allLines == 1 || labels.count(order->getLabel()) == 1){                      
217                                 
218                                 mothurOut(order->getLabel()); mothurOutEndLine();
219                                 process(order);
220                                 
221                                 processedLabels.insert(order->getLabel());
222                                 userLabels.erase(order->getLabel());
223                         }
224                         
225                         //you have a label the user want that is smaller than this label and the last label has not already been processed
226                         if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
227                                 
228                                 delete order;
229                                 order = input->getSharedOrderVector(lastLabel);                                                                                                 
230                                 mothurOut(order->getLabel()); mothurOutEndLine();
231                                 process(order);
232
233                                 processedLabels.insert(order->getLabel());
234                                 userLabels.erase(order->getLabel());
235                         }
236                         
237                         
238                         lastLabel = order->getLabel();                  
239
240                         //get next line to process
241                         delete order;
242                         order = input->getSharedOrderVector();
243                 }
244                 
245                 //output error messages about any remaining user labels
246                 set<string>::iterator it;
247                 bool needToRun = false;
248                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
249                         mothurOut("Your file does not include the label " + *it); 
250                         if (processedLabels.count(lastLabel) != 1) {
251                                 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
252                                 needToRun = true;
253                         }else {
254                                 mothurOut(". Please refer to " + lastLabel + ".");  mothurOutEndLine();
255                         }
256                 }
257                 
258                 //run last line if you need to
259                 if (needToRun == true)  {
260                                 if (order != NULL) {    delete order;   }
261                                 order = input->getSharedOrderVector(lastLabel);                                                                                                 
262                                 mothurOut(order->getLabel()); mothurOutEndLine();
263                                 process(order);
264                                 delete order;
265
266                 }
267                 
268                 //reset groups parameter
269                 globaldata->Groups.clear();  
270
271                 return 0;
272         }
273         catch(exception& e) {
274                 errorOut(e, "BootSharedCommand", "execute");
275                 exit(1);
276         }
277 }
278 //**********************************************************************************************************************
279
280 void BootSharedCommand::createTree(ostream* out, Tree* t){
281         try {
282                 
283                 //do merges and create tree structure by setting parents and children
284                 //there are numGroups - 1 merges to do
285                 for (int i = 0; i < (numGroups - 1); i++) {
286                 
287                         float largest = -1000.0;
288                         int row, column;
289                         //find largest value in sims matrix by searching lower triangle
290                         for (int j = 1; j < simMatrix.size(); j++) {
291                                 for (int k = 0; k < j; k++) {
292                                         if (simMatrix[j][k] > largest) {  largest = simMatrix[j][k]; row = j; column = k;  }
293                                 }
294                         }
295
296                         //set non-leaf node info and update leaves to know their parents
297                         //non-leaf
298                         t->tree[numGroups + i].setChildren(index[row], index[column]);
299                 
300                         //parents
301                         t->tree[index[row]].setParent(numGroups + i);
302                         t->tree[index[column]].setParent(numGroups + i);
303                         
304                         //blength = distance / 2;
305                         float blength = ((1.0 - largest) / 2);
306                         
307                         //branchlengths
308                         t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
309                         t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
310                 
311                         //set your length to leaves to your childs length plus branchlength
312                         t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
313                         
314                 
315                         //update index 
316                         index[row] = numGroups+i;
317                         index[column] = numGroups+i;
318                         
319                         //zero out highest value that caused the merge.
320                         simMatrix[row][column] = -1000.0;
321                         simMatrix[column][row] = -1000.0;
322                 
323                         //merge values in simsMatrix
324                         for (int n = 0; n < simMatrix.size(); n++)      {
325                                 //row becomes merge of 2 groups
326                                 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
327                                 simMatrix[n][row] = simMatrix[row][n];
328                                 //delete column
329                                 simMatrix[column][n] = -1000.0;
330                                 simMatrix[n][column] = -1000.0;
331                         }
332                 }
333                 
334                 //adjust tree to make sure root to tip length is .5
335                 int root = t->findRoot();
336                 t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
337
338                 //assemble tree
339                 t->assembleTree();
340         
341                 //print newick file
342                 t->print(*out);
343         
344         }
345         catch(exception& e) {
346                 errorOut(e, "BootSharedCommand", "createTree");
347                 exit(1);
348         }
349 }
350 /***********************************************************/
351 void BootSharedCommand::printSims() {
352         try {
353                 mothurOut("simsMatrix"); mothurOutEndLine(); 
354                 for (int m = 0; m < simMatrix.size(); m++)      {
355                         for (int n = 0; n < simMatrix.size(); n++)      {
356                                 mothurOut(simMatrix[m][n]);  mothurOut("\t"); 
357                         }
358                         mothurOutEndLine(); 
359                 }
360
361         }
362         catch(exception& e) {
363                 errorOut(e, "BootSharedCommand", "printSims");
364                 exit(1);
365         }
366 }
367 /***********************************************************/
368 void BootSharedCommand::process(SharedOrderVector* order) {
369         try{
370                                 EstOutput data;
371                                 vector<SharedRAbundVector*> subset;
372                                 
373                                 
374                                 //open an ostream for each calc to print to
375                                 for (int z = 0; z < treeCalculators.size(); z++) {
376                                         //create a new filename
377                                         outputFile = getRootName(globaldata->inputFileName) + treeCalculators[z]->getName() + ".boot" + order->getLabel() + ".tre";
378                                         openOutputFile(outputFile, *(out[z]));
379                                 }
380                                 
381                                 mothurOut("Generating bootstrap trees..."); cout.flush();
382                                 
383                                 //create a file for each calculator with the 1000 trees in it.
384                                 for (int p = 0; p < iters; p++) {
385                                         
386                                         util->getSharedVectorswithReplacement(globaldata->Groups, lookup, order);  //fills group vectors from order vector.
387
388                                 
389                                         //for each calculator                                                                                           
390                                         for(int i = 0 ; i < treeCalculators.size(); i++) {
391                                         
392                                                 //initialize simMatrix
393                                                 simMatrix.clear();
394                                                 simMatrix.resize(numGroups);
395                                                 for (int m = 0; m < simMatrix.size(); m++)      {
396                                                         for (int j = 0; j < simMatrix.size(); j++)      {
397                                                                 simMatrix[m].push_back(0.0);
398                                                         }
399                                                 }
400                                 
401                                                 //initialize index
402                                                 index.clear();
403                                                 for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
404                                                         
405                                                 for (int k = 0; k < lookup.size(); k++) { // pass cdd each set of groups to commpare
406                                                         for (int l = k; l < lookup.size(); l++) {
407                                                                 if (k != l) { //we dont need to similiarity of a groups to itself
408                                                                         subset.clear(); //clear out old pair of sharedrabunds
409                                                                         //add new pair of sharedrabunds
410                                                                         subset.push_back(lookup[k]); subset.push_back(lookup[l]); 
411                                                                         
412                                                                         //get estimated similarity between 2 groups
413                                                                         data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
414                                                                         //save values in similarity matrix
415                                                                         simMatrix[k][l] = data[0];
416                                                                         simMatrix[l][k] = data[0];
417                                                                 }
418                                                         }
419                                                 }
420                                                 
421                                                 tempTree = new Tree();
422                                                 
423                                                 //creates tree from similarity matrix and write out file
424                                                 createTree(out[i], tempTree);
425                                                 
426                                                 //save trees for consensus command.
427                                                 trees[i].push_back(tempTree);
428                                         }
429                                 }
430                                 
431                                 mothurOut("\tDone."); mothurOutEndLine();
432                                 //delete globaldata's tree
433                                 //for (int m = 0; m < globaldata->gTree.size(); m++) {  delete globaldata->gTree[m];  }
434                                 //globaldata->gTree.clear();
435                                 
436                                 
437                                 //create consensus trees for each bootstrapped tree set
438                                 for (int k = 0; k < trees.size(); k++) {
439                                         
440                                         mothurOut("Generating consensus tree for " + treeCalculators[k]->getName()); mothurOutEndLine();
441                                         
442                                         //set global data to calc trees
443                                         globaldata->gTree = trees[k];
444                                         
445                                         string filename = getRootName(globaldata->inputFileName) + treeCalculators[k]->getName() + ".boot" + order->getLabel();
446                                         consensus = new ConcensusCommand(filename);
447                                         consensus->execute();
448                                         delete consensus;
449                                         
450                                         //delete globaldata's tree
451                                         //for (int m = 0; m < globaldata->gTree.size(); m++) {  delete globaldata->gTree[m];  }
452                                         //globaldata->gTree.clear();
453                                         
454                                 }
455                                 
456                                 
457                                         
458                                 //close ostream for each calc
459                                 for (int z = 0; z < treeCalculators.size(); z++) { out[z]->close(); }
460         
461         }
462         catch(exception& e) {
463                 errorOut(e, "BootSharedCommand", "process");
464                 exit(1);
465         }
466 }
467 /***********************************************************/
468
469
470