]> git.donarmstrong.com Git - mothur.git/blob - bootstrapsharedcommand.cpp
1b682c78e3471fe7b4fef8528e8d74243374cd21
[mothur.git] / bootstrapsharedcommand.cpp
1 /*
2  *  bootstrapsharedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/16/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "bootstrapsharedcommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
21
22
23 //**********************************************************************************************************************
24
25 BootSharedCommand::BootSharedCommand(string option){
26         try {
27                 globaldata = GlobalData::getInstance();
28                 abort = false;
29                 allLines = 1;
30                 labels.clear();
31                 Groups.clear();
32                 Estimators.clear();
33                 
34                 //allow user to run help
35                 if(option == "help") { help(); abort = true; }
36                 
37                 else {
38                         //valid paramters for this command
39                         string Array[] =  {"label","calc","groups","iters"};
40                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
41                         
42                         OptionParser parser(option);
43                         map<string,string> parameters = parser.getParameters();
44                         
45                         ValidParameters validParameter;
46                 
47                         //check to make sure all parameters are valid for command
48                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
49                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
50                         }
51                         
52                         //make sure the user has already run the read.otu command
53                         if (globaldata->getSharedFile() == "") {
54                                 if (globaldata->getListFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the bootstrap.shared command."); mothurOutEndLine(); abort = true; }
55                                 else if (globaldata->getGroupFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the bootstrap.shared command."); mothurOutEndLine(); abort = true; }
56                         }
57                         
58                         //check for optional parameter and set defaults
59                         // ...at some point should added some additional type checking...
60                         label = validParameter.validFile(parameters, "label", false);                   
61                         if (label == "not found") { label = ""; }
62                         else { 
63                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
64                                 else { allLines = 1;  }
65                         }
66                         
67                         //if the user has not specified any labels use the ones from read.otu
68                         if(label == "") {  
69                                 allLines = globaldata->allLines; 
70                                 labels = globaldata->labels; 
71                         }
72                                 
73                         groups = validParameter.validFile(parameters, "groups", false);                 
74                         if (groups == "not found") { groups = ""; }
75                         else { 
76                                 splitAtDash(groups, Groups);
77                                 globaldata->Groups = Groups;
78                         }
79                                 
80                         calc = validParameter.validFile(parameters, "calc", false);                     
81                         if (calc == "not found") { calc = "jclass-thetayc";  }
82                         else { 
83                                  if (calc == "default")  {  calc = "jclass-thetayc";  }
84                         }
85                         splitAtDash(calc, Estimators);
86
87                         string temp;
88                         temp = validParameter.validFile(parameters, "iters", false);  if (temp == "not found") { temp = "1000"; }
89                         convert(temp, iters); 
90                                 
91                         if (abort == false) {
92                         
93                                 //used in tree constructor 
94                                 globaldata->runParse = false;
95                         
96                                 validCalculator = new ValidCalculators();
97                                 
98                                 int i;
99                                 for (i=0; i<Estimators.size(); i++) {
100                                         if (validCalculator->isValidCalculator("boot", Estimators[i]) == true) { 
101                                                 if (Estimators[i] == "jabund") {        
102                                                         treeCalculators.push_back(new JAbund());
103                                                 }else if (Estimators[i] == "sorabund") { 
104                                                         treeCalculators.push_back(new SorAbund());
105                                                 }else if (Estimators[i] == "jclass") { 
106                                                         treeCalculators.push_back(new Jclass());
107                                                 }else if (Estimators[i] == "sorclass") { 
108                                                         treeCalculators.push_back(new SorClass());
109                                                 }else if (Estimators[i] == "jest") { 
110                                                         treeCalculators.push_back(new Jest());
111                                                 }else if (Estimators[i] == "sorest") { 
112                                                         treeCalculators.push_back(new SorEst());
113                                                 }else if (Estimators[i] == "thetayc") { 
114                                                         treeCalculators.push_back(new ThetaYC());
115                                                 }else if (Estimators[i] == "thetan") { 
116                                                         treeCalculators.push_back(new ThetaN());
117                                                 }else if (Estimators[i] == "morisitahorn") { 
118                                                         treeCalculators.push_back(new MorHorn());
119                                                 }else if (Estimators[i] == "braycurtis") { 
120                                                         treeCalculators.push_back(new BrayCurtis());
121                                                 }
122                                         }
123                                 }
124                                 
125                                 delete validCalculator;
126                                 
127                                 ofstream* tempo;
128                                 for (int i=0; i < treeCalculators.size(); i++) {
129                                         tempo = new ofstream;
130                                         out.push_back(tempo);
131                                 }
132                                 
133                                 //make a vector of tree* for each calculator
134                                 trees.resize(treeCalculators.size());
135                         }
136                 }
137
138         }
139         catch(exception& e) {
140                 errorOut(e, "BootSharedCommand", "BootSharedCommand");
141                 exit(1);
142         }
143 }
144
145 //**********************************************************************************************************************
146
147 void BootSharedCommand::help(){
148         try {
149                 mothurOut("The bootstrap.shared command can only be executed after a successful read.otu command.\n");
150                 mothurOut("The bootstrap.shared command parameters are groups, calc, iters and label.\n");
151                 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
152                 mothurOut("The group names are separated by dashes. The label parameter allows you to select what distance levels you would like trees created for, and is also separated by dashes.\n");
153                 mothurOut("The bootstrap.shared command should be in the following format: bootstrap.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels, iters=yourIters).\n");
154                 mothurOut("Example bootstrap.shared(groups=A-B-C, calc=jabund-sorabund, iters=100).\n");
155                 mothurOut("The default value for groups is all the groups in your groupfile.\n");
156                 mothurOut("The default value for calc is jclass-thetayc. The default for iters is 1000.\n");
157         }
158         catch(exception& e) {
159                 errorOut(e, "BootSharedCommand", "help");
160                 exit(1);
161         }
162 }
163
164 //**********************************************************************************************************************
165
166 BootSharedCommand::~BootSharedCommand(){
167         //made new in execute
168         if (abort == false) {
169                 delete input; globaldata->ginput = NULL;
170                 delete read;
171                 delete util;
172                 globaldata->gorder = NULL;
173         }
174 }
175
176 //**********************************************************************************************************************
177
178 int BootSharedCommand::execute(){
179         try {
180         
181                 if (abort == true) {    return 0;       }
182         
183                 util = new SharedUtil();        
184         
185                 //read first line
186                 read = new ReadOTUFile(globaldata->inputFileName);      
187                 read->read(&*globaldata); 
188                 input = globaldata->ginput;
189                 order = input->getSharedOrderVector();
190                 string lastLabel = order->getLabel();
191                 
192                 //if the users entered no valid calculators don't execute command
193                 if (treeCalculators.size() == 0) { return 0; }
194
195                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
196                 set<string> processedLabels;
197                 set<string> userLabels = labels;
198                                 
199                 //set users groups
200                 util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "treegroup");
201                 numGroups = globaldata->Groups.size();
202                 
203                 //clear globaldatas old tree names if any
204                 globaldata->Treenames.clear();
205                 
206                 //fills globaldatas tree names
207                 globaldata->Treenames = globaldata->Groups;
208                 
209                 //create treemap class from groupmap for tree class to use
210                 tmap = new TreeMap();
211                 tmap->makeSim(globaldata->gGroupmap);
212                 globaldata->gTreemap = tmap;
213                         
214                 while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
215                 
216                         if(allLines == 1 || labels.count(order->getLabel()) == 1){                      
217                                 
218                                 mothurOut(order->getLabel()); mothurOutEndLine();
219                                 process(order);
220                                 
221                                 processedLabels.insert(order->getLabel());
222                                 userLabels.erase(order->getLabel());
223                         }
224                         
225                         //you have a label the user want that is smaller than this label and the last label has not already been processed
226                         if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
227                                 string saveLabel = order->getLabel();
228                                 
229                                 delete order;
230                                 order = input->getSharedOrderVector(lastLabel);                                                                                                 
231                                 mothurOut(order->getLabel()); mothurOutEndLine();
232                                 process(order);
233
234                                 processedLabels.insert(order->getLabel());
235                                 userLabels.erase(order->getLabel());
236                                 
237                                 //restore real lastlabel to save below
238                                 order->setLabel(saveLabel);
239                         }
240                         
241                         
242                         lastLabel = order->getLabel();                  
243
244                         //get next line to process
245                         delete order;
246                         order = input->getSharedOrderVector();
247                 }
248                 
249                 //output error messages about any remaining user labels
250                 set<string>::iterator it;
251                 bool needToRun = false;
252                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
253                         mothurOut("Your file does not include the label " + *it); 
254                         if (processedLabels.count(lastLabel) != 1) {
255                                 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
256                                 needToRun = true;
257                         }else {
258                                 mothurOut(". Please refer to " + lastLabel + ".");  mothurOutEndLine();
259                         }
260                 }
261                 
262                 //run last line if you need to
263                 if (needToRun == true)  {
264                                 if (order != NULL) {    delete order;   }
265                                 order = input->getSharedOrderVector(lastLabel);                                                                                                 
266                                 mothurOut(order->getLabel()); mothurOutEndLine();
267                                 process(order);
268                                 delete order;
269
270                 }
271                 
272                 //reset groups parameter
273                 globaldata->Groups.clear();  
274
275                 return 0;
276         }
277         catch(exception& e) {
278                 errorOut(e, "BootSharedCommand", "execute");
279                 exit(1);
280         }
281 }
282 //**********************************************************************************************************************
283
284 void BootSharedCommand::createTree(ostream* out, Tree* t){
285         try {
286                 
287                 //do merges and create tree structure by setting parents and children
288                 //there are numGroups - 1 merges to do
289                 for (int i = 0; i < (numGroups - 1); i++) {
290                 
291                         float largest = -1000.0;
292                         int row, column;
293                         //find largest value in sims matrix by searching lower triangle
294                         for (int j = 1; j < simMatrix.size(); j++) {
295                                 for (int k = 0; k < j; k++) {
296                                         if (simMatrix[j][k] > largest) {  largest = simMatrix[j][k]; row = j; column = k;  }
297                                 }
298                         }
299
300                         //set non-leaf node info and update leaves to know their parents
301                         //non-leaf
302                         t->tree[numGroups + i].setChildren(index[row], index[column]);
303                 
304                         //parents
305                         t->tree[index[row]].setParent(numGroups + i);
306                         t->tree[index[column]].setParent(numGroups + i);
307                         
308                         //blength = distance / 2;
309                         float blength = ((1.0 - largest) / 2);
310                         
311                         //branchlengths
312                         t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
313                         t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
314                 
315                         //set your length to leaves to your childs length plus branchlength
316                         t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
317                         
318                 
319                         //update index 
320                         index[row] = numGroups+i;
321                         index[column] = numGroups+i;
322                         
323                         //zero out highest value that caused the merge.
324                         simMatrix[row][column] = -1000.0;
325                         simMatrix[column][row] = -1000.0;
326                 
327                         //merge values in simsMatrix
328                         for (int n = 0; n < simMatrix.size(); n++)      {
329                                 //row becomes merge of 2 groups
330                                 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
331                                 simMatrix[n][row] = simMatrix[row][n];
332                                 //delete column
333                                 simMatrix[column][n] = -1000.0;
334                                 simMatrix[n][column] = -1000.0;
335                         }
336                 }
337                 
338                 //adjust tree to make sure root to tip length is .5
339                 int root = t->findRoot();
340                 t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
341
342                 //assemble tree
343                 t->assembleTree();
344         
345                 //print newick file
346                 t->print(*out);
347         
348         }
349         catch(exception& e) {
350                 errorOut(e, "BootSharedCommand", "createTree");
351                 exit(1);
352         }
353 }
354 /***********************************************************/
355 void BootSharedCommand::printSims() {
356         try {
357                 mothurOut("simsMatrix"); mothurOutEndLine(); 
358                 for (int m = 0; m < simMatrix.size(); m++)      {
359                         for (int n = 0; n < simMatrix.size(); n++)      {
360                                 mothurOut(simMatrix[m][n]);  mothurOut("\t"); 
361                         }
362                         mothurOutEndLine(); 
363                 }
364
365         }
366         catch(exception& e) {
367                 errorOut(e, "BootSharedCommand", "printSims");
368                 exit(1);
369         }
370 }
371 /***********************************************************/
372 void BootSharedCommand::process(SharedOrderVector* order) {
373         try{
374                                 EstOutput data;
375                                 vector<SharedRAbundVector*> subset;
376                                 
377                                 
378                                 //open an ostream for each calc to print to
379                                 for (int z = 0; z < treeCalculators.size(); z++) {
380                                         //create a new filename
381                                         outputFile = getRootName(globaldata->inputFileName) + treeCalculators[z]->getName() + ".boot" + order->getLabel() + ".tre";
382                                         openOutputFile(outputFile, *(out[z]));
383                                 }
384                                 
385                                 mothurOut("Generating bootstrap trees..."); cout.flush();
386                                 
387                                 //create a file for each calculator with the 1000 trees in it.
388                                 for (int p = 0; p < iters; p++) {
389                                         
390                                         util->getSharedVectorswithReplacement(globaldata->Groups, lookup, order);  //fills group vectors from order vector.
391
392                                 
393                                         //for each calculator                                                                                           
394                                         for(int i = 0 ; i < treeCalculators.size(); i++) {
395                                         
396                                                 //initialize simMatrix
397                                                 simMatrix.clear();
398                                                 simMatrix.resize(numGroups);
399                                                 for (int m = 0; m < simMatrix.size(); m++)      {
400                                                         for (int j = 0; j < simMatrix.size(); j++)      {
401                                                                 simMatrix[m].push_back(0.0);
402                                                         }
403                                                 }
404                                 
405                                                 //initialize index
406                                                 index.clear();
407                                                 for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
408                                                         
409                                                 for (int k = 0; k < lookup.size(); k++) { // pass cdd each set of groups to commpare
410                                                         for (int l = k; l < lookup.size(); l++) {
411                                                                 if (k != l) { //we dont need to similiarity of a groups to itself
412                                                                         subset.clear(); //clear out old pair of sharedrabunds
413                                                                         //add new pair of sharedrabunds
414                                                                         subset.push_back(lookup[k]); subset.push_back(lookup[l]); 
415                                                                         
416                                                                         //get estimated similarity between 2 groups
417                                                                         data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
418                                                                         //save values in similarity matrix
419                                                                         simMatrix[k][l] = data[0];
420                                                                         simMatrix[l][k] = data[0];
421                                                                 }
422                                                         }
423                                                 }
424                                                 
425                                                 tempTree = new Tree();
426                                                 
427                                                 //creates tree from similarity matrix and write out file
428                                                 createTree(out[i], tempTree);
429                                                 
430                                                 //save trees for consensus command.
431                                                 trees[i].push_back(tempTree);
432                                         }
433                                 }
434                                 
435                                 mothurOut("\tDone."); mothurOutEndLine();
436                                 //delete globaldata's tree
437                                 //for (int m = 0; m < globaldata->gTree.size(); m++) {  delete globaldata->gTree[m];  }
438                                 //globaldata->gTree.clear();
439                                 
440                                 
441                                 //create consensus trees for each bootstrapped tree set
442                                 for (int k = 0; k < trees.size(); k++) {
443                                         
444                                         mothurOut("Generating consensus tree for " + treeCalculators[k]->getName()); mothurOutEndLine();
445                                         
446                                         //set global data to calc trees
447                                         globaldata->gTree = trees[k];
448                                         
449                                         string filename = getRootName(globaldata->inputFileName) + treeCalculators[k]->getName() + ".boot" + order->getLabel();
450                                         consensus = new ConcensusCommand(filename);
451                                         consensus->execute();
452                                         delete consensus;
453                                         
454                                         //delete globaldata's tree
455                                         //for (int m = 0; m < globaldata->gTree.size(); m++) {  delete globaldata->gTree[m];  }
456                                         //globaldata->gTree.clear();
457                                         
458                                 }
459                                 
460                                 
461                                         
462                                 //close ostream for each calc
463                                 for (int z = 0; z < treeCalculators.size(); z++) { out[z]->close(); }
464         
465         }
466         catch(exception& e) {
467                 errorOut(e, "BootSharedCommand", "process");
468                 exit(1);
469         }
470 }
471 /***********************************************************/
472
473
474