]> git.donarmstrong.com Git - mothur.git/blob - bootstrapsharedcommand.cpp
fixed some issues while testing 1.6
[mothur.git] / bootstrapsharedcommand.cpp
1 /*
2  *  bootstrapsharedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/16/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "bootstrapsharedcommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
21
22
23 //**********************************************************************************************************************
24
25 BootSharedCommand::BootSharedCommand(string option){
26         try {
27                 globaldata = GlobalData::getInstance();
28                 abort = false;
29                 allLines = 1;
30                 lines.clear();
31                 labels.clear();
32                 Groups.clear();
33                 Estimators.clear();
34                 
35                 //allow user to run help
36                 if(option == "help") { help(); abort = true; }
37                 
38                 else {
39                         //valid paramters for this command
40                         string Array[] =  {"line","label","calc","groups","iters"};
41                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
42                         
43                         OptionParser parser(option);
44                         map<string,string> parameters = parser.getParameters();
45                         
46                         ValidParameters validParameter;
47                 
48                         //check to make sure all parameters are valid for command
49                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
50                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
51                         }
52                         
53                         //make sure the user has already run the read.otu command
54                         if (globaldata->getSharedFile() == "") {
55                                 if (globaldata->getListFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the bootstrap.shared command."); mothurOutEndLine(); abort = true; }
56                                 else if (globaldata->getGroupFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the bootstrap.shared command."); mothurOutEndLine(); abort = true; }
57                         }
58                         
59                         //check for optional parameter and set defaults
60                         // ...at some point should added some additional type checking...
61                         line = validParameter.validFile(parameters, "line", false);                             
62                         if (line == "not found") { line = "";  }
63                         else { 
64                                 if(line != "all") {  splitAtDash(line, lines);  allLines = 0;  }
65                                 else { allLines = 1;  }
66                         }
67                         
68                         label = validParameter.validFile(parameters, "label", false);                   
69                         if (label == "not found") { label = ""; }
70                         else { 
71                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
72                                 else { allLines = 1;  }
73                         }
74                         
75                         //make sure user did not use both the line and label parameters
76                         if ((line != "") && (label != "")) { mothurOut("You cannot use both the line and label parameters at the same time. "); mothurOutEndLine(); abort = true; }
77                         //if the user has not specified any line or labels use the ones from read.otu
78                         else if((line == "") && (label == "")) {  
79                                 allLines = globaldata->allLines; 
80                                 labels = globaldata->labels; 
81                                 lines = globaldata->lines;
82                         }
83                                 
84                         groups = validParameter.validFile(parameters, "groups", false);                 
85                         if (groups == "not found") { groups = ""; }
86                         else { 
87                                 splitAtDash(groups, Groups);
88                                 globaldata->Groups = Groups;
89                         }
90                                 
91                         calc = validParameter.validFile(parameters, "calc", false);                     
92                         if (calc == "not found") { calc = "jclass-thetayc";  }
93                         else { 
94                                  if (calc == "default")  {  calc = "jclass-thetayc";  }
95                         }
96                         splitAtDash(calc, Estimators);
97
98                         string temp;
99                         temp = validParameter.validFile(parameters, "iters", false);  if (temp == "not found") { temp = "1000"; }
100                         convert(temp, iters); 
101                                 
102                         if (abort == false) {
103                         
104                                 //used in tree constructor 
105                                 globaldata->runParse = false;
106                         
107                                 validCalculator = new ValidCalculators();
108                                 
109                                 int i;
110                                 for (i=0; i<Estimators.size(); i++) {
111                                         if (validCalculator->isValidCalculator("boot", Estimators[i]) == true) { 
112                                                 if (Estimators[i] == "jabund") {        
113                                                         treeCalculators.push_back(new JAbund());
114                                                 }else if (Estimators[i] == "sorabund") { 
115                                                         treeCalculators.push_back(new SorAbund());
116                                                 }else if (Estimators[i] == "jclass") { 
117                                                         treeCalculators.push_back(new Jclass());
118                                                 }else if (Estimators[i] == "sorclass") { 
119                                                         treeCalculators.push_back(new SorClass());
120                                                 }else if (Estimators[i] == "jest") { 
121                                                         treeCalculators.push_back(new Jest());
122                                                 }else if (Estimators[i] == "sorest") { 
123                                                         treeCalculators.push_back(new SorEst());
124                                                 }else if (Estimators[i] == "thetayc") { 
125                                                         treeCalculators.push_back(new ThetaYC());
126                                                 }else if (Estimators[i] == "thetan") { 
127                                                         treeCalculators.push_back(new ThetaN());
128                                                 }else if (Estimators[i] == "morisitahorn") { 
129                                                         treeCalculators.push_back(new MorHorn());
130                                                 }else if (Estimators[i] == "braycurtis") { 
131                                                         treeCalculators.push_back(new BrayCurtis());
132                                                 }
133                                         }
134                                 }
135                                 
136                                 delete validCalculator;
137                                 
138                                 ofstream* tempo;
139                                 for (int i=0; i < treeCalculators.size(); i++) {
140                                         tempo = new ofstream;
141                                         out.push_back(tempo);
142                                 }
143                                 
144                                 //make a vector of tree* for each calculator
145                                 trees.resize(treeCalculators.size());
146                         }
147                 }
148
149         }
150         catch(exception& e) {
151                 errorOut(e, "BootSharedCommand", "BootSharedCommand");
152                 exit(1);
153         }
154 }
155
156 //**********************************************************************************************************************
157
158 void BootSharedCommand::help(){
159         try {
160                 mothurOut("The bootstrap.shared command can only be executed after a successful read.otu command.\n");
161                 mothurOut("The bootstrap.shared command parameters are groups, calc, iters, line and label.  You may not use line and label at the same time.\n");
162                 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
163                 mothurOut("The group names are separated by dashes. The line and label allow you to select what distance levels you would like trees created for, and are also separated by dashes.\n");
164                 mothurOut("The bootstrap.shared command should be in the following format: bootstrap.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels, iters=yourIters).\n");
165                 mothurOut("Example bootstrap.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund, iters=100).\n");
166                 mothurOut("The default value for groups is all the groups in your groupfile.\n");
167                 mothurOut("The default value for calc is jclass-thetayc. The default for iters is 1000.\n");
168         }
169         catch(exception& e) {
170                 errorOut(e, "BootSharedCommand", "help");
171                 exit(1);
172         }
173 }
174
175 //**********************************************************************************************************************
176
177 BootSharedCommand::~BootSharedCommand(){
178         //made new in execute
179         if (abort == false) {
180                 delete input; globaldata->ginput = NULL;
181                 delete read;
182                 delete util;
183                 globaldata->gorder = NULL;
184         }
185 }
186
187 //**********************************************************************************************************************
188
189 int BootSharedCommand::execute(){
190         try {
191         
192                 if (abort == true) {    return 0;       }
193         
194                 int count = 1;
195                 util = new SharedUtil();        
196         
197                 //read first line
198                 read = new ReadOTUFile(globaldata->inputFileName);      
199                 read->read(&*globaldata); 
200                 input = globaldata->ginput;
201                 order = input->getSharedOrderVector();
202                 string lastLabel = order->getLabel();
203                 
204                 //if the users entered no valid calculators don't execute command
205                 if (treeCalculators.size() == 0) { return 0; }
206
207                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
208                 set<string> processedLabels;
209                 set<string> userLabels = labels;
210                 set<int> userLines = lines;
211                                 
212                 //set users groups
213                 util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "treegroup");
214                 numGroups = globaldata->Groups.size();
215                 
216                 //clear globaldatas old tree names if any
217                 globaldata->Treenames.clear();
218                 
219                 //fills globaldatas tree names
220                 globaldata->Treenames = globaldata->Groups;
221                 
222                 //create treemap class from groupmap for tree class to use
223                 tmap = new TreeMap();
224                 tmap->makeSim(globaldata->gGroupmap);
225                 globaldata->gTreemap = tmap;
226                         
227                 while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
228                 
229                         if(allLines == 1 || lines.count(count) == 1 || labels.count(order->getLabel()) == 1){                   
230                                 
231                                 mothurOut(order->getLabel()); mothurOutEndLine();
232                                 process(order);
233                                 
234                                 processedLabels.insert(order->getLabel());
235                                 userLabels.erase(order->getLabel());
236                                 userLines.erase(count);
237                         }
238                         
239                         //you have a label the user want that is smaller than this line and the last line has not already been processed
240                         if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
241                                 
242                                 delete order;
243                                 order = input->getSharedOrderVector(lastLabel);                                                                                                 
244                                 mothurOut(order->getLabel()); mothurOutEndLine();
245                                 process(order);
246
247                                 processedLabels.insert(order->getLabel());
248                                 userLabels.erase(order->getLabel());
249                         }
250                         
251                         
252                         lastLabel = order->getLabel();                  
253
254                         //get next line to process
255                         delete order;
256                         order = input->getSharedOrderVector();
257                         count++;
258                 }
259                 
260                 //output error messages about any remaining user labels
261                 set<string>::iterator it;
262                 bool needToRun = false;
263                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
264                         mothurOut("Your file does not include the label " + *it); 
265                         if (processedLabels.count(lastLabel) != 1) {
266                                 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
267                                 needToRun = true;
268                         }else {
269                                 mothurOut(". Please refer to " + lastLabel + ".");  mothurOutEndLine();
270                         }
271                 }
272                 
273                 //run last line if you need to
274                 if (needToRun == true)  {
275                                 if (order != NULL) {    delete order;   }
276                                 order = input->getSharedOrderVector(lastLabel);                                                                                                 
277                                 mothurOut(order->getLabel()); mothurOutEndLine();
278                                 process(order);
279                                 delete order;
280
281                 }
282                 
283                 
284                 
285                 //reset groups parameter
286                 globaldata->Groups.clear();  
287
288                 return 0;
289         }
290         catch(exception& e) {
291                 errorOut(e, "BootSharedCommand", "execute");
292                 exit(1);
293         }
294 }
295 //**********************************************************************************************************************
296
297 void BootSharedCommand::createTree(ostream* out, Tree* t){
298         try {
299                 
300                 //do merges and create tree structure by setting parents and children
301                 //there are numGroups - 1 merges to do
302                 for (int i = 0; i < (numGroups - 1); i++) {
303                 
304                         float largest = -1000.0;
305                         int row, column;
306                         //find largest value in sims matrix by searching lower triangle
307                         for (int j = 1; j < simMatrix.size(); j++) {
308                                 for (int k = 0; k < j; k++) {
309                                         if (simMatrix[j][k] > largest) {  largest = simMatrix[j][k]; row = j; column = k;  }
310                                 }
311                         }
312
313                         //set non-leaf node info and update leaves to know their parents
314                         //non-leaf
315                         t->tree[numGroups + i].setChildren(index[row], index[column]);
316                 
317                         //parents
318                         t->tree[index[row]].setParent(numGroups + i);
319                         t->tree[index[column]].setParent(numGroups + i);
320                         
321                         //blength = distance / 2;
322                         float blength = ((1.0 - largest) / 2);
323                         
324                         //branchlengths
325                         t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
326                         t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
327                 
328                         //set your length to leaves to your childs length plus branchlength
329                         t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
330                         
331                 
332                         //update index 
333                         index[row] = numGroups+i;
334                         index[column] = numGroups+i;
335                         
336                         //zero out highest value that caused the merge.
337                         simMatrix[row][column] = -1000.0;
338                         simMatrix[column][row] = -1000.0;
339                 
340                         //merge values in simsMatrix
341                         for (int n = 0; n < simMatrix.size(); n++)      {
342                                 //row becomes merge of 2 groups
343                                 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
344                                 simMatrix[n][row] = simMatrix[row][n];
345                                 //delete column
346                                 simMatrix[column][n] = -1000.0;
347                                 simMatrix[n][column] = -1000.0;
348                         }
349                 }
350                 
351                 //adjust tree to make sure root to tip length is .5
352                 int root = t->findRoot();
353                 t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
354
355                 //assemble tree
356                 t->assembleTree();
357         
358                 //print newick file
359                 t->print(*out);
360         
361         }
362         catch(exception& e) {
363                 errorOut(e, "BootSharedCommand", "createTree");
364                 exit(1);
365         }
366 }
367 /***********************************************************/
368 void BootSharedCommand::printSims() {
369         try {
370                 mothurOut("simsMatrix"); mothurOutEndLine(); 
371                 for (int m = 0; m < simMatrix.size(); m++)      {
372                         for (int n = 0; n < simMatrix.size(); n++)      {
373                                 mothurOut(simMatrix[m][n]);  mothurOut("\t"); 
374                         }
375                         mothurOutEndLine(); 
376                 }
377
378         }
379         catch(exception& e) {
380                 errorOut(e, "BootSharedCommand", "printSims");
381                 exit(1);
382         }
383 }
384 /***********************************************************/
385 void BootSharedCommand::process(SharedOrderVector* order) {
386         try{
387                                 EstOutput data;
388                                 vector<SharedRAbundVector*> subset;
389                                 
390                                 
391                                 //open an ostream for each calc to print to
392                                 for (int z = 0; z < treeCalculators.size(); z++) {
393                                         //create a new filename
394                                         outputFile = getRootName(globaldata->inputFileName) + treeCalculators[z]->getName() + ".boot" + order->getLabel() + ".tre";
395                                         openOutputFile(outputFile, *(out[z]));
396                                 }
397                                 
398                                 mothurOut("Generating bootstrap trees..."); cout.flush();
399                                 
400                                 //create a file for each calculator with the 1000 trees in it.
401                                 for (int p = 0; p < iters; p++) {
402                                         
403                                         util->getSharedVectorswithReplacement(globaldata->Groups, lookup, order);  //fills group vectors from order vector.
404
405                                 
406                                         //for each calculator                                                                                           
407                                         for(int i = 0 ; i < treeCalculators.size(); i++) {
408                                         
409                                                 //initialize simMatrix
410                                                 simMatrix.clear();
411                                                 simMatrix.resize(numGroups);
412                                                 for (int m = 0; m < simMatrix.size(); m++)      {
413                                                         for (int j = 0; j < simMatrix.size(); j++)      {
414                                                                 simMatrix[m].push_back(0.0);
415                                                         }
416                                                 }
417                                 
418                                                 //initialize index
419                                                 index.clear();
420                                                 for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
421                                                         
422                                                 for (int k = 0; k < lookup.size(); k++) { // pass cdd each set of groups to commpare
423                                                         for (int l = k; l < lookup.size(); l++) {
424                                                                 if (k != l) { //we dont need to similiarity of a groups to itself
425                                                                         subset.clear(); //clear out old pair of sharedrabunds
426                                                                         //add new pair of sharedrabunds
427                                                                         subset.push_back(lookup[k]); subset.push_back(lookup[l]); 
428                                                                         
429                                                                         //get estimated similarity between 2 groups
430                                                                         data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
431                                                                         //save values in similarity matrix
432                                                                         simMatrix[k][l] = data[0];
433                                                                         simMatrix[l][k] = data[0];
434                                                                 }
435                                                         }
436                                                 }
437                                                 
438                                                 tempTree = new Tree();
439                                                 
440                                                 //creates tree from similarity matrix and write out file
441                                                 createTree(out[i], tempTree);
442                                                 
443                                                 //save trees for consensus command.
444                                                 trees[i].push_back(tempTree);
445                                         }
446                                 }
447                                 
448                                 mothurOut("\tDone."); mothurOutEndLine();
449                                 //delete globaldata's tree
450                                 //for (int m = 0; m < globaldata->gTree.size(); m++) {  delete globaldata->gTree[m];  }
451                                 //globaldata->gTree.clear();
452                                 
453                                 
454                                 //create consensus trees for each bootstrapped tree set
455                                 for (int k = 0; k < trees.size(); k++) {
456                                         
457                                         mothurOut("Generating consensus tree for " + treeCalculators[k]->getName()); mothurOutEndLine();
458                                         
459                                         //set global data to calc trees
460                                         globaldata->gTree = trees[k];
461                                         
462                                         string filename = getRootName(globaldata->inputFileName) + treeCalculators[k]->getName() + ".boot" + order->getLabel();
463                                         consensus = new ConcensusCommand(filename);
464                                         consensus->execute();
465                                         delete consensus;
466                                         
467                                         //delete globaldata's tree
468                                         //for (int m = 0; m < globaldata->gTree.size(); m++) {  delete globaldata->gTree[m];  }
469                                         //globaldata->gTree.clear();
470                                         
471                                 }
472                                 
473                                 
474                                         
475                                 //close ostream for each calc
476                                 for (int z = 0; z < treeCalculators.size(); z++) { out[z]->close(); }
477         
478         }
479         catch(exception& e) {
480                 errorOut(e, "BootSharedCommand", "process");
481                 exit(1);
482         }
483 }
484 /***********************************************************/
485
486
487