]> git.donarmstrong.com Git - mothur.git/blob - bootstrapsharedcommand.cpp
added logfile feature
[mothur.git] / bootstrapsharedcommand.cpp
1 /*
2  *  bootstrapsharedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/16/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "bootstrapsharedcommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
21
22
23 //**********************************************************************************************************************
24
25 BootSharedCommand::BootSharedCommand(string option){
26         try {
27                 globaldata = GlobalData::getInstance();
28                 abort = false;
29                 allLines = 1;
30                 lines.clear();
31                 labels.clear();
32                 Groups.clear();
33                 Estimators.clear();
34                 
35                 //allow user to run help
36                 if(option == "help") { help(); abort = true; }
37                 
38                 else {
39                         //valid paramters for this command
40                         string Array[] =  {"line","label","calc","groups","iters"};
41                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
42                         
43                         OptionParser parser(option);
44                         map<string,string> parameters = parser.getParameters();
45                         
46                         ValidParameters validParameter;
47                 
48                         //check to make sure all parameters are valid for command
49                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
50                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
51                         }
52                         
53                         //make sure the user has already run the read.otu command
54                         if (globaldata->getSharedFile() == "") {
55                                 if (globaldata->getListFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the bootstrap.shared command."); mothurOutEndLine(); abort = true; }
56                                 else if (globaldata->getGroupFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the bootstrap.shared command."); mothurOutEndLine(); abort = true; }
57                         }
58                         
59                         //check for optional parameter and set defaults
60                         // ...at some point should added some additional type checking...
61                         line = validParameter.validFile(parameters, "line", false);                             
62                         if (line == "not found") { line = "";  }
63                         else { 
64                                 if(line != "all") {  splitAtDash(line, lines);  allLines = 0;  }
65                                 else { allLines = 1;  }
66                         }
67                         
68                         label = validParameter.validFile(parameters, "label", false);                   
69                         if (label == "not found") { label = ""; }
70                         else { 
71                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
72                                 else { allLines = 1;  }
73                         }
74                         
75                         //make sure user did not use both the line and label parameters
76                         if ((line != "") && (label != "")) { mothurOut("You cannot use both the line and label parameters at the same time. "); mothurOutEndLine(); abort = true; }
77                         //if the user has not specified any line or labels use the ones from read.otu
78                         else if((line == "") && (label == "")) {  
79                                 allLines = globaldata->allLines; 
80                                 labels = globaldata->labels; 
81                                 lines = globaldata->lines;
82                         }
83                                 
84                         groups = validParameter.validFile(parameters, "groups", false);                 
85                         if (groups == "not found") { groups = ""; }
86                         else { 
87                                 splitAtDash(groups, Groups);
88                                 globaldata->Groups = Groups;
89                         }
90                                 
91                         calc = validParameter.validFile(parameters, "calc", false);                     
92                         if (calc == "not found") { calc = "jclass-thetayc";  }
93                         else { 
94                                  if (calc == "default")  {  calc = "jclass-thetayc";  }
95                         }
96                         splitAtDash(calc, Estimators);
97
98                         string temp;
99                         temp = validParameter.validFile(parameters, "iters", false);  if (temp == "not found") { temp = "1000"; }
100                         convert(temp, iters); 
101                                 
102                         if (abort == false) {
103                         
104                                 validCalculator = new ValidCalculators();
105                                 
106                                 int i;
107                                 for (i=0; i<Estimators.size(); i++) {
108                                         if (validCalculator->isValidCalculator("boot", Estimators[i]) == true) { 
109                                                 if (Estimators[i] == "jabund") {        
110                                                         treeCalculators.push_back(new JAbund());
111                                                 }else if (Estimators[i] == "sorabund") { 
112                                                         treeCalculators.push_back(new SorAbund());
113                                                 }else if (Estimators[i] == "jclass") { 
114                                                         treeCalculators.push_back(new Jclass());
115                                                 }else if (Estimators[i] == "sorclass") { 
116                                                         treeCalculators.push_back(new SorClass());
117                                                 }else if (Estimators[i] == "jest") { 
118                                                         treeCalculators.push_back(new Jest());
119                                                 }else if (Estimators[i] == "sorest") { 
120                                                         treeCalculators.push_back(new SorEst());
121                                                 }else if (Estimators[i] == "thetayc") { 
122                                                         treeCalculators.push_back(new ThetaYC());
123                                                 }else if (Estimators[i] == "thetan") { 
124                                                         treeCalculators.push_back(new ThetaN());
125                                                 }else if (Estimators[i] == "morisitahorn") { 
126                                                         treeCalculators.push_back(new MorHorn());
127                                                 }else if (Estimators[i] == "braycurtis") { 
128                                                         treeCalculators.push_back(new BrayCurtis());
129                                                 }
130                                         }
131                                 }
132                                 
133                                 delete validCalculator;
134                                 
135                                 ofstream* tempo;
136                                 for (int i=0; i < treeCalculators.size(); i++) {
137                                         tempo = new ofstream;
138                                         out.push_back(tempo);
139                                 }       
140                         }
141                 }
142
143         }
144         catch(exception& e) {
145                 errorOut(e, "BootSharedCommand", "BootSharedCommand");
146                 exit(1);
147         }
148 }
149
150 //**********************************************************************************************************************
151
152 void BootSharedCommand::help(){
153         try {
154                 mothurOut("The bootstrap.shared command can only be executed after a successful read.otu command.\n");
155                 mothurOut("The bootstrap.shared command parameters are groups, calc, iters, line and label.  You may not use line and label at the same time.\n");
156                 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
157                 mothurOut("The group names are separated by dashes. The line and label allow you to select what distance levels you would like trees created for, and are also separated by dashes.\n");
158                 mothurOut("The bootstrap.shared command should be in the following format: bootstrap.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels, iters=yourIters).\n");
159                 mothurOut("Example bootstrap.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund, iters=100).\n");
160                 mothurOut("The default value for groups is all the groups in your groupfile.\n");
161                 mothurOut("The default value for calc is jclass-thetayc. The default for iters is 1000.\n");
162         }
163         catch(exception& e) {
164                 errorOut(e, "BootSharedCommand", "help");
165                 exit(1);
166         }
167 }
168
169 //**********************************************************************************************************************
170
171 BootSharedCommand::~BootSharedCommand(){
172         //made new in execute
173         if (abort == false) {
174                 delete input; globaldata->ginput = NULL;
175                 delete read;
176                 delete util;
177                 globaldata->gorder = NULL;
178         }
179 }
180
181 //**********************************************************************************************************************
182
183 int BootSharedCommand::execute(){
184         try {
185         
186                 if (abort == true) {    return 0;       }
187         
188                 int count = 1;
189                 util = new SharedUtil();        
190         
191                 //read first line
192                 read = new ReadOTUFile(globaldata->inputFileName);      
193                 read->read(&*globaldata); 
194                 input = globaldata->ginput;
195                 order = input->getSharedOrderVector();
196                 string lastLabel = order->getLabel();
197                 
198                 //if the users entered no valid calculators don't execute command
199                 if (treeCalculators.size() == 0) { return 0; }
200
201                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
202                 set<string> processedLabels;
203                 set<string> userLabels = labels;
204                 set<int> userLines = lines;
205                                 
206                 //set users groups
207                 util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "treegroup");
208                 numGroups = globaldata->Groups.size();
209                 
210                 //clear globaldatas old tree names if any
211                 globaldata->Treenames.clear();
212                 
213                 //fills globaldatas tree names
214                 globaldata->Treenames = globaldata->Groups;
215                 
216                 //create treemap class from groupmap for tree class to use
217                 tmap = new TreeMap();
218                 tmap->makeSim(globaldata->gGroupmap);
219                 globaldata->gTreemap = tmap;
220                         
221                 while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
222                 
223                         if(allLines == 1 || lines.count(count) == 1 || labels.count(order->getLabel()) == 1){                   
224                                 
225                                 mothurOut(order->getLabel() + "\t" + toString(count)); mothurOutEndLine();
226                                 process(order);
227                                 
228                                 processedLabels.insert(order->getLabel());
229                                 userLabels.erase(order->getLabel());
230                                 userLines.erase(count);
231                         }
232                         
233                         //you have a label the user want that is smaller than this line and the last line has not already been processed
234                         if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
235                                 
236                                 delete order;
237                                 order = input->getSharedOrderVector(lastLabel);                                                                                                 
238                                 mothurOut(order->getLabel() + "\t" + toString(count)); mothurOutEndLine();
239                                 process(order);
240
241                                 processedLabels.insert(order->getLabel());
242                                 userLabels.erase(order->getLabel());
243                         }
244                         
245                         
246                         lastLabel = order->getLabel();                  
247
248                         //get next line to process
249                         delete order;
250                         order = input->getSharedOrderVector();
251                         count++;
252                 }
253                 
254                 //output error messages about any remaining user labels
255                 set<string>::iterator it;
256                 bool needToRun = false;
257                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
258                         mothurOut("Your file does not include the label " + *it); 
259                         if (processedLabels.count(lastLabel) != 1) {
260                                 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
261                                 needToRun = true;
262                         }else {
263                                 mothurOut(". Please refer to " + lastLabel + ".");  mothurOutEndLine();
264                         }
265                 }
266                 
267                 //run last line if you need to
268                 if (needToRun == true)  {
269                                 delete order;
270                                 order = input->getSharedOrderVector(lastLabel);                                                                                                 
271                                 mothurOut(order->getLabel() + "\t" + toString(count)); mothurOutEndLine();
272                                 process(order);
273                                 delete order;
274
275                 }
276
277                 //reset groups parameter
278                 globaldata->Groups.clear();  
279
280                 return 0;
281         }
282         catch(exception& e) {
283                 errorOut(e, "BootSharedCommand", "execute");
284                 exit(1);
285         }
286 }
287 //**********************************************************************************************************************
288
289 void BootSharedCommand::createTree(ostream* out){
290         try {
291                 //create tree
292                 t = new Tree();
293                 
294                 //do merges and create tree structure by setting parents and children
295                 //there are numGroups - 1 merges to do
296                 for (int i = 0; i < (numGroups - 1); i++) {
297                 
298                         float largest = -1.0;
299                         int row, column;
300                         //find largest value in sims matrix by searching lower triangle
301                         for (int j = 1; j < simMatrix.size(); j++) {
302                                 for (int k = 0; k < j; k++) {
303                                         if (simMatrix[j][k] > largest) {  largest = simMatrix[j][k]; row = j; column = k;  }
304                                 }
305                         }
306
307                         //set non-leaf node info and update leaves to know their parents
308                         //non-leaf
309                         t->tree[numGroups + i].setChildren(index[row], index[column]);
310                 
311                         //parents
312                         t->tree[index[row]].setParent(numGroups + i);
313                         t->tree[index[column]].setParent(numGroups + i);
314                         
315                         //blength = distance / 2;
316                         float blength = ((1.0 - largest) / 2);
317                         
318                         //branchlengths
319                         t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
320                         t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
321                 
322                         //set your length to leaves to your childs length plus branchlength
323                         t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
324                         
325                 
326                         //update index 
327                         index[row] = numGroups+i;
328                         index[column] = numGroups+i;
329                         
330                         //zero out highest value that caused the merge.
331                         simMatrix[row][column] = -1.0;
332                         simMatrix[column][row] = -1.0;
333                 
334                         //merge values in simsMatrix
335                         for (int n = 0; n < simMatrix.size(); n++)      {
336                                 //row becomes merge of 2 groups
337                                 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
338                                 simMatrix[n][row] = simMatrix[row][n];
339                                 //delete column
340                                 simMatrix[column][n] = -1.0;
341                                 simMatrix[n][column] = -1.0;
342                         }
343                 }
344
345                 //assemble tree
346                 t->assembleTree();
347         
348                 //print newick file
349                 t->print(*out);
350         
351                 //delete tree
352                 delete t;
353         
354         }
355         catch(exception& e) {
356                 errorOut(e, "BootSharedCommand", "createTree");
357                 exit(1);
358         }
359 }
360 /***********************************************************/
361 void BootSharedCommand::printSims() {
362         try {
363                 mothurOut("simsMatrix"); mothurOutEndLine(); 
364                 for (int m = 0; m < simMatrix.size(); m++)      {
365                         for (int n = 0; n < simMatrix.size(); n++)      {
366                                 mothurOut(simMatrix[m][n]);  mothurOut("\t"); 
367                         }
368                         mothurOutEndLine(); 
369                 }
370
371         }
372         catch(exception& e) {
373                 errorOut(e, "BootSharedCommand", "printSims");
374                 exit(1);
375         }
376 }
377 /***********************************************************/
378 void BootSharedCommand::process(SharedOrderVector* order) {
379         try{
380                                 EstOutput data;
381                                 vector<SharedRAbundVector*> subset;
382                                 
383                                 //open an ostream for each calc to print to
384                                 for (int z = 0; z < treeCalculators.size(); z++) {
385                                         //create a new filename
386                                         outputFile = getRootName(globaldata->inputFileName) + treeCalculators[z]->getName() + ".boot" + order->getLabel() + ".tre";
387                                         openOutputFile(outputFile, *(out[z]));
388                                 }
389                                 
390                                 //create a file for each calculator with the 1000 trees in it.
391                                 for (int p = 0; p < iters; p++) {
392                                         
393                                         util->getSharedVectorswithReplacement(Groups, lookup, order);  //fills group vectors from order vector.
394                                 
395                                         //for each calculator                                                                                           
396                                         for(int i = 0 ; i < treeCalculators.size(); i++) {
397                                         
398                                                 //initialize simMatrix
399                                                 simMatrix.clear();
400                                                 simMatrix.resize(numGroups);
401                                                 for (int m = 0; m < simMatrix.size(); m++)      {
402                                                         for (int j = 0; j < simMatrix.size(); j++)      {
403                                                                 simMatrix[m].push_back(0.0);
404                                                         }
405                                                 }
406                                 
407                                                 //initialize index
408                                                 index.clear();
409                                                 for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
410                                                         
411                                                 for (int k = 0; k < lookup.size(); k++) { // pass cdd each set of groups to commpare
412                                                         for (int l = k; l < lookup.size(); l++) {
413                                                                 if (k != l) { //we dont need to similiarity of a groups to itself
414                                                                         subset.clear(); //clear out old pair of sharedrabunds
415                                                                         //add new pair of sharedrabunds
416                                                                         subset.push_back(lookup[k]); subset.push_back(lookup[l]); 
417                                                                         
418                                                                         //get estimated similarity between 2 groups
419                                                                         data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
420                                                                         //save values in similarity matrix
421                                                                         simMatrix[k][l] = data[0];
422                                                                         simMatrix[l][k] = data[0];
423                                                                 }
424                                                         }
425                                                 }
426                                 
427                                                 //creates tree from similarity matrix and write out file
428                                                 createTree(out[i]);
429                                         }
430                                 }
431                                 //close ostream for each calc
432                                 for (int z = 0; z < treeCalculators.size(); z++) { out[z]->close(); }
433
434         }
435         catch(exception& e) {
436                 errorOut(e, "BootSharedCommand", "process");
437                 exit(1);
438         }
439 }
440 /***********************************************************/
441
442
443