2 * bootstrapsharedcommand.cpp
5 * Created by Sarah Westcott on 4/16/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "bootstrapsharedcommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
23 //**********************************************************************************************************************
25 BootSharedCommand::BootSharedCommand(string option){
27 globaldata = GlobalData::getInstance();
35 //allow user to run help
36 if(option == "help") { help(); abort = true; }
39 //valid paramters for this command
40 string Array[] = {"line","label","calc","groups","iters"};
41 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
43 OptionParser parser(option);
44 map<string,string> parameters = parser.getParameters();
46 ValidParameters validParameter;
48 //check to make sure all parameters are valid for command
49 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
50 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
53 //make sure the user has already run the read.otu command
54 if (globaldata->getSharedFile() == "") {
55 if (globaldata->getListFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the bootstrap.shared command."); mothurOutEndLine(); abort = true; }
56 else if (globaldata->getGroupFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the bootstrap.shared command."); mothurOutEndLine(); abort = true; }
59 //check for optional parameter and set defaults
60 // ...at some point should added some additional type checking...
61 line = validParameter.validFile(parameters, "line", false);
62 if (line == "not found") { line = ""; }
64 if(line != "all") { splitAtDash(line, lines); allLines = 0; }
65 else { allLines = 1; }
68 label = validParameter.validFile(parameters, "label", false);
69 if (label == "not found") { label = ""; }
71 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
72 else { allLines = 1; }
75 //make sure user did not use both the line and label parameters
76 if ((line != "") && (label != "")) { mothurOut("You cannot use both the line and label parameters at the same time. "); mothurOutEndLine(); abort = true; }
77 //if the user has not specified any line or labels use the ones from read.otu
78 else if((line == "") && (label == "")) {
79 allLines = globaldata->allLines;
80 labels = globaldata->labels;
81 lines = globaldata->lines;
84 groups = validParameter.validFile(parameters, "groups", false);
85 if (groups == "not found") { groups = ""; }
87 splitAtDash(groups, Groups);
88 globaldata->Groups = Groups;
91 calc = validParameter.validFile(parameters, "calc", false);
92 if (calc == "not found") { calc = "jclass-thetayc"; }
94 if (calc == "default") { calc = "jclass-thetayc"; }
96 splitAtDash(calc, Estimators);
99 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
100 convert(temp, iters);
102 if (abort == false) {
104 //used in tree constructor
105 globaldata->runParse = false;
107 validCalculator = new ValidCalculators();
110 for (i=0; i<Estimators.size(); i++) {
111 if (validCalculator->isValidCalculator("boot", Estimators[i]) == true) {
112 if (Estimators[i] == "jabund") {
113 treeCalculators.push_back(new JAbund());
114 }else if (Estimators[i] == "sorabund") {
115 treeCalculators.push_back(new SorAbund());
116 }else if (Estimators[i] == "jclass") {
117 treeCalculators.push_back(new Jclass());
118 }else if (Estimators[i] == "sorclass") {
119 treeCalculators.push_back(new SorClass());
120 }else if (Estimators[i] == "jest") {
121 treeCalculators.push_back(new Jest());
122 }else if (Estimators[i] == "sorest") {
123 treeCalculators.push_back(new SorEst());
124 }else if (Estimators[i] == "thetayc") {
125 treeCalculators.push_back(new ThetaYC());
126 }else if (Estimators[i] == "thetan") {
127 treeCalculators.push_back(new ThetaN());
128 }else if (Estimators[i] == "morisitahorn") {
129 treeCalculators.push_back(new MorHorn());
130 }else if (Estimators[i] == "braycurtis") {
131 treeCalculators.push_back(new BrayCurtis());
136 delete validCalculator;
139 for (int i=0; i < treeCalculators.size(); i++) {
140 tempo = new ofstream;
141 out.push_back(tempo);
144 //make a vector of tree* for each calculator
145 trees.resize(treeCalculators.size());
150 catch(exception& e) {
151 errorOut(e, "BootSharedCommand", "BootSharedCommand");
156 //**********************************************************************************************************************
158 void BootSharedCommand::help(){
160 mothurOut("The bootstrap.shared command can only be executed after a successful read.otu command.\n");
161 mothurOut("The bootstrap.shared command parameters are groups, calc, iters, line and label. You may not use line and label at the same time.\n");
162 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
163 mothurOut("The group names are separated by dashes. The line and label allow you to select what distance levels you would like trees created for, and are also separated by dashes.\n");
164 mothurOut("The bootstrap.shared command should be in the following format: bootstrap.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels, iters=yourIters).\n");
165 mothurOut("Example bootstrap.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund, iters=100).\n");
166 mothurOut("The default value for groups is all the groups in your groupfile.\n");
167 mothurOut("The default value for calc is jclass-thetayc. The default for iters is 1000.\n");
169 catch(exception& e) {
170 errorOut(e, "BootSharedCommand", "help");
175 //**********************************************************************************************************************
177 BootSharedCommand::~BootSharedCommand(){
178 //made new in execute
179 if (abort == false) {
180 delete input; globaldata->ginput = NULL;
183 globaldata->gorder = NULL;
187 //**********************************************************************************************************************
189 int BootSharedCommand::execute(){
192 if (abort == true) { return 0; }
195 util = new SharedUtil();
198 read = new ReadOTUFile(globaldata->inputFileName);
199 read->read(&*globaldata);
200 input = globaldata->ginput;
201 order = input->getSharedOrderVector();
202 string lastLabel = order->getLabel();
204 //if the users entered no valid calculators don't execute command
205 if (treeCalculators.size() == 0) { return 0; }
207 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
208 set<string> processedLabels;
209 set<string> userLabels = labels;
210 set<int> userLines = lines;
213 util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "treegroup");
214 numGroups = globaldata->Groups.size();
216 //clear globaldatas old tree names if any
217 globaldata->Treenames.clear();
219 //fills globaldatas tree names
220 globaldata->Treenames = globaldata->Groups;
222 //create treemap class from groupmap for tree class to use
223 tmap = new TreeMap();
224 tmap->makeSim(globaldata->gGroupmap);
225 globaldata->gTreemap = tmap;
227 while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
229 if(allLines == 1 || lines.count(count) == 1 || labels.count(order->getLabel()) == 1){
231 mothurOut(order->getLabel() + "\t" + toString(count)); mothurOutEndLine();
234 processedLabels.insert(order->getLabel());
235 userLabels.erase(order->getLabel());
236 userLines.erase(count);
239 //you have a label the user want that is smaller than this line and the last line has not already been processed
240 if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
243 order = input->getSharedOrderVector(lastLabel);
244 mothurOut(order->getLabel() + "\t" + toString(count)); mothurOutEndLine();
247 processedLabels.insert(order->getLabel());
248 userLabels.erase(order->getLabel());
252 lastLabel = order->getLabel();
254 //get next line to process
256 order = input->getSharedOrderVector();
260 //output error messages about any remaining user labels
261 set<string>::iterator it;
262 bool needToRun = false;
263 for (it = userLabels.begin(); it != userLabels.end(); it++) {
264 mothurOut("Your file does not include the label " + *it);
265 if (processedLabels.count(lastLabel) != 1) {
266 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
269 mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
273 //run last line if you need to
274 if (needToRun == true) {
276 order = input->getSharedOrderVector(lastLabel);
277 mothurOut(order->getLabel() + "\t" + toString(count)); mothurOutEndLine();
285 //reset groups parameter
286 globaldata->Groups.clear();
290 catch(exception& e) {
291 errorOut(e, "BootSharedCommand", "execute");
295 //**********************************************************************************************************************
297 void BootSharedCommand::createTree(ostream* out, Tree* t){
300 //do merges and create tree structure by setting parents and children
301 //there are numGroups - 1 merges to do
302 for (int i = 0; i < (numGroups - 1); i++) {
304 float largest = -1000.0;
306 //find largest value in sims matrix by searching lower triangle
307 for (int j = 1; j < simMatrix.size(); j++) {
308 for (int k = 0; k < j; k++) {
309 if (simMatrix[j][k] > largest) { largest = simMatrix[j][k]; row = j; column = k; }
313 //set non-leaf node info and update leaves to know their parents
315 t->tree[numGroups + i].setChildren(index[row], index[column]);
318 t->tree[index[row]].setParent(numGroups + i);
319 t->tree[index[column]].setParent(numGroups + i);
321 //blength = distance / 2;
322 float blength = ((1.0 - largest) / 2);
325 t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
326 t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
328 //set your length to leaves to your childs length plus branchlength
329 t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
333 index[row] = numGroups+i;
334 index[column] = numGroups+i;
336 //zero out highest value that caused the merge.
337 simMatrix[row][column] = -1000.0;
338 simMatrix[column][row] = -1000.0;
340 //merge values in simsMatrix
341 for (int n = 0; n < simMatrix.size(); n++) {
342 //row becomes merge of 2 groups
343 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
344 simMatrix[n][row] = simMatrix[row][n];
346 simMatrix[column][n] = -1000.0;
347 simMatrix[n][column] = -1000.0;
351 //adjust tree to make sure root to tip length is .5
352 int root = t->findRoot();
353 t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
362 catch(exception& e) {
363 errorOut(e, "BootSharedCommand", "createTree");
367 /***********************************************************/
368 void BootSharedCommand::printSims() {
370 mothurOut("simsMatrix"); mothurOutEndLine();
371 for (int m = 0; m < simMatrix.size(); m++) {
372 for (int n = 0; n < simMatrix.size(); n++) {
373 mothurOut(simMatrix[m][n]); mothurOut("\t");
379 catch(exception& e) {
380 errorOut(e, "BootSharedCommand", "printSims");
384 /***********************************************************/
385 void BootSharedCommand::process(SharedOrderVector* order) {
388 vector<SharedRAbundVector*> subset;
391 //open an ostream for each calc to print to
392 for (int z = 0; z < treeCalculators.size(); z++) {
393 //create a new filename
394 outputFile = getRootName(globaldata->inputFileName) + treeCalculators[z]->getName() + ".boot" + order->getLabel() + ".tre";
395 openOutputFile(outputFile, *(out[z]));
398 mothurOut("Generating bootstrap trees..."); cout.flush();
400 //create a file for each calculator with the 1000 trees in it.
401 for (int p = 0; p < iters; p++) {
403 util->getSharedVectorswithReplacement(globaldata->Groups, lookup, order); //fills group vectors from order vector.
406 //for each calculator
407 for(int i = 0 ; i < treeCalculators.size(); i++) {
409 //initialize simMatrix
411 simMatrix.resize(numGroups);
412 for (int m = 0; m < simMatrix.size(); m++) {
413 for (int j = 0; j < simMatrix.size(); j++) {
414 simMatrix[m].push_back(0.0);
420 for (int g = 0; g < numGroups; g++) { index[g] = g; }
422 for (int k = 0; k < lookup.size(); k++) { // pass cdd each set of groups to commpare
423 for (int l = k; l < lookup.size(); l++) {
424 if (k != l) { //we dont need to similiarity of a groups to itself
425 subset.clear(); //clear out old pair of sharedrabunds
426 //add new pair of sharedrabunds
427 subset.push_back(lookup[k]); subset.push_back(lookup[l]);
429 //get estimated similarity between 2 groups
430 data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
431 //save values in similarity matrix
432 simMatrix[k][l] = data[0];
433 simMatrix[l][k] = data[0];
438 tempTree = new Tree();
440 //creates tree from similarity matrix and write out file
441 createTree(out[i], tempTree);
443 //save trees for consensus command.
444 trees[i].push_back(tempTree);
448 mothurOut("\tDone."); mothurOutEndLine();
449 //delete globaldata's tree
450 for (int m = 0; m < globaldata->gTree.size(); m++) { delete globaldata->gTree[m]; }
454 //create consensus trees for each bootstrapped tree set
455 for (int k = 0; k < trees.size(); k++) {
457 mothurOut("Generating consensus tree for " + treeCalculators[k]->getName()); mothurOutEndLine();
459 //set global data to calc trees
460 globaldata->gTree = trees[k];
462 string filename = getRootName(globaldata->inputFileName) + treeCalculators[k]->getName() + ".boot" + order->getLabel();
463 consensus = new ConcensusCommand(filename);
464 consensus->execute();
467 //delete globaldata's tree
468 for (int m = 0; m < globaldata->gTree.size(); m++) { delete globaldata->gTree[m]; }
475 //close ostream for each calc
476 for (int z = 0; z < treeCalculators.size(); z++) { out[z]->close(); }
479 catch(exception& e) {
480 errorOut(e, "BootSharedCommand", "process");
484 /***********************************************************/