2 * bootstrapsharedcommand.cpp
5 * Created by Sarah Westcott on 4/16/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "bootstrapsharedcommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
23 //**********************************************************************************************************************
25 BootSharedCommand::BootSharedCommand(string option){
27 globaldata = GlobalData::getInstance();
35 //allow user to run help
36 if(option == "help") { help(); abort = true; }
39 //valid paramters for this command
40 string Array[] = {"line","label","calc","groups","iters"};
41 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
43 OptionParser parser(option);
44 map<string,string> parameters = parser.getParameters();
46 ValidParameters validParameter;
48 //check to make sure all parameters are valid for command
49 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
50 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
53 //make sure the user has already run the read.otu command
54 if (globaldata->getSharedFile() == "") {
55 if (globaldata->getListFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the bootstrap.shared command."); mothurOutEndLine(); abort = true; }
56 else if (globaldata->getGroupFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the bootstrap.shared command."); mothurOutEndLine(); abort = true; }
59 //check for optional parameter and set defaults
60 // ...at some point should added some additional type checking...
61 line = validParameter.validFile(parameters, "line", false);
62 if (line == "not found") { line = ""; }
64 if(line != "all") { splitAtDash(line, lines); allLines = 0; }
65 else { allLines = 1; }
68 label = validParameter.validFile(parameters, "label", false);
69 if (label == "not found") { label = ""; }
71 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
72 else { allLines = 1; }
75 //make sure user did not use both the line and label parameters
76 if ((line != "") && (label != "")) { mothurOut("You cannot use both the line and label parameters at the same time. "); mothurOutEndLine(); abort = true; }
77 //if the user has not specified any line or labels use the ones from read.otu
78 else if((line == "") && (label == "")) {
79 allLines = globaldata->allLines;
80 labels = globaldata->labels;
81 lines = globaldata->lines;
84 groups = validParameter.validFile(parameters, "groups", false);
85 if (groups == "not found") { groups = ""; }
87 splitAtDash(groups, Groups);
88 globaldata->Groups = Groups;
91 calc = validParameter.validFile(parameters, "calc", false);
92 if (calc == "not found") { calc = "jclass-thetayc"; }
94 if (calc == "default") { calc = "jclass-thetayc"; }
96 splitAtDash(calc, Estimators);
99 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
100 convert(temp, iters);
102 if (abort == false) {
104 validCalculator = new ValidCalculators();
107 for (i=0; i<Estimators.size(); i++) {
108 if (validCalculator->isValidCalculator("boot", Estimators[i]) == true) {
109 if (Estimators[i] == "jabund") {
110 treeCalculators.push_back(new JAbund());
111 }else if (Estimators[i] == "sorabund") {
112 treeCalculators.push_back(new SorAbund());
113 }else if (Estimators[i] == "jclass") {
114 treeCalculators.push_back(new Jclass());
115 }else if (Estimators[i] == "sorclass") {
116 treeCalculators.push_back(new SorClass());
117 }else if (Estimators[i] == "jest") {
118 treeCalculators.push_back(new Jest());
119 }else if (Estimators[i] == "sorest") {
120 treeCalculators.push_back(new SorEst());
121 }else if (Estimators[i] == "thetayc") {
122 treeCalculators.push_back(new ThetaYC());
123 }else if (Estimators[i] == "thetan") {
124 treeCalculators.push_back(new ThetaN());
125 }else if (Estimators[i] == "morisitahorn") {
126 treeCalculators.push_back(new MorHorn());
127 }else if (Estimators[i] == "braycurtis") {
128 treeCalculators.push_back(new BrayCurtis());
133 delete validCalculator;
136 for (int i=0; i < treeCalculators.size(); i++) {
137 tempo = new ofstream;
138 out.push_back(tempo);
144 catch(exception& e) {
145 errorOut(e, "BootSharedCommand", "BootSharedCommand");
150 //**********************************************************************************************************************
152 void BootSharedCommand::help(){
154 mothurOut("The bootstrap.shared command can only be executed after a successful read.otu command.\n");
155 mothurOut("The bootstrap.shared command parameters are groups, calc, iters, line and label. You may not use line and label at the same time.\n");
156 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
157 mothurOut("The group names are separated by dashes. The line and label allow you to select what distance levels you would like trees created for, and are also separated by dashes.\n");
158 mothurOut("The bootstrap.shared command should be in the following format: bootstrap.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels, iters=yourIters).\n");
159 mothurOut("Example bootstrap.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund, iters=100).\n");
160 mothurOut("The default value for groups is all the groups in your groupfile.\n");
161 mothurOut("The default value for calc is jclass-thetayc. The default for iters is 1000.\n");
163 catch(exception& e) {
164 errorOut(e, "BootSharedCommand", "help");
169 //**********************************************************************************************************************
171 BootSharedCommand::~BootSharedCommand(){
172 //made new in execute
173 if (abort == false) {
174 delete input; globaldata->ginput = NULL;
177 globaldata->gorder = NULL;
181 //**********************************************************************************************************************
183 int BootSharedCommand::execute(){
186 if (abort == true) { return 0; }
189 util = new SharedUtil();
192 read = new ReadOTUFile(globaldata->inputFileName);
193 read->read(&*globaldata);
194 input = globaldata->ginput;
195 order = input->getSharedOrderVector();
196 string lastLabel = order->getLabel();
198 //if the users entered no valid calculators don't execute command
199 if (treeCalculators.size() == 0) { return 0; }
201 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
202 set<string> processedLabels;
203 set<string> userLabels = labels;
204 set<int> userLines = lines;
207 util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "treegroup");
208 numGroups = globaldata->Groups.size();
210 //clear globaldatas old tree names if any
211 globaldata->Treenames.clear();
213 //fills globaldatas tree names
214 globaldata->Treenames = globaldata->Groups;
216 //create treemap class from groupmap for tree class to use
217 tmap = new TreeMap();
218 tmap->makeSim(globaldata->gGroupmap);
219 globaldata->gTreemap = tmap;
221 while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
223 if(allLines == 1 || lines.count(count) == 1 || labels.count(order->getLabel()) == 1){
225 mothurOut(order->getLabel() + "\t" + toString(count)); mothurOutEndLine();
228 processedLabels.insert(order->getLabel());
229 userLabels.erase(order->getLabel());
230 userLines.erase(count);
233 //you have a label the user want that is smaller than this line and the last line has not already been processed
234 if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
237 order = input->getSharedOrderVector(lastLabel);
238 mothurOut(order->getLabel() + "\t" + toString(count)); mothurOutEndLine();
241 processedLabels.insert(order->getLabel());
242 userLabels.erase(order->getLabel());
246 lastLabel = order->getLabel();
248 //get next line to process
250 order = input->getSharedOrderVector();
254 //output error messages about any remaining user labels
255 set<string>::iterator it;
256 bool needToRun = false;
257 for (it = userLabels.begin(); it != userLabels.end(); it++) {
258 mothurOut("Your file does not include the label " + *it);
259 if (processedLabels.count(lastLabel) != 1) {
260 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
263 mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
267 //run last line if you need to
268 if (needToRun == true) {
270 order = input->getSharedOrderVector(lastLabel);
271 mothurOut(order->getLabel() + "\t" + toString(count)); mothurOutEndLine();
277 //reset groups parameter
278 globaldata->Groups.clear();
282 catch(exception& e) {
283 errorOut(e, "BootSharedCommand", "execute");
287 //**********************************************************************************************************************
289 void BootSharedCommand::createTree(ostream* out){
294 //do merges and create tree structure by setting parents and children
295 //there are numGroups - 1 merges to do
296 for (int i = 0; i < (numGroups - 1); i++) {
298 float largest = -1.0;
300 //find largest value in sims matrix by searching lower triangle
301 for (int j = 1; j < simMatrix.size(); j++) {
302 for (int k = 0; k < j; k++) {
303 if (simMatrix[j][k] > largest) { largest = simMatrix[j][k]; row = j; column = k; }
307 //set non-leaf node info and update leaves to know their parents
309 t->tree[numGroups + i].setChildren(index[row], index[column]);
312 t->tree[index[row]].setParent(numGroups + i);
313 t->tree[index[column]].setParent(numGroups + i);
315 //blength = distance / 2;
316 float blength = ((1.0 - largest) / 2);
319 t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
320 t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
322 //set your length to leaves to your childs length plus branchlength
323 t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
327 index[row] = numGroups+i;
328 index[column] = numGroups+i;
330 //zero out highest value that caused the merge.
331 simMatrix[row][column] = -1.0;
332 simMatrix[column][row] = -1.0;
334 //merge values in simsMatrix
335 for (int n = 0; n < simMatrix.size(); n++) {
336 //row becomes merge of 2 groups
337 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
338 simMatrix[n][row] = simMatrix[row][n];
340 simMatrix[column][n] = -1.0;
341 simMatrix[n][column] = -1.0;
355 catch(exception& e) {
356 errorOut(e, "BootSharedCommand", "createTree");
360 /***********************************************************/
361 void BootSharedCommand::printSims() {
363 mothurOut("simsMatrix"); mothurOutEndLine();
364 for (int m = 0; m < simMatrix.size(); m++) {
365 for (int n = 0; n < simMatrix.size(); n++) {
366 mothurOut(simMatrix[m][n]); mothurOut("\t");
372 catch(exception& e) {
373 errorOut(e, "BootSharedCommand", "printSims");
377 /***********************************************************/
378 void BootSharedCommand::process(SharedOrderVector* order) {
381 vector<SharedRAbundVector*> subset;
383 //open an ostream for each calc to print to
384 for (int z = 0; z < treeCalculators.size(); z++) {
385 //create a new filename
386 outputFile = getRootName(globaldata->inputFileName) + treeCalculators[z]->getName() + ".boot" + order->getLabel() + ".tre";
387 openOutputFile(outputFile, *(out[z]));
390 //create a file for each calculator with the 1000 trees in it.
391 for (int p = 0; p < iters; p++) {
393 util->getSharedVectorswithReplacement(Groups, lookup, order); //fills group vectors from order vector.
395 //for each calculator
396 for(int i = 0 ; i < treeCalculators.size(); i++) {
398 //initialize simMatrix
400 simMatrix.resize(numGroups);
401 for (int m = 0; m < simMatrix.size(); m++) {
402 for (int j = 0; j < simMatrix.size(); j++) {
403 simMatrix[m].push_back(0.0);
409 for (int g = 0; g < numGroups; g++) { index[g] = g; }
411 for (int k = 0; k < lookup.size(); k++) { // pass cdd each set of groups to commpare
412 for (int l = k; l < lookup.size(); l++) {
413 if (k != l) { //we dont need to similiarity of a groups to itself
414 subset.clear(); //clear out old pair of sharedrabunds
415 //add new pair of sharedrabunds
416 subset.push_back(lookup[k]); subset.push_back(lookup[l]);
418 //get estimated similarity between 2 groups
419 data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
420 //save values in similarity matrix
421 simMatrix[k][l] = data[0];
422 simMatrix[l][k] = data[0];
427 //creates tree from similarity matrix and write out file
431 //close ostream for each calc
432 for (int z = 0; z < treeCalculators.size(); z++) { out[z]->close(); }
435 catch(exception& e) {
436 errorOut(e, "BootSharedCommand", "process");
440 /***********************************************************/