git.donarmstrong.com Git - mothur.git/blobdiff - bootstrapsharedcommand.cpp
added hcluster command and fixed some bugs, namely one with smart distancing.
[mothur.git] / bootstrapsharedcommand.cpp
index a94954ae6541e509723dd233ec235b7b384abb4e..1b682c78e3471fe7b4fef8528e8d74243374cd21 100644 (file)
@@ -27,7 +27,6 @@ BootSharedCommand::BootSharedCommand(string option){
                globaldata = GlobalData::getInstance();
                abort = false;
                allLines = 1;
-               lines.clear();
                labels.clear();
                Groups.clear();
                Estimators.clear();
@@ -37,7 +36,7 @@ BootSharedCommand::BootSharedCommand(string option){
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"line","label","calc","groups","iters"};
+                       string Array[] =  {"label","calc","groups","iters"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -52,19 +51,12 @@ BootSharedCommand::BootSharedCommand(string option){
                        
                        //make sure the user has already run the read.otu command
                        if (globaldata->getSharedFile() == "") {
-                               if (globaldata->getListFile() == "") { cout << "You must read a list and a group, or a shared before you can use the bootstrap.shared command." << endl; abort = true; }
-                               else if (globaldata->getGroupFile() == "") { cout << "You must read a list and a group, or a shared before you can use the bootstrap.shared command." << endl; abort = true; }
+                               if (globaldata->getListFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the bootstrap.shared command."); mothurOutEndLine(); abort = true; }
+                               else if (globaldata->getGroupFile() == "") { mothurOut("You must read a list and a group, or a shared before you can use the bootstrap.shared command."); mothurOutEndLine(); abort = true; }
                        }
                        
                        //check for optional parameter and set defaults
                        // ...at some point should added some additional type checking...
-                       line = validParameter.validFile(parameters, "line", false);                             
-                       if (line == "not found") { line = "";  }
-                       else { 
-                               if(line != "all") {  splitAtDash(line, lines);  allLines = 0;  }
-                               else { allLines = 1;  }
-                       }
-                       
                        label = validParameter.validFile(parameters, "label", false);                   
                        if (label == "not found") { label = ""; }
                        else { 
@@ -72,13 +64,10 @@ BootSharedCommand::BootSharedCommand(string option){
                                else { allLines = 1;  }
                        }
                        
-                       //make sure user did not use both the line and label parameters
-                       if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; }
-                       //if the user has not specified any line or labels use the ones from read.otu
-                       else if((line == "") && (label == "")) {  
+                       //if the user has not specified any labels use the ones from read.otu
+                       if(label == "") {  
                                allLines = globaldata->allLines; 
                                labels = globaldata->labels; 
-                               lines = globaldata->lines;
                        }
                                
                        groups = validParameter.validFile(parameters, "groups", false);                 
@@ -101,6 +90,9 @@ BootSharedCommand::BootSharedCommand(string option){
                                
                        if (abort == false) {
                        
+                               //used in tree constructor 
+                               globaldata->runParse = false;
+                       
                                validCalculator = new ValidCalculators();
                                
                                int i;
@@ -136,42 +128,37 @@ BootSharedCommand::BootSharedCommand(string option){
                                for (int i=0; i < treeCalculators.size(); i++) {
                                        tempo = new ofstream;
                                        out.push_back(tempo);
-                               }       
+                               }
+                               
+                               //make a vector of tree* for each calculator
+                               trees.resize(treeCalculators.size());
                        }
                }
 
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the BootSharedCommand class Function BootSharedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               errorOut(e, "BootSharedCommand", "BootSharedCommand");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the BootSharedCommand class function BootSharedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }       
 }
 
 //**********************************************************************************************************************
 
 void BootSharedCommand::help(){
        try {
-               cout << "The bootstrap.shared command can only be executed after a successful read.otu command." << "\n";
-               cout << "The bootstrap.shared command parameters are groups, calc, iters, line and label.  You may not use line and label at the same time." << "\n";
-               cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included used." << "\n";
-               cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like trees created for, and are also separated by dashes." << "\n";
-               cout << "The bootstrap.shared command should be in the following format: bootstrap.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels, iters=yourIters)." << "\n";
-               cout << "Example bootstrap.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund, iters=100)." << "\n";
-               cout << "The default value for groups is all the groups in your groupfile." << "\n";
-               cout << "The default value for calc is jclass-thetayc. The default for iters is 1000." << "\n";
+               mothurOut("The bootstrap.shared command can only be executed after a successful read.otu command.\n");
+               mothurOut("The bootstrap.shared command parameters are groups, calc, iters and label.\n");
+               mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
+               mothurOut("The group names are separated by dashes. The label parameter allows you to select what distance levels you would like trees created for, and is also separated by dashes.\n");
+               mothurOut("The bootstrap.shared command should be in the following format: bootstrap.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels, iters=yourIters).\n");
+               mothurOut("Example bootstrap.shared(groups=A-B-C, calc=jabund-sorabund, iters=100).\n");
+               mothurOut("The default value for groups is all the groups in your groupfile.\n");
+               mothurOut("The default value for calc is jclass-thetayc. The default for iters is 1000.\n");
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the BootSharedCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               errorOut(e, "BootSharedCommand", "help");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the BootSharedCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }       
 }
 
 //**********************************************************************************************************************
@@ -193,7 +180,6 @@ int BootSharedCommand::execute(){
        
                if (abort == true) {    return 0;       }
        
-               int count = 1;
                util = new SharedUtil();        
        
                //read first line
@@ -201,7 +187,7 @@ int BootSharedCommand::execute(){
                read->read(&*globaldata); 
                input = globaldata->ginput;
                order = input->getSharedOrderVector();
-               SharedOrderVector* lastOrder = order;
+               string lastLabel = order->getLabel();
                
                //if the users entered no valid calculators don't execute command
                if (treeCalculators.size() == 0) { return 0; }
@@ -209,7 +195,6 @@ int BootSharedCommand::execute(){
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                set<string> processedLabels;
                set<string> userLabels = labels;
-               set<int> userLines = lines;
                                
                //set users groups
                util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "treegroup");
@@ -226,83 +211,84 @@ int BootSharedCommand::execute(){
                tmap->makeSim(globaldata->gGroupmap);
                globaldata->gTreemap = tmap;
                        
-               while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
+               while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                
-                       if(allLines == 1 || lines.count(count) == 1 || labels.count(order->getLabel()) == 1){                   
+                       if(allLines == 1 || labels.count(order->getLabel()) == 1){                      
                                
-                               cout << order->getLabel() << '\t' << count << endl;
+                               mothurOut(order->getLabel()); mothurOutEndLine();
                                process(order);
                                
                                processedLabels.insert(order->getLabel());
                                userLabels.erase(order->getLabel());
-                               userLines.erase(count);
                        }
                        
-                       //you have a label the user want that is smaller than this line and the last line has not already been processed
-                       if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastOrder->getLabel()) != 1)) {
-                                                                                       
-                               cout << lastOrder->getLabel() << '\t' << count << endl;
-                               process(lastOrder);
+                       //you have a label the user want that is smaller than this label and the last label has not already been processed
+                       if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = order->getLabel();
+                               
+                               delete order;
+                               order = input->getSharedOrderVector(lastLabel);                                                                                                 
+                               mothurOut(order->getLabel()); mothurOutEndLine();
+                               process(order);
 
-                               processedLabels.insert(lastOrder->getLabel());
-                               userLabels.erase(lastOrder->getLabel());
+                               processedLabels.insert(order->getLabel());
+                               userLabels.erase(order->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               order->setLabel(saveLabel);
                        }
                        
-                       if (count != 1) { delete lastOrder; }
-                       lastOrder = order;                      
+                       
+                       lastLabel = order->getLabel();                  
 
                        //get next line to process
+                       delete order;
                        order = input->getSharedOrderVector();
-                       count++;
                }
                
                //output error messages about any remaining user labels
                set<string>::iterator it;
                bool needToRun = false;
                for (it = userLabels.begin(); it != userLabels.end(); it++) {  
-                       cout << "Your file does not include the label "<< *it;
-                       if (processedLabels.count(lastOrder->getLabel()) != 1) {
-                               cout << ". I will use " << lastOrder->getLabel() << "." << endl;
+                       mothurOut("Your file does not include the label " + *it);
+                       if (processedLabels.count(lastLabel) != 1) {
+                               mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
                                needToRun = true;
                        }else {
-                               cout << ". Please refer to " << lastOrder->getLabel() << "." << endl;
+                               mothurOut(". Please refer to " + lastLabel + ".");  mothurOutEndLine();
                        }
                }
                
                //run last line if you need to
                if (needToRun == true)  {
-                       process(lastOrder);                     
-                       cout << lastOrder->getLabel() << '\t' << count << endl;
+                               if (order != NULL) {    delete order;   }
+                               order = input->getSharedOrderVector(lastLabel);                                                                                                 
+                               mothurOut(order->getLabel()); mothurOutEndLine();
+                               process(order);
+                               delete order;
+
                }
                
-               delete lastOrder;
-
                //reset groups parameter
                globaldata->Groups.clear();  
 
                return 0;
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the BootSharedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               errorOut(e, "BootSharedCommand", "execute");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the BootSharedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }               
 }
 //**********************************************************************************************************************
 
-void BootSharedCommand::createTree(ostream* out){
+void BootSharedCommand::createTree(ostream* out, Tree* t){
        try {
-               //create tree
-               t = new Tree();
                
                //do merges and create tree structure by setting parents and children
                //there are numGroups - 1 merges to do
                for (int i = 0; i < (numGroups - 1); i++) {
                
-                       float largest = -1.0;
+                       float largest = -1000.0;
                        int row, column;
                        //find largest value in sims matrix by searching lower triangle
                        for (int j = 1; j < simMatrix.size(); j++) {
@@ -335,8 +321,8 @@ void BootSharedCommand::createTree(ostream* out){
                        index[column] = numGroups+i;
                        
                        //zero out highest value that caused the merge.
-                       simMatrix[row][column] = -1.0;
-                       simMatrix[column][row] = -1.0;
+                       simMatrix[row][column] = -1000.0;
+                       simMatrix[column][row] = -1000.0;
                
                        //merge values in simsMatrix
                        for (int n = 0; n < simMatrix.size(); n++)      {
@@ -344,10 +330,14 @@ void BootSharedCommand::createTree(ostream* out){
                                simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
                                simMatrix[n][row] = simMatrix[row][n];
                                //delete column
-                               simMatrix[column][n] = -1.0;
-                               simMatrix[n][column] = -1.0;
+                               simMatrix[column][n] = -1000.0;
+                               simMatrix[n][column] = -1000.0;
                        }
                }
+               
+               //adjust tree to make sure root to tip length is .5
+               int root = t->findRoot();
+               t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
 
                //assemble tree
                t->assembleTree();
@@ -355,39 +345,28 @@ void BootSharedCommand::createTree(ostream* out){
                //print newick file
                t->print(*out);
        
-               //delete tree
-               delete t;
-       
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the BootSharedCommand class Function createTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the BootSharedCommand class function createTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               errorOut(e, "BootSharedCommand", "createTree");
                exit(1);
        }
 }
 /***********************************************************/
 void BootSharedCommand::printSims() {
        try {
-               cout << "simsMatrix" << endl;
+               mothurOut("simsMatrix"); mothurOutEndLine(); 
                for (int m = 0; m < simMatrix.size(); m++)      {
                        for (int n = 0; n < simMatrix.size(); n++)      {
-                               cout << simMatrix[m][n] << '\t';
+                               mothurOut(simMatrix[m][n]);  mothurOut("\t");
                        }
-                       cout << endl;
+                       mothurOutEndLine(); 
                }
 
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the BootSharedCommand class Function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               errorOut(e, "BootSharedCommand", "printSims");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the BootSharedCommand class function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }               
 }
 /***********************************************************/
 void BootSharedCommand::process(SharedOrderVector* order) {
@@ -395,6 +374,7 @@ void BootSharedCommand::process(SharedOrderVector* order) {
                                EstOutput data;
                                vector<SharedRAbundVector*> subset;
                                
+                               
                                //open an ostream for each calc to print to
                                for (int z = 0; z < treeCalculators.size(); z++) {
                                        //create a new filename
@@ -402,10 +382,13 @@ void BootSharedCommand::process(SharedOrderVector* order) {
                                        openOutputFile(outputFile, *(out[z]));
                                }
                                
+                               mothurOut("Generating bootstrap trees..."); cout.flush();
+                               
                                //create a file for each calculator with the 1000 trees in it.
                                for (int p = 0; p < iters; p++) {
                                        
-                                       util->getSharedVectorswithReplacement(Groups, lookup, order);  //fills group vectors from order vector.
+                                       util->getSharedVectorswithReplacement(globaldata->Groups, lookup, order);  //fills group vectors from order vector.
+
                                
                                        //for each calculator                                                                                           
                                        for(int i = 0 ; i < treeCalculators.size(); i++) {
@@ -438,23 +421,52 @@ void BootSharedCommand::process(SharedOrderVector* order) {
                                                                }
                                                        }
                                                }
-                               
+                                               
+                                               tempTree = new Tree();
+                                               
                                                //creates tree from similarity matrix and write out file
-                                               createTree(out[i]);
+                                               createTree(out[i], tempTree);
+                                               
+                                               //save trees for consensus command.
+                                               trees[i].push_back(tempTree);
                                        }
                                }
+                               
+                               mothurOut("\tDone."); mothurOutEndLine();
+                               //delete globaldata's tree
+                               //for (int m = 0; m < globaldata->gTree.size(); m++) {  delete globaldata->gTree[m];  }
+                               //globaldata->gTree.clear();
+                               
+                               
+                               //create consensus trees for each bootstrapped tree set
+                               for (int k = 0; k < trees.size(); k++) {
+                                       
+                                       mothurOut("Generating consensus tree for " + treeCalculators[k]->getName()); mothurOutEndLine();
+                                       
+                                       //set global data to calc trees
+                                       globaldata->gTree = trees[k];
+                                       
+                                       string filename = getRootName(globaldata->inputFileName) + treeCalculators[k]->getName() + ".boot" + order->getLabel();
+                                       consensus = new ConcensusCommand(filename);
+                                       consensus->execute();
+                                       delete consensus;
+                                       
+                                       //delete globaldata's tree
+                                       //for (int m = 0; m < globaldata->gTree.size(); m++) {  delete globaldata->gTree[m];  }
+                                       //globaldata->gTree.clear();
+                                       
+                               }
+                               
+                               
+                                       
                                //close ostream for each calc
                                for (int z = 0; z < treeCalculators.size(); z++) { out[z]->close(); }
-
+       
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the BootSharedCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               errorOut(e, "BootSharedCommand", "process");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the BootSharedCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }               
 }
 /***********************************************************/