git.donarmstrong.com Git - mothur.git/blobdiff - bootstrapsharedcommand.cpp
adding mothurout.h and .cpp to repo
[mothur.git] / bootstrapsharedcommand.cpp
index 2835061392732dfa402f626a3d339726a601435a..6b76918044201a7438413c0f695f83c141d5638f 100644 (file)
 
 //**********************************************************************************************************************
 
-BootSharedCommand::BootSharedCommand(){
+BootSharedCommand::BootSharedCommand(string option) {
        try {
                globaldata = GlobalData::getInstance();
-               format = globaldata->getFormat();
-               convert(globaldata->getIters(), iters);
-               validCalculator = new ValidCalculators();
-               util = new SharedUtil();
+               abort = false;
+               allLines = 1;
+               labels.clear();
+               Groups.clear();
+               Estimators.clear();
                
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
                
-               int i;
-               for (i=0; i<globaldata->Estimators.size(); i++) {
-                       if (validCalculator->isValidCalculator("boot", globaldata->Estimators[i]) == true) { 
-                               if (globaldata->Estimators[i] == "jabund") {    
-                                       treeCalculators.push_back(new JAbund());
-                               }else if (globaldata->Estimators[i] == "sorabund") { 
-                                       treeCalculators.push_back(new SorAbund());
-                               }else if (globaldata->Estimators[i] == "jclass") { 
-                                       treeCalculators.push_back(new Jclass());
-                               }else if (globaldata->Estimators[i] == "sorclass") { 
-                                       treeCalculators.push_back(new SorClass());
-                               }else if (globaldata->Estimators[i] == "jest") { 
-                                       treeCalculators.push_back(new Jest());
-                               }else if (globaldata->Estimators[i] == "sorest") { 
-                                       treeCalculators.push_back(new SorEst());
-                               }else if (globaldata->Estimators[i] == "thetayc") { 
-                                       treeCalculators.push_back(new ThetaYC());
-                               }else if (globaldata->Estimators[i] == "thetan") { 
-                                       treeCalculators.push_back(new ThetaN());
-                               }else if (globaldata->Estimators[i] == "morisitahorn") { 
-                                       treeCalculators.push_back(new MorHorn());
-                               }else if (globaldata->Estimators[i] == "braycurtis") { 
-                                       treeCalculators.push_back(new BrayCurtis());
+               else {
+                       //valid paramters for this command
+                       string Array[] =  {"label","calc","groups","iters","outputdir","inputdir"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+               
+                       //check to make sure all parameters are valid for command
+                       for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                               outputDir = ""; 
+                               outputDir += hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it  
+                       }
+
+                       
+                       //make sure the user has already run the read.otu command
+                       if (globaldata->getSharedFile() == "") {
+                               if (globaldata->getListFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the bootstrap.shared command."); m->mothurOutEndLine(); abort = true; }
+                               else if (globaldata->getGroupFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the bootstrap.shared command."); m->mothurOutEndLine(); abort = true; }
+                       }
+                       
+                       //check for optional parameter and set defaults
+                       // ...at some point should added some additional type checking...
+                       label = validParameter.validFile(parameters, "label", false);                   
+                       if (label == "not found") { label = ""; }
+                       else { 
+                               if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
+                               else { allLines = 1;  }
+                       }
+                       
+                       //if the user has not specified any labels use the ones from read.otu
+                       if(label == "") {  
+                               allLines = globaldata->allLines; 
+                               labels = globaldata->labels; 
+                       }
+                               
+                       groups = validParameter.validFile(parameters, "groups", false);                 
+                       if (groups == "not found") { groups = ""; }
+                       else { 
+                               splitAtDash(groups, Groups);
+                               globaldata->Groups = Groups;
+                       }
+                               
+                       calc = validParameter.validFile(parameters, "calc", false);                     
+                       if (calc == "not found") { calc = "jclass-thetayc";  }
+                       else { 
+                                if (calc == "default")  {  calc = "jclass-thetayc";  }
+                       }
+                       splitAtDash(calc, Estimators);
+
+                       string temp;
+                       temp = validParameter.validFile(parameters, "iters", false);  if (temp == "not found") { temp = "1000"; }
+                       convert(temp, iters); 
+                               
+                       if (abort == false) {
+                       
+                               //used in tree constructor 
+                               globaldata->runParse = false;
+                       
+                               validCalculator = new ValidCalculators();
+                               
+                               int i;
+                               for (i=0; i<Estimators.size(); i++) {
+                                       if (validCalculator->isValidCalculator("boot", Estimators[i]) == true) { 
+                                               if (Estimators[i] == "jabund") {        
+                                                       treeCalculators.push_back(new JAbund());
+                                               }else if (Estimators[i] == "sorabund") { 
+                                                       treeCalculators.push_back(new SorAbund());
+                                               }else if (Estimators[i] == "jclass") { 
+                                                       treeCalculators.push_back(new Jclass());
+                                               }else if (Estimators[i] == "sorclass") { 
+                                                       treeCalculators.push_back(new SorClass());
+                                               }else if (Estimators[i] == "jest") { 
+                                                       treeCalculators.push_back(new Jest());
+                                               }else if (Estimators[i] == "sorest") { 
+                                                       treeCalculators.push_back(new SorEst());
+                                               }else if (Estimators[i] == "thetayc") { 
+                                                       treeCalculators.push_back(new ThetaYC());
+                                               }else if (Estimators[i] == "thetan") { 
+                                                       treeCalculators.push_back(new ThetaN());
+                                               }else if (Estimators[i] == "morisitahorn") { 
+                                                       treeCalculators.push_back(new MorHorn());
+                                               }else if (Estimators[i] == "braycurtis") { 
+                                                       treeCalculators.push_back(new BrayCurtis());
+                                               }
+                                       }
                                }
+                               
+                               delete validCalculator;
+                               
+                               ofstream* tempo;
+                               for (int i=0; i < treeCalculators.size(); i++) {
+                                       tempo = new ofstream;
+                                       out.push_back(tempo);
+                               }
+                               
+                               //make a vector of tree* for each calculator
+                               trees.resize(treeCalculators.size());
                        }
                }
-               
-               ofstream* temp;
-               for (int i=0; i < treeCalculators.size(); i++) {
-                       temp = new ofstream;
-                       out.push_back(temp);
-               }
-               
-               //reset calc for next command
-               globaldata->setCalc("");
 
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the BootSharedCommand class Function BootSharedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "BootSharedCommand", "BootSharedCommand");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the BootSharedCommand class function BootSharedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+}
+
+//**********************************************************************************************************************
+
+void BootSharedCommand::help(){
+       try {
+               m->mothurOut("The bootstrap.shared command can only be executed after a successful read.otu command.\n");
+               m->mothurOut("The bootstrap.shared command parameters are groups, calc, iters and label.\n");
+               m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
+               m->mothurOut("The group names are separated by dashes. The label parameter allows you to select what distance levels you would like trees created for, and is also separated by dashes.\n");
+               m->mothurOut("The bootstrap.shared command should be in the following format: bootstrap.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels, iters=yourIters).\n");
+               m->mothurOut("Example bootstrap.shared(groups=A-B-C, calc=jabund-sorabund, iters=100).\n");
+               m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
+               m->mothurOut("The default value for calc is jclass-thetayc. The default for iters is 1000.\n");
+       }
+       catch(exception& e) {
+               m->errorOut(e, "BootSharedCommand", "help");
                exit(1);
-       }       
+       }
 }
+
 //**********************************************************************************************************************
 
 BootSharedCommand::~BootSharedCommand(){
-       delete input;
-       delete read;
-       delete util;
+       //made new in execute
+       if (abort == false) {
+               delete input; globaldata->ginput = NULL;
+               delete read;
+               delete util;
+               globaldata->gorder = NULL;
+       }
 }
 
 //**********************************************************************************************************************
 
 int BootSharedCommand::execute(){
        try {
-               int count = 1;  
        
-               //if the users entered no valid calculators don't execute command
-               if (treeCalculators.size() == 0) { return 0; }
-               
+               if (abort == true) {    return 0;       }
+       
+               util = new SharedUtil();        
+       
                //read first line
                read = new ReadOTUFile(globaldata->inputFileName);      
                read->read(&*globaldata); 
                input = globaldata->ginput;
                order = input->getSharedOrderVector();
-               SharedOrderVector* lastOrder = order;
+               string lastLabel = order->getLabel();
                
+               //if the users entered no valid calculators don't execute command
+               if (treeCalculators.size() == 0) { return 0; }
+
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                set<string> processedLabels;
-               set<string> userLabels = globaldata->labels;
+               set<string> userLabels = labels;
                                
                //set users groups
                util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "treegroup");
@@ -120,83 +218,96 @@ int BootSharedCommand::execute(){
                tmap->makeSim(globaldata->gGroupmap);
                globaldata->gTreemap = tmap;
                        
-               while((order != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
+               while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                
-                       if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(order->getLabel()) == 1){                       
+                       if(allLines == 1 || labels.count(order->getLabel()) == 1){                      
                                
-                               cout << order->getLabel() << '\t' << count << endl;
-                               process(order);
+                               m->mothurOut(order->getLabel()); m->mothurOutEndLine();
+                               int error = process(order);
+                               if (error == 1) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());  } globaldata->Groups.clear(); return 0;        } 
                                
                                processedLabels.insert(order->getLabel());
                                userLabels.erase(order->getLabel());
-                               
-                       //you have a label the user want that is smaller than this line and the last line has not already been processed 
                        }
                        
-                       if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastOrder->getLabel()) != 1)) {
-                                                                                       
-                               cout << lastOrder->getLabel() << '\t' << count << endl;
-                               process(lastOrder);
+                       //you have a label the user want that is smaller than this label and the last label has not already been processed
+                       if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = order->getLabel();
+                               
+                               delete order;
+                               order = input->getSharedOrderVector(lastLabel);                                                                                                 
+                               m->mothurOut(order->getLabel()); m->mothurOutEndLine();
+                               int error = process(order);
+                               if (error == 1) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());  } globaldata->Groups.clear(); return 0;        } 
 
-                               processedLabels.insert(lastOrder->getLabel());
-                               userLabels.erase(lastOrder->getLabel());
+                               processedLabels.insert(order->getLabel());
+                               userLabels.erase(order->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               order->setLabel(saveLabel);
                        }
                        
-                       if (count != 1) { delete lastOrder; }
-                       lastOrder = order;                      
+                       
+                       lastLabel = order->getLabel();                  
 
                        //get next line to process
+                       delete order;
                        order = input->getSharedOrderVector();
-                       count++;
                }
                
                //output error messages about any remaining user labels
                set<string>::iterator it;
                bool needToRun = false;
                for (it = userLabels.begin(); it != userLabels.end(); it++) {  
-                       cout << "Your file does not include the label "<< *it;
-                       if (processedLabels.count(lastOrder->getLabel()) != 1) {
-                               cout << ". I will use " << lastOrder->getLabel() << "." << endl;
+                       m->mothurOut("Your file does not include the label " + *it);
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
                                needToRun = true;
                        }else {
-                               cout << ". Please refer to " << lastOrder->getLabel() << "." << endl;
+                               m->mothurOut(". Please refer to " + lastLabel + ".");  m->mothurOutEndLine();
                        }
                }
                
                //run last line if you need to
                if (needToRun == true)  {
-                       process(lastOrder);                     
-                       cout << lastOrder->getLabel() << '\t' << count << endl;
+                               if (order != NULL) {    delete order;   }
+                               order = input->getSharedOrderVector(lastLabel);                                                                                                 
+                               m->mothurOut(order->getLabel()); m->mothurOutEndLine();
+                               int error = process(order);
+                               if (error == 1) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());  } globaldata->Groups.clear(); return 0;        } 
+                               
+                               delete order;
+
                }
                
-               delete lastOrder;
-
                //reset groups parameter
-               globaldata->Groups.clear();  globaldata->setGroups("");
+               globaldata->Groups.clear();  
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+
 
                return 0;
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the BootSharedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "BootSharedCommand", "execute");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the BootSharedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }               
 }
 //**********************************************************************************************************************
 
-void BootSharedCommand::createTree(ostream* out){
+int BootSharedCommand::createTree(ostream* out, Tree* t){
        try {
-               //create tree
-               t = new Tree();
                
                //do merges and create tree structure by setting parents and children
                //there are numGroups - 1 merges to do
                for (int i = 0; i < (numGroups - 1); i++) {
                
-                       float largest = -1.0;
+                       if (m->control_pressed) {  return 1; }
+               
+                       float largest = -1000.0;
                        int row, column;
                        //find largest value in sims matrix by searching lower triangle
                        for (int j = 1; j < simMatrix.size(); j++) {
@@ -229,8 +340,8 @@ void BootSharedCommand::createTree(ostream* out){
                        index[column] = numGroups+i;
                        
                        //zero out highest value that caused the merge.
-                       simMatrix[row][column] = -1.0;
-                       simMatrix[column][row] = -1.0;
+                       simMatrix[row][column] = -1000.0;
+                       simMatrix[column][row] = -1000.0;
                
                        //merge values in simsMatrix
                        for (int n = 0; n < simMatrix.size(); n++)      {
@@ -238,78 +349,81 @@ void BootSharedCommand::createTree(ostream* out){
                                simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
                                simMatrix[n][row] = simMatrix[row][n];
                                //delete column
-                               simMatrix[column][n] = -1.0;
-                               simMatrix[n][column] = -1.0;
+                               simMatrix[column][n] = -1000.0;
+                               simMatrix[n][column] = -1000.0;
                        }
                }
+               
+               //adjust tree to make sure root to tip length is .5
+               int root = t->findRoot();
+               t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
 
                //assemble tree
                t->assembleTree();
        
                //print newick file
                t->print(*out);
-       
-               //delete tree
-               delete t;
+               
+               return 0;
        
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the BootSharedCommand class Function createTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the BootSharedCommand class function createTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "BootSharedCommand", "createTree");
                exit(1);
        }
 }
 /***********************************************************/
 void BootSharedCommand::printSims() {
        try {
-               cout << "simsMatrix" << endl;
-               for (int m = 0; m < simMatrix.size(); m++)      {
+               m->mothurOut("simsMatrix"); m->mothurOutEndLine(); 
+               for (int k = 0; k < simMatrix.size(); k++)      {
                        for (int n = 0; n < simMatrix.size(); n++)      {
-                               cout << simMatrix[m][n] << '\t';
+                               m->mothurOut(toString(simMatrix[k][n]));  m->mothurOut("\t");
                        }
-                       cout << endl;
+                       m->mothurOutEndLine(); 
                }
 
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the BootSharedCommand class Function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "BootSharedCommand", "printSims");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the BootSharedCommand class function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }               
 }
 /***********************************************************/
-void BootSharedCommand::process(SharedOrderVector* order) {
+int BootSharedCommand::process(SharedOrderVector* order) {
        try{
                                EstOutput data;
                                vector<SharedRAbundVector*> subset;
-                               
+                                                               
                                //open an ostream for each calc to print to
                                for (int z = 0; z < treeCalculators.size(); z++) {
                                        //create a new filename
-                                       outputFile = getRootName(globaldata->inputFileName) + treeCalculators[z]->getName() + ".boot" + order->getLabel() + ".tre";
+                                       outputFile = outputDir + getRootName(getSimpleName(globaldata->inputFileName)) + treeCalculators[z]->getName() + ".boot" + order->getLabel() + ".tre";
                                        openOutputFile(outputFile, *(out[z]));
+                                       outputNames.push_back(outputFile);
                                }
                                
+                               m->mothurOut("Generating bootstrap trees..."); cout.flush();
+                               
                                //create a file for each calculator with the 1000 trees in it.
                                for (int p = 0; p < iters; p++) {
                                        
+                                       if (m->control_pressed) {  return 1; }
+                                       
                                        util->getSharedVectorswithReplacement(globaldata->Groups, lookup, order);  //fills group vectors from order vector.
+
                                
                                        //for each calculator                                                                                           
                                        for(int i = 0 ; i < treeCalculators.size(); i++) {
-                                       
+                                               
+                                               if (m->control_pressed) {  return 1; }
+                                               
                                                //initialize simMatrix
                                                simMatrix.clear();
                                                simMatrix.resize(numGroups);
-                                               for (int m = 0; m < simMatrix.size(); m++)      {
+                                               for (int o = 0; o < simMatrix.size(); o++)      {
                                                        for (int j = 0; j < simMatrix.size(); j++)      {
-                                                               simMatrix[m].push_back(0.0);
+                                                               simMatrix[o].push_back(0.0);
                                                        }
                                                }
                                
@@ -332,23 +446,61 @@ void BootSharedCommand::process(SharedOrderVector* order) {
                                                                }
                                                        }
                                                }
-                               
+                                               
+                                               tempTree = new Tree();
+                                               
+                                               if (m->control_pressed) {   delete tempTree; return 1; }
+                                               
                                                //creates tree from similarity matrix and write out file
-                                               createTree(out[i]);
+                                               createTree(out[i], tempTree);
+                                               
+                                               //save trees for consensus command.
+                                               trees[i].push_back(tempTree);
                                        }
                                }
+                               
+                               m->mothurOut("\tDone."); m->mothurOutEndLine();
+                               //delete globaldata's tree
+                               //for (int m = 0; m < globaldata->gTree.size(); m++) {  delete globaldata->gTree[m];  }
+                               //globaldata->gTree.clear();
+                               
+                               
+                               //create consensus trees for each bootstrapped tree set
+                               for (int k = 0; k < trees.size(); k++) {
+                                       
+                                       m->mothurOut("Generating consensus tree for " + treeCalculators[k]->getName()); m->mothurOutEndLine();
+                                       
+                                       if (m->control_pressed) {  return 1; }
+                                       
+                                       //set global data to calc trees
+                                       globaldata->gTree = trees[k];
+                                       
+                                       string filename = outputDir + getRootName(getSimpleName(globaldata->inputFileName)) + treeCalculators[k]->getName() + ".boot" + order->getLabel();
+                                       consensus = new ConcensusCommand(filename);
+                                       consensus->execute();
+                                       delete consensus;
+                                       
+                                       outputNames.push_back(filename + ".cons.pairs");
+                                       outputNames.push_back(filename + ".cons.tre");
+                                       
+                                       //delete globaldata's tree
+                                       //for (int m = 0; m < globaldata->gTree.size(); m++) {  delete globaldata->gTree[m];  }
+                                       //globaldata->gTree.clear();
+                                       
+                               }
+                               
+                               
+                                       
                                //close ostream for each calc
                                for (int z = 0; z < treeCalculators.size(); z++) { out[z]->close(); }
-
+                               
+                               return 0;
+       
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the BootSharedCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "BootSharedCommand", "process");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the BootSharedCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }               
 }
 /***********************************************************/