]> git.donarmstrong.com Git - mothur.git/blobdiff - summarysharedcommand.cpp
added out.hierarchy command
[mothur.git] / summarysharedcommand.cpp
index 139b74e762734957c95ccb659434e32068289162..e39d70bee58e1374774453d81d70dcf843c53be2 100644 (file)
@@ -8,9 +8,10 @@
  */
 
 #include "summarysharedcommand.h"
-#include "sharedsobs.h"
+#include "sharedsobscollectsummary.h"
 #include "sharedchao1.h"
 #include "sharedace.h"
+#include "sharednseqs.h"
 #include "sharedjabund.h"
 #include "sharedsorabund.h"
 #include "sharedjclass.h"
 #include "sharedsorest.h"
 #include "sharedthetayc.h"
 #include "sharedthetan.h"
+#include "sharedkstest.h"
+#include "whittaker.h"
+#include "sharedochiai.h"
+#include "sharedanderbergs.h"
+#include "sharedkulczynski.h"
+#include "sharedkulczynskicody.h"
+#include "sharedlennon.h"
+#include "sharedmorisitahorn.h"
+#include "sharedbraycurtis.h"
+#include "sharedjackknife.h"
+#include "whittaker.h"
+
 
 //**********************************************************************************************************************
 
-SummarySharedCommand::SummarySharedCommand(){
+//Parses the option string for summary.shared, validates the parameters and, when nothing aborted,
+//creates one calculator per requested estimator and opens the <root>shared.summary output file.
+//option is either "help" or the parameter list handed to OptionParser.
+//On any invalid parameter, or when no shared file has been read, abort is set and execute() does nothing.
+SummarySharedCommand::SummarySharedCommand(string option){
        try {
                globaldata = GlobalData::getInstance();
+               abort = false;
+               read = NULL;  //only created in execute(); NULL keeps the destructor's delete safe if execute() returns before that
+               allLines = 1;
+               labels.clear();
+               Estimators.clear();
+               
+               //allow user to run help
+               //help() lists the calculators through validCalculator; the destructor frees nothing once abort is set, so release it here
+               if(option == "help") { validCalculator = new ValidCalculators(); help(); delete validCalculator; validCalculator = NULL; abort = true; }
+               
+               else {
+                       //valid parameters for this command
+                       string Array[] =  {"label","calc","groups","all"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string, string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
                
-               int i;
-               for (i=0; i<globaldata->sharedSummaryEstimators.size(); i++) {
-                       if (globaldata->sharedSummaryEstimators[i] == "sharedSobs") { 
-                               sumCalculators.push_back(new SharedSobs());
-                       }else if (globaldata->sharedSummaryEstimators[i] == "sharedChao") { 
-                               sumCalculators.push_back(new SharedChao1());
-                       }else if (globaldata->sharedSummaryEstimators[i] == "sharedAce") { 
-                               sumCalculators.push_back(new SharedAce());
-                       }else if (globaldata->sharedSummaryEstimators[i] == "sharedJabund") {   
-                               sumCalculators.push_back(new SharedJAbund());
-                       }else if (globaldata->sharedSummaryEstimators[i] == "sharedSorensonAbund") { 
-                               sumCalculators.push_back(new SharedSorAbund());
-                       }else if (globaldata->sharedSummaryEstimators[i] == "sharedJclass") { 
-                               sumCalculators.push_back(new SharedJclass());
-                       }else if (globaldata->sharedSummaryEstimators[i] == "sharedSorClass") { 
-                               sumCalculators.push_back(new SharedSorClass());
-                       }else if (globaldata->sharedSummaryEstimators[i] == "sharedJest") { 
-                               sumCalculators.push_back(new SharedJest());
-                       }else if (globaldata->sharedSummaryEstimators[i] == "sharedSorEst") { 
-                               sumCalculators.push_back(new SharedSorEst());
-                       }else if (globaldata->sharedSummaryEstimators[i] == "SharedThetaYC") { 
-                               sumCalculators.push_back(new SharedThetaYC());
-                       }else if (globaldata->sharedSummaryEstimators[i] == "SharedThetaN") { 
-                               sumCalculators.push_back(new SharedThetaN());
+                       //check to make sure all parameters are valid for command
+                       for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //make sure the user has already run the read.otu command
+                       if (globaldata->getSharedFile() == "") {
+                                mothurOut("You must read a list and a group, or a shared before you can use the summary.shared command."); mothurOutEndLine(); abort = true; 
+                       }
+                       
+                       //check for optional parameter and set defaults
+                       // ...at some point should add some additional type checking...
+                       label = validParameter.validFile(parameters, "label", false);                   
+                       if (label == "not found") { label = ""; }
+                       else { 
+                               if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
+                               else { allLines = 1;  }
+                       }
+                       
+                       //if the user has not specified any labels use the ones from read.otu
+                       if(label == "") {  
+                               allLines = globaldata->allLines; 
+                               labels = globaldata->labels; 
+                       }
+                               
+                       //"calc" missing or set to "default" both select the same default estimator list
+                       calc = validParameter.validFile(parameters, "calc", false);                     
+                       if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
+                       else { 
+                                if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
+                       }
+                       splitAtDash(calc, Estimators);
+                       
+                       //a groups selection is also published through globaldata->Groups
+                       groups = validParameter.validFile(parameters, "groups", false);                 
+                       if (groups == "not found") { groups = ""; }
+                       else { 
+                               splitAtDash(groups, Groups);
+                               globaldata->Groups = Groups;
+                       }
+                       
+                       string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
+                       all = isTrue(temp);
+                       
+                       if (abort == false) {
+                       
+                               validCalculator = new ValidCalculators();
+                               int i;
+                               
+                               //create a calculator for every estimator name that validCalculator accepts;
+                               //accepted names without a branch below add nothing
+                               for (i=0; i<Estimators.size(); i++) {
+                                       if (validCalculator->isValidCalculator("sharedsummary", Estimators[i]) == true) { 
+                                               if (Estimators[i] == "sharedsobs") { 
+                                                       sumCalculators.push_back(new SharedSobsCS());
+                                               }else if (Estimators[i] == "sharedchao") { 
+                                                       sumCalculators.push_back(new SharedChao1());
+                                               }else if (Estimators[i] == "sharedace") { 
+                                                       sumCalculators.push_back(new SharedAce());
+                                               }else if (Estimators[i] == "jabund") {  
+                                                       sumCalculators.push_back(new JAbund());
+                                               }else if (Estimators[i] == "sorabund") { 
+                                                       sumCalculators.push_back(new SorAbund());
+                                               }else if (Estimators[i] == "jclass") { 
+                                                       sumCalculators.push_back(new Jclass());
+                                               }else if (Estimators[i] == "sorclass") { 
+                                                       sumCalculators.push_back(new SorClass());
+                                               }else if (Estimators[i] == "jest") { 
+                                                       sumCalculators.push_back(new Jest());
+                                               }else if (Estimators[i] == "sorest") { 
+                                                       sumCalculators.push_back(new SorEst());
+                                               }else if (Estimators[i] == "thetayc") { 
+                                                       sumCalculators.push_back(new ThetaYC());
+                                               }else if (Estimators[i] == "thetan") { 
+                                                       sumCalculators.push_back(new ThetaN());
+                                               }else if (Estimators[i] == "kstest") { 
+                                                       sumCalculators.push_back(new KSTest());
+                                               }else if (Estimators[i] == "sharednseqs") { 
+                                                       sumCalculators.push_back(new SharedNSeqs());
+                                               }else if (Estimators[i] == "ochiai") { 
+                                                       sumCalculators.push_back(new Ochiai());
+                                               }else if (Estimators[i] == "anderberg") { 
+                                                       sumCalculators.push_back(new Anderberg());
+                                               }else if (Estimators[i] == "kulczynski") { 
+                                                       sumCalculators.push_back(new Kulczynski());
+                                               }else if (Estimators[i] == "kulczynskicody") { 
+                                                       sumCalculators.push_back(new KulczynskiCody());
+                                               }else if (Estimators[i] == "lennon") { 
+                                                       sumCalculators.push_back(new Lennon());
+                                               }else if (Estimators[i] == "morisitahorn") { 
+                                                       sumCalculators.push_back(new MorHorn());
+                                               }else if (Estimators[i] == "braycurtis") { 
+                                                       sumCalculators.push_back(new BrayCurtis());
+                                               }else if (Estimators[i] == "whittaker") { 
+                                                       sumCalculators.push_back(new Whittaker());
+                                               }
+                                       }
+                               }
+                               
+                               //the pairwise summary file is opened here; execute() writes to it
+                               outputFileName = ((getRootName(globaldata->inputFileName)) + "shared.summary");
+                               openOutputFile(outputFileName, outputFileHandle);
+                               mult = false;
                        }
                }
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the SummarySharedCommand class Function SummarySharedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the SummarySharedCommand class function SummarySharedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+}
+
+//**********************************************************************************************************************
+
+//Prints the usage text for summary.shared through mothurOut.
+//Lists the available calculators through validCalculator, so validCalculator must
+//already be allocated when this is called.
+void SummarySharedCommand::help(){
+       try {
+               mothurOut("The summary.shared command can only be executed after a successful read.otu command.\n");
+               mothurOut("The summary.shared command parameters are label, calc, groups and all.  No parameters are required.\n");
+               mothurOut("The summary.shared command should be in the following format: \n");
+               mothurOut("summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n");
+               //the calculator names shown here must be the ones the constructor recognises ("sorabund", not "sorensonabund")
+               mothurOut("Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
+               validCalculator->printCalc("sharedsummary", cout);
+               mothurOut("The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan\n");
+               mothurOut("The default value for groups is all the groups in your groupfile.\n");
+               mothurOut("The label parameter is used to analyze specific labels in your input.\n");
+               mothurOut("The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
+               mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
+               mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n");
+               mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");
+       }
+       catch(exception& e) {
+               errorOut(e, "SummarySharedCommand", "help");
                exit(1);
-       }       
+       }
 }
+
 //**********************************************************************************************************************
 
 SummarySharedCommand::~SummarySharedCommand(){  //releases read and validCalculator, but only when the command did not abort
-       delete input;
-       delete read;
+       if (abort == false) {  //NOTE(review): nothing is freed here when abort is set - confirm a validCalculator created for "help" is released elsewhere
+               delete read;  //NOTE(review): read is assigned in execute(); verify it is null-initialized for runs where execute() returns before creating it
+               delete validCalculator;
+       }
 }
 
 //**********************************************************************************************************************
 
+//Runs summary.shared: reads the shared data, writes the column headers, then calls process()
+//for every requested label (every label when allLines is 1). When anyLabelsToProcess() reports
+//that a requested label was skipped, the previously read label (lastLabel) is processed instead.
+//Returns 0 on every return path; a caught exception is reported and the program exits.
 int SummarySharedCommand::execute(){
        try {
-               outputFileName = ((getRootName(globaldata->inputFileName)) + "shared.summary");
-               openOutputFile(outputFileName, outputFileHandle);
        
-               read = new ReadPhilFile(globaldata->inputFileName);     
+               if (abort == true) { return 0; }
+       
+               //if the users entered no valid calculators don't execute command
+               if (sumCalculators.size() == 0) { return 0; }
+               //check if any calcs can do multiples
+               else{
+                       if (all){ 
+                               for (int i = 0; i < sumCalculators.size(); i++) {
+                                       if (sumCalculators[i]->getMultiple() == true) { mult = true; }
+                               }
+                       }
+               }
+               
+               //read first line
+               read = new ReadOTUFile(globaldata->inputFileName);      
                read->read(&*globaldata); 
+                       
+               input = globaldata->ginput;
+               lookup = input->getSharedRAbundVectors();
+               //do not index an empty lookup; the size check below rejects that case before lastLabel is used
+               string lastLabel = "";
+               if (lookup.size() != 0) { lastLabel = lookup[0]->getLabel(); }
                
+               //output estimator names as column headers
                outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
                for(int i=0;i<sumCalculators.size();i++){
                        outputFileHandle << '\t' << sumCalculators[i]->getName();
                }
                outputFileHandle << endl;
                
-               SharedList = globaldata->gSharedList;
-               input = globaldata->ginput;
-               order = SharedList->getSharedOrderVector();
-               getGroupComb();
+               //create file and put column headers for multiple groups file
+               if (mult == true) {
+                       outAllFileName = ((getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
+                       openOutputFile(outAllFileName, outAll);
+                       
+                       outAll << "label" <<'\t' << "comparison" << '\t'; 
+                       for(int i=0;i<sumCalculators.size();i++){
+                               if (sumCalculators[i]->getMultiple() == true) { 
+                                       outAll << '\t' << sumCalculators[i]->getName();
+                               }
+                       }
+                       outAll << endl;
+               }
                
-               int count = 1;
-               while(order != NULL){
+               if (lookup.size() < 2) { 
+                       mothurOut("I cannot run the command without at least 2 valid groups."); mothurOutEndLine();
+                       for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+                       
+                       //close files and clean up
+                       outputFileHandle.close();  remove(outputFileName.c_str());
+                       if (mult == true) {  outAll.close();  remove(outAllFileName.c_str());  }
+                       
+                       //same cleanup as the normal exit path at the end of this function
+                       globaldata->Groups.clear();
+                       for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
+                       delete input;  globaldata->ginput = NULL;
+                       return 0;
+               //if you only have 2 groups you don't need a .sharedmultiple file
+               }else if ((lookup.size() == 2) && (mult == true)) { 
+                       mult = false;
+                       outAll.close();  
+                       remove(outAllFileName.c_str());
+               }
+                                       
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+                       
+               //as long as you are not at the end of the file or done with the lines you want
+               while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                
-                       if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(order->getLabel()) == 1){                       
-       
-                               cout << order->getLabel() << '\t' << count << endl;
-                               getSharedVectors();  //fills group vectors from order vector.
+                       if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
+                               mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
+                               process(lookup);
                                
-                               //randomize group order
-                               if (globaldata->getJumble() == "1") { random_shuffle(lookup.begin(), lookup.end()); }
+                               processedLabels.insert(lookup[0]->getLabel());
+                               userLabels.erase(lookup[0]->getLabel());
+                       }
+                       
+                       //a requested label was passed over and the previous label has not been processed yet: process the previous label
+                       if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                                       string saveLabel = lookup[0]->getLabel();
+                                       
+                                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
+                                       lookup = input->getSharedRAbundVectors(lastLabel);
 
-                               int n = 1; 
-                               for (int k = 0; k < (lookup.size() - 1); k++) { // pass cdd each set of groups to commpare
-                                       for (int l = n; l < lookup.size(); l++) {
-                                               outputFileHandle << order->getLabel() << '\t' << groupComb[n-1] << '\t' << '\t'; //print out label and group
-                                               for(int i=0;i<sumCalculators.size();i++){
-                                                       sumCalculators[i]->getValues(lookup[k], lookup[l]); //saves the calculator outputs
-                                                       outputFileHandle << '\t';
-                                                       sumCalculators[i]->print(outputFileHandle);
-                                               }
-                                               outputFileHandle << endl;
-                                       }
-                                       n++;
-                               }
+                                       mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
+                                       process(lookup);
+                                       
+                                       processedLabels.insert(lookup[0]->getLabel());
+                                       userLabels.erase(lookup[0]->getLabel());
+                                       
+                                       //restore real lastlabel to save below
+                                       lookup[0]->setLabel(saveLabel);
                        }
+                       
+                       lastLabel = lookup[0]->getLabel();                      
+                               
+                       //get next line to process
+                       //prevent memory leak
+                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
+                       lookup = input->getSharedRAbundVectors();
+               }
                
-                       SharedList = input->getSharedListVector(); //get new list vector to process
-                       if (SharedList != NULL) {
-                               order = SharedList->getSharedOrderVector(); //gets new order vector with group info.
-                               count++;
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
+                               needToRun = true;
                        }else {
-                               break;
+                               mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
                        }
                }
-       
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                               for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
+                               lookup = input->getSharedRAbundVectors(lastLabel);
+
+                               mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
+                               process(lookup);
+                               for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
+               }
+               
+
+               //reset groups parameter
+               globaldata->Groups.clear();  
+               
+               //close files
+               outputFileHandle.close();
+               if (mult == true) {  outAll.close();  }
+               
+               for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
+               
+               delete input;  globaldata->ginput = NULL;
+
                return 0;
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the SummarySharedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               errorOut(e, "SummarySharedCommand", "execute");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the SummarySharedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }               
 }
 
-//**********************************************************************************************************************
-
-void SummarySharedCommand::getSharedVectors(){
-try {
-               lookup.clear();
-               //create and initialize vector of sharedvectors, one for each group
-               for (int i = 0; i < globaldata->gGroupmap->getNumGroups(); i++) { 
-                       SharedRAbundVector* temp = new SharedRAbundVector(order->getNumBins());
-                       temp->setLabel(order->getLabel());
-                       temp->setGroup(globaldata->gGroupmap->namesOfGroups[i]);
-                       lookup.push_back(temp);
-               }
-               
-               int numSeqs = order->size();
-               //sample all the members
-               for(int i=0;i<numSeqs;i++){
-                       //get first sample
-                       individual chosen = order->get(i);
-                       int abundance; 
+/***********************************************************/
+void SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup) {  //writes the rows for one label: an all-groups row to outAll when mult is set, then one row per pair of groups to outputFileHandle
+       try {
+                               //for the calculators that can handle multiple groups, write one row covering all groups to outAll
+                               if (mult == true) {
+                                       //output label
+                                       outAll << thisLookup[0]->getLabel() << '\t';
                                        
-                       //set info for sharedvector in chosens group
-                       for (int j = 0; j < lookup.size(); j++) { 
-                               if (chosen.group == lookup[j]->getGroup()) {
-                                        abundance = lookup[j]->getAbundance(chosen.bin);
-                                        lookup[j]->set(chosen.bin, (abundance + 1), chosen.group);
-                                        break;
+                                       //output group names joined with '-'
+                                       string outNames = "";
+                                       for (int j = 0; j < thisLookup.size(); j++) {
+                                               outNames += thisLookup[j]->getGroup() +  "-";
+                                       }
+                                       outNames = outNames.substr(0, outNames.length()-1); //strip the trailing '-'
+                                       outAll << outNames << '\t';
+                                       
+                                       for(int i=0;i<sumCalculators.size();i++){
+                                               if (sumCalculators[i]->getMultiple() == true) { 
+                                                       sumCalculators[i]->getValues(thisLookup);
+                                                       outAll << '\t';
+                                                       sumCalculators[i]->print(outAll);
+                                               }
+                                       }
+                                       outAll << endl;
                                }
-                       }
-                       
-               }
-       }
-       catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the SummarySharedCommand class Function getSharedVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the SummarySharedCommand class function getSharedVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
+       
+                               int n = 1; //first index of the inner loop; incremented once per outer pass so l always starts at k+1
+                               vector<SharedRAbundVector*> subset; //the pair of groups handed to each calculator
+                               for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass each pair of groups to the calculators
+                                       for (int l = n; l < thisLookup.size(); l++) {
+                                               
+                                               outputFileHandle << thisLookup[0]->getLabel() << '\t';
+                                               
+                                               subset.clear(); //clear out old pair of sharedrabunds
+                                               //add new pair of sharedrabunds
+                                               subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
+                                               
+                                               //print the two group names in ascending string order
+                                               if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
+                                                       outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
+                                               }else{
+                                                       outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
+                                               }
+                                               
+                                               for(int i=0;i<sumCalculators.size();i++) {
 
-}
+                                                       sumCalculators[i]->getValues(subset); //saves the calculator outputs
+                                                       outputFileHandle << '\t';
+                                                       sumCalculators[i]->print(outputFileHandle);
+                                               }
+                                               outputFileHandle << endl;
+                                       }
+                                       n++;
+                               }
 
-/**************************************************************************************/
-void SummarySharedCommand::getGroupComb() {
-       try {
-               string group;
-               
-               int n = 1;
-               for (int i = 0; i < (globaldata->gGroupmap->getNumGroups() - 1); i++) {
-                       for (int l = n; l < globaldata->gGroupmap->getNumGroups(); l++) {
-                               group = globaldata->gGroupmap->namesOfGroups[i] + globaldata->gGroupmap->namesOfGroups[l];
-                               groupComb.push_back(group);     
-                       }
-                       n++;
-               }
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the SummarySharedCommand class Function getGroupComb. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               errorOut(e, "SummarySharedCommand", "process");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the SummarySharedCommand class function getGroupComb. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-
 }
 
+/***********************************************************/