git.donarmstrong.com Git - mothur.git/blobdiff - collectsharedcommand.cpp
moved utilities out of mothur.h and into mothurOut class.
[mothur.git] / collectsharedcommand.cpp
index 217045a4a1fc5dc0c83986f3b3a0345b755e1f6c..c0b8248e58981473b1f7bf70709863c36c68992b 100644 (file)
 #include "sharedsorest.h"
 #include "sharedthetayc.h"
 #include "sharedthetan.h"
+#include "sharedkstest.h"
+#include "whittaker.h"
+#include "sharednseqs.h"
+#include "sharedochiai.h"
+#include "sharedanderbergs.h"
+#include "sharedkulczynski.h"
+#include "sharedkulczynskicody.h"
+#include "sharedlennon.h"
+#include "sharedmorisitahorn.h"
+#include "sharedbraycurtis.h"
+#include "sharedjackknife.h"
+#include "whittaker.h"
+
 
 
 //**********************************************************************************************************************
 
-CollectSharedCommand::CollectSharedCommand(){
+CollectSharedCommand::CollectSharedCommand(string option)  {
        try {
                globaldata = GlobalData::getInstance();
-               string fileNameRoot;
-               fileNameRoot = getRootName(globaldata->inputFileName);
-               format = globaldata->getFormat();
-               validCalculator = new ValidCalculators();
+               abort = false;
+               allLines = 1;
+               labels.clear();
+               Estimators.clear();
+               Groups.clear();
+               
+               //allow user to run help
+               if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
+               
+               else {
+                       //valid parameters for this command
+                       string Array[] =  {"freq","label","calc","groups","all","outputdir","inputdir"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters=parser.getParameters();
+                       
+                       ValidParameters validParameter;
                
-               int i;
-               for (i=0; i<globaldata->Estimators.size(); i++) {
-                       if (validCalculator->isValidCalculator("shared", globaldata->Estimators[i]) == true) { 
-                               if (globaldata->Estimators[i] == "sharedchao") { 
-                                       cDisplays.push_back(new CollectDisplay(new SharedChao1(), new SharedOneColumnFile(fileNameRoot+"shared.chao")));
-                               }else if (globaldata->Estimators[i] == "sharedsobs") { 
-                                       cDisplays.push_back(new CollectDisplay(new SharedSobsCS(), new SharedOneColumnFile(fileNameRoot+"shared.sobs")));
-                               }else if (globaldata->Estimators[i] == "sharedace") { 
-                                       cDisplays.push_back(new CollectDisplay(new SharedAce(), new SharedOneColumnFile(fileNameRoot+"shared.ace")));
-                               }else if (globaldata->Estimators[i] == "sharedjabund") {        
-                                       cDisplays.push_back(new CollectDisplay(new SharedJAbund(), new SharedOneColumnFile(fileNameRoot+"shared.jabund")));
-                               }else if (globaldata->Estimators[i] == "sharedsorensonabund") { 
-                                       cDisplays.push_back(new CollectDisplay(new SharedSorAbund(), new SharedOneColumnFile(fileNameRoot+"shared.sorabund")));
-                               }else if (globaldata->Estimators[i] == "sharedjclass") { 
-                                       cDisplays.push_back(new CollectDisplay(new SharedJclass(), new SharedOneColumnFile(fileNameRoot+"shared.jclass")));
-                               }else if (globaldata->Estimators[i] == "sharedsorclass") { 
-                                       cDisplays.push_back(new CollectDisplay(new SharedSorClass(), new SharedOneColumnFile(fileNameRoot+"shared.sorclass")));
-                               }else if (globaldata->Estimators[i] == "sharedjest") { 
-                                       cDisplays.push_back(new CollectDisplay(new SharedJest(), new SharedOneColumnFile(fileNameRoot+"shared.jest")));
-                               }else if (globaldata->Estimators[i] == "sharedsorest") { 
-                                       cDisplays.push_back(new CollectDisplay(new SharedSorEst(), new SharedOneColumnFile(fileNameRoot+"shared.sorest")));
-                               }else if (globaldata->Estimators[i] == "sharedthetayc") { 
-                                       cDisplays.push_back(new CollectDisplay(new SharedThetaYC(), new SharedOneColumnFile(fileNameRoot+"shared.thetayc")));
-                               }else if (globaldata->Estimators[i] == "sharedthetan") { 
-                                       cDisplays.push_back(new CollectDisplay(new SharedThetaN(), new SharedOneColumnFile(fileNameRoot+"shared.thetan")));
-                               }
+                       //check to make sure all parameters are valid for command
+                       for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
+                       
+                                               
+                       //make sure the user has already run the read.otu command
+                       if (globaldata->getSharedFile() == "") {
+                               if (globaldata->getListFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the collect.shared command."); m->mothurOutEndLine(); abort = true; }
+                               else if (globaldata->getGroupFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the collect.shared command."); m->mothurOutEndLine(); abort = true; }
+                       }
+
+                       
+                       //check for optional parameter and set defaults
+                       // ...at some point we should add some additional type checking...
+                       label = validParameter.validFile(parameters, "label", false);                   
+                       if (label == "not found") { label = ""; }
+                       else { 
+                               if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
+                               else { allLines = 1;  }
+                       }
+                       
+                       //if the user has not specified any labels use the ones from read.otu
+                       if(label == "") {  
+                               allLines = globaldata->allLines; 
+                               labels = globaldata->labels; 
+                       }
+                               
+                       calc = validParameter.validFile(parameters, "calc", false);                     
+                       if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
+                       else { 
+                                if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
+                       }
+                       m->splitAtDash(calc, Estimators);
+                       
+                       groups = validParameter.validFile(parameters, "groups", false);                 
+                       if (groups == "not found") { groups = ""; }
+                       else { 
+                               m->splitAtDash(groups, Groups);
+                       }
+                       globaldata->Groups = Groups;
+                       
+                       string temp;
+                       temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
+                       convert(temp, freq); 
+                       
+                       temp = validParameter.validFile(parameters, "all", false);                              if (temp == "not found") { temp = "false"; }
+                       all = m->isTrue(temp);
+                                               
+                       if (abort == false) {
+                               
+                               if (outputDir == "") { outputDir += m->hasPath(globaldata->inputFileName); }
+                               string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName));
+                               format = globaldata->getFormat();
+                               int i;
+                               
+                               validCalculator = new ValidCalculators();
+                               util = new SharedUtil();
+                               
+                               for (i=0; i<Estimators.size(); i++) {
+                                       if (validCalculator->isValidCalculator("shared", Estimators[i]) == true) { 
+                                               if (Estimators[i] == "sharedchao") { 
+                                                       cDisplays.push_back(new CollectDisplay(new SharedChao1(), new SharedOneColumnFile(fileNameRoot+"shared.chao")));
+                                                       outputNames.push_back(fileNameRoot+"shared.chao");
+                                               }else if (Estimators[i] == "sharedsobs") { 
+                                                       cDisplays.push_back(new CollectDisplay(new SharedSobsCS(), new SharedOneColumnFile(fileNameRoot+"shared.sobs")));
+                                                       outputNames.push_back(fileNameRoot+"shared.sobs");
+                                               }else if (Estimators[i] == "sharedace") { 
+                                                       cDisplays.push_back(new CollectDisplay(new SharedAce(), new SharedOneColumnFile(fileNameRoot+"shared.ace")));
+                                                       outputNames.push_back(fileNameRoot+"shared.ace");
+                                               }else if (Estimators[i] == "jabund") {  
+                                                       cDisplays.push_back(new CollectDisplay(new JAbund(), new SharedOneColumnFile(fileNameRoot+"jabund")));
+                                                       outputNames.push_back(fileNameRoot+"jabund");
+                                               }else if (Estimators[i] == "sorabund") { 
+                                                       cDisplays.push_back(new CollectDisplay(new SorAbund(), new SharedOneColumnFile(fileNameRoot+"sorabund")));
+                                                       outputNames.push_back(fileNameRoot+"sorabund");
+                                               }else if (Estimators[i] == "jclass") { 
+                                                       cDisplays.push_back(new CollectDisplay(new Jclass(), new SharedOneColumnFile(fileNameRoot+"jclass")));
+                                                       outputNames.push_back(fileNameRoot+"jclass");
+                                               }else if (Estimators[i] == "sorclass") { 
+                                                       cDisplays.push_back(new CollectDisplay(new SorClass(), new SharedOneColumnFile(fileNameRoot+"sorclass")));
+                                                       outputNames.push_back(fileNameRoot+"sorclass");
+                                               }else if (Estimators[i] == "jest") { 
+                                                       cDisplays.push_back(new CollectDisplay(new Jest(), new SharedOneColumnFile(fileNameRoot+"jest")));
+                                                       outputNames.push_back(fileNameRoot+"jest");
+                                               }else if (Estimators[i] == "sorest") { 
+                                                       cDisplays.push_back(new CollectDisplay(new SorEst(), new SharedOneColumnFile(fileNameRoot+"sorest")));
+                                                       outputNames.push_back(fileNameRoot+"sorest");
+                                               }else if (Estimators[i] == "thetayc") { 
+                                                       cDisplays.push_back(new CollectDisplay(new ThetaYC(), new SharedOneColumnFile(fileNameRoot+"thetayc")));
+                                                       outputNames.push_back(fileNameRoot+"thetayc");
+                                               }else if (Estimators[i] == "thetan") { 
+                                                       cDisplays.push_back(new CollectDisplay(new ThetaN(), new SharedOneColumnFile(fileNameRoot+"thetan")));
+                                                       outputNames.push_back(fileNameRoot+"thetan");
+                                               }else if (Estimators[i] == "kstest") { 
+                                                       cDisplays.push_back(new CollectDisplay(new KSTest(), new SharedOneColumnFile(fileNameRoot+"kstest")));
+                                                       outputNames.push_back(fileNameRoot+"kstest");
+                                               }else if (Estimators[i] == "whittaker") { 
+                                                       cDisplays.push_back(new CollectDisplay(new Whittaker(), new SharedOneColumnFile(fileNameRoot+"whittaker")));
+                                                       outputNames.push_back(fileNameRoot+"whittaker");
+                                               }else if (Estimators[i] == "sharednseqs") { 
+                                                       cDisplays.push_back(new CollectDisplay(new SharedNSeqs(), new SharedOneColumnFile(fileNameRoot+"shared.nseqs")));
+                                                       outputNames.push_back(fileNameRoot+"shared.nseqs");
+                                               }else if (Estimators[i] == "ochiai") { 
+                                                       cDisplays.push_back(new CollectDisplay(new Ochiai(), new SharedOneColumnFile(fileNameRoot+"ochiai")));
+                                                       outputNames.push_back(fileNameRoot+"ochiai");
+                                               }else if (Estimators[i] == "anderberg") { 
+                                                       cDisplays.push_back(new CollectDisplay(new Anderberg(), new SharedOneColumnFile(fileNameRoot+"anderberg")));
+                                                       outputNames.push_back(fileNameRoot+"anderberg");
+                                               }else if (Estimators[i] == "skulczynski") { 
+                                                       cDisplays.push_back(new CollectDisplay(new Kulczynski(), new SharedOneColumnFile(fileNameRoot+"kulczynski")));
+                                                       outputNames.push_back(fileNameRoot+"kulczynski");
+                                               }else if (Estimators[i] == "kulczynskicody") { 
+                                                       cDisplays.push_back(new CollectDisplay(new KulczynskiCody(), new SharedOneColumnFile(fileNameRoot+"kulczynskicody")));
+                                                       outputNames.push_back(fileNameRoot+"kulczynskicody");
+                                               }else if (Estimators[i] == "lennon") { 
+                                                       cDisplays.push_back(new CollectDisplay(new Lennon(), new SharedOneColumnFile(fileNameRoot+"lennon")));
+                                                       outputNames.push_back(fileNameRoot+"lennon");
+                                               }else if (Estimators[i] == "morisitahorn") { 
+                                                       cDisplays.push_back(new CollectDisplay(new MorHorn(), new SharedOneColumnFile(fileNameRoot+"morisitahorn")));
+                                                       outputNames.push_back(fileNameRoot+"morisitahorn");
+                                               }else if (Estimators[i] == "braycurtis") { 
+                                                       cDisplays.push_back(new CollectDisplay(new BrayCurtis(), new SharedOneColumnFile(fileNameRoot+"braycurtis")));
+                                                       outputNames.push_back(fileNameRoot+"braycurtis");
+                                               }
+                                       }
+                               }       
                        }
                }
-               
-               //reset calc for next command
-               globaldata->setCalc("");
 
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the CollectSharedCommand class Function CollectSharedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "CollectSharedCommand", "CollectSharedCommand");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the CollectSharedCommand class function CollectSharedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+}
+//**********************************************************************************************************************
+
+void CollectSharedCommand::help(){
+       try {
+               m->mothurOut("The collect.shared command can only be executed after a successful read.otu command.\n");
+               m->mothurOut("The collect.shared command parameters are label, freq, calc and groups.  No parameters are required \n");
+               m->mothurOut("The collect.shared command should be in the following format: \n");
+               m->mothurOut("collect.shared(label=yourLabel, freq=yourFreq, calc=yourEstimators, groups=yourGroups).\n");
+               m->mothurOut("Example collect.shared(label=unique-.01-.03, freq=10, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
+               m->mothurOut("The default values for freq is 100 and calc are sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan.\n");
+               m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
+               m->mothurOut("The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n");
+               validCalculator->printCalc("shared", cout);
+               m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
+               m->mothurOut("The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
+               m->mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
+               m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CollectSharedCommand", "help");
                exit(1);
-       }       
-                       
+       }
 }
 
 //**********************************************************************************************************************
 
 CollectSharedCommand::~CollectSharedCommand(){
-       delete order;
-       delete input;
-       delete cCurve;
-       delete read;
+       if (abort == false) {
+               delete input; globaldata->ginput = NULL;
+               delete read;
+               delete util;
+               delete validCalculator;
+               globaldata->gorder = NULL;
+       }
 }
 
 //**********************************************************************************************************************
 
 int CollectSharedCommand::execute(){
        try {
-               int count = 1;
+               
+               if (abort == true) {    return 0;       }
                
                //if the users entered no valid calculators don't execute command
                if (cDisplays.size() == 0) { return 0; }
+               for(int i=0;i<cDisplays.size();i++){    cDisplays[i]->setAll(all);      }       
+       
+               read = new ReadOTUFile(globaldata->inputFileName);      
+               read->read(&*globaldata); 
                
-               if (format == "sharedfile") {
-                       read = new ReadPhilFile(globaldata->inputFileName);     
-                       read->read(&*globaldata); 
-                       
-                       input = globaldata->ginput;
-                       order = input->getSharedOrderVector();
-               }else {
-                       //you are using a list and a groupfile
-                       read = new ReadPhilFile(globaldata->inputFileName);     
-                       read->read(&*globaldata); 
-               
-                       input = globaldata->ginput;
-                       SharedList = globaldata->gSharedList;
-                       order = SharedList->getSharedOrderVector();
-               }
-               
-               while(order != NULL){
+               input = globaldata->ginput;
+               order = input->getSharedOrderVector();
+               string lastLabel = order->getLabel();
                
-                       if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(order->getLabel()) == 1){
+               //if the user enters the label "0.06" and there is no "0.06" in their file, use the next lowest label.
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+                       
+               //set users groups
+               util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "collect");
+               util->updateGroupIndex(globaldata->Groups, globaldata->gGroupmap->groupIndex);
+
+               while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+                       if (m->control_pressed) { 
+                                       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());         }  
+                                       for(int i=0;i<cDisplays.size();i++){    delete cDisplays[i];    }
+                                       delete order; 
+                                       globaldata->Groups.clear();
+                                       return 0;
+                       }
+
+                       if(allLines == 1 || labels.count(order->getLabel()) == 1){
+                       
+                               m->mothurOut(order->getLabel()); m->mothurOutEndLine();
                                //create collectors curve
                                cCurve = new Collect(order, cDisplays);
-                               convert(globaldata->getFreq(), freq);
                                cCurve->getSharedCurve(freq);
-                       
                                delete cCurve;
                        
-                               cout << order->getLabel() << '\t' << count << endl;
+                               processedLabels.insert(order->getLabel());
+                               userLabels.erase(order->getLabel());
+                       }
+                       
+                       //you have a label the user wants that is smaller than this label and the last label has not already been processed
+                       if ((m->anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = order->getLabel();
+                               
+                               delete order;
+                               order = input->getSharedOrderVector(lastLabel);
+                               
+                               m->mothurOut(order->getLabel()); m->mothurOutEndLine();
+                               //create collectors curve
+                               cCurve = new Collect(order, cDisplays);
+                               cCurve->getSharedCurve(freq);
+                               delete cCurve;
+                               
+                               processedLabels.insert(order->getLabel());
+                               userLabels.erase(order->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               order->setLabel(saveLabel);
                        }
                        
+                       
+                       lastLabel = order->getLabel();                  
+                       
                        //get next line to process
-                       if (format == "sharedfile") {
-                               order = input->getSharedOrderVector();
+                       delete order;
+                       order = input->getSharedOrderVector();
+               }
+               
+               if (m->control_pressed) { 
+                                       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());         }  
+                                       for(int i=0;i<cDisplays.size();i++){    delete cDisplays[i];    }
+                                       globaldata->Groups.clear();
+                                       return 0;
+               }
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
                        }else {
-                               //you are using a list and a groupfile
-                               SharedList = input->getSharedListVector(); //get new list vector to process
-                               if (SharedList != NULL) {
-                                       order = SharedList->getSharedOrderVector(); //gets new order vector with group info.
-                               }else {
-                                       break;
-                               }
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
                        }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       if (order != NULL) {  delete order;  }
+                       order = input->getSharedOrderVector(lastLabel);
                        
-                       count++;
+                       m->mothurOut(order->getLabel()); m->mothurOutEndLine();
+                       cCurve = new Collect(order, cDisplays);
+                       cCurve->getSharedCurve(freq);
+                       delete cCurve;
+                       
+                       if (m->control_pressed) { 
+                               for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());         }  
+                               for(int i=0;i<cDisplays.size();i++){    delete cDisplays[i];    }
+                               delete order; 
+                               globaldata->Groups.clear();
+                               return 0;
+                       }
+
+                       delete order;
                }
-       
+               
                for(int i=0;i<cDisplays.size();i++){    delete cDisplays[i];    }       
+               
+               //reset groups parameter
+               globaldata->Groups.clear(); 
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+
+               
                return 0;
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the CollectSharedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "CollectSharedCommand", "execute");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the CollectSharedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }       
 }
 
-
-//**********************************************************************************************************************
+/***********************************************************/