From: westcott Date: Wed, 12 May 2010 12:15:09 +0000 (+0000) Subject: modified freq parameter be a percentage of numSeqs, added catchall command - not... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=5a86e9e5a5a9e061e17b3ae64fb8881f14e53b8a modified freq parameter be a percentage of numSeqs, added catchall command - not working right yet. --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 259ef9c..5b5981b 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -32,6 +32,8 @@ A78434891162224F00100BE0 /* chimeraccodecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeraccodecommand.cpp; sourceTree = ""; }; A7D215C811996C6E00F13F13 /* clearcutcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clearcutcommand.h; sourceTree = ""; }; A7D215C911996C6E00F13F13 /* clearcutcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clearcutcommand.cpp; sourceTree = ""; }; + A7D216061199C47F00F13F13 /* catchallcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = catchallcommand.h; sourceTree = ""; }; + A7D216071199C47F00F13F13 /* catchallcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = catchallcommand.cpp; sourceTree = ""; }; A7DA1FEC113FECD400BF472F /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = ""; }; A7DA1FED113FECD400BF472F /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = ""; }; A7DA1FEE113FECD400BF472F /* aligncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligncommand.cpp; sourceTree = ""; }; @@ -646,6 +648,8 @@ A7DA1FFE113FECD400BF472F /* binsequencecommand.h */, A7DA2007113FECD400BF472F /* bootstrapsharedcommand.cpp */, A7DA2008113FECD400BF472F /* bootstrapsharedcommand.h */, + A7D216061199C47F00F13F13 /* catchallcommand.h */, + A7D216071199C47F00F13F13 /* catchallcommand.cpp */, A7DA2017113FECD400BF472F /* chimeraseqscommand.cpp */, A78434881162224F00100BE0 /* chimeraccodecommand.h */, A7DA2018113FECD400BF472F /* chimeraseqscommand.h */, diff --git a/catchallcommand.cpp b/catchallcommand.cpp new file mode 100644 index 0000000..434c273 --- /dev/null +++ b/catchallcommand.cpp @@ -0,0 +1,293 @@ +/* + * catchallcommand.cpp + * Mothur + * + * Created by westcott on 5/11/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "catchallcommand.h" +#include "globaldata.hpp" + +/**************************************************************************************/ +CatchAllCommand::CatchAllCommand(string option) { + try { + globaldata = GlobalData::getInstance(); + abort = false; + allLines = 0; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"sabund","label","inputdir","outputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("sabund"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["sabund"] = inputDir + it->second; } + } + } + + //check for required parameters + sabundfile = validParameter.validFile(parameters, "sabund", true); + if (sabundfile == "not open") { sabundfile = ""; abort = true; } + else if (sabundfile == "not found") { sabundfile = ""; m->mothurOut("You must provide either a sabund file for the catchall command."); m->mothurOutEndLine(); abort=true; } + else { globaldata->setSabundFile(sabundfile); globaldata->setFormat("sabund"); } + + string label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = "./"; } + } + + } + catch(exception& e) { + m->errorOut(e, "CatchAllCommand", "CatchAllCommand"); + exit(1); + } +} +//********************************************************************************************************************** + +void CatchAllCommand::help(){ + try { + m->mothurOut("The catchall command interfaces mothur with the catchall program written by ...citation goes here...\n"); + m->mothurOut("For more information about clearcut refer to http://catchall.cac.cornell.edu/ \n"); + m->mothurOut("The catchall executable must be in a folder called catchall in the same folder as your mothur executable, similar to mothur's requirements for using blast. \n"); + m->mothurOut("The catchall command parameters are sabund and label, sabund is required. \n"); + m->mothurOut("The label parameter is used to analyze specific labels in your input.\n"); + m->mothurOut("The catchall command should be in the following format: \n"); + m->mothurOut("catchall(sabund=yourSabundFile) \n"); + m->mothurOut("Example: catchall(sabund=abrecovery.fn.sabund) \n"); + } + catch(exception& e) { + m->errorOut(e, "CatchAllCommand", "help"); + exit(1); + } +} + +/**************************************************************************************/ +int CatchAllCommand::execute() { + try { + + if (abort == true) { return 0; } + + vector outputNames; + + //prepare full output directory + outputDir = getFullPathName(outputDir); + + //get location of catchall + GlobalData* globaldata = GlobalData::getInstance(); + string path = globaldata->argv; + path = path.substr(0, (path.find_last_of('m'))); + path = getFullPathName(path); + + string catchAllCommandExe = ""; + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + catchAllCommandExe += "mono " + path + "catchall/CatchAllcmdL.exe "; + #else + catchAllCommandExe += path + "catchall/CatchAllcmdW.exe "; + #endif + + read = new ReadOTUFile(sabundfile); + read->read(&*globaldata); + + SAbundVector* sabund = globaldata->sabund; + string lastLabel = sabund->getLabel(); + input = globaldata->ginput; + + set processedLabels; + set userLabels = labels; + + //for each label the user selected + while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + + if(allLines == 1 || labels.count(sabund->getLabel()) == 1){ + m->mothurOut(sabund->getLabel()); m->mothurOutEndLine(); + + //create list of output files catchall will make + //datasetname_Analysis.csv + //datasetname_BestModelsAnalysis.csv + //datasetname_BestModelsFits.csv + //datasetname_BubblePlot.csv + //datasetname_Poisson_fits.csv + //datasetname_SingleExp_fits.csv + //datasetname_ThreeMixedExp_fits.csv + //datasetname_TwoMixedExp_fits.csv + + //create catchall input file from mothur's inputfile + string filename = process(sabund); + sabund->print(cout); + //create system command + string catchAllCommand = catchAllCommandExe + filename + " " + path; +cout << catchAllCommand << endl; + //run catchall + system(catchAllCommand.c_str()); + + if (m->control_pressed) { delete read; delete input; globaldata->ginput = NULL; delete sabund; return 0; } + + processedLabels.insert(sabund->getLabel()); + userLabels.erase(sabund->getLabel()); + } + + if ((anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = sabund->getLabel(); + + delete sabund; + sabund = (input->getSAbundVector(lastLabel)); + + m->mothurOut(sabund->getLabel()); m->mothurOutEndLine(); + + //create list of output files catchall will make + //datasetname_Analysis.csv + //datasetname_BestModelsAnalysis.csv + //datasetname_BestModelsFits.csv + //datasetname_BubblePlot.csv + //datasetname_Poisson_fits.csv + //datasetname_SingleExp_fits.csv + //datasetname_ThreeMixedExp_fits.csv + //datasetname_TwoMixedExp_fits.csv + + + //create catchall input file from mothur's inputfile + string filename = process(sabund); + + //create system command + string catchAllCommand = catchAllCommandExe + filename + " " + path; +cout << catchAllCommand << endl; + //run catchall + system(catchAllCommand.c_str()); + + if (m->control_pressed) { delete read; delete input; globaldata->ginput = NULL; delete sabund; return 0; } + + processedLabels.insert(sabund->getLabel()); + userLabels.erase(sabund->getLabel()); + + //restore real lastlabel to save below + sabund->setLabel(saveLabel); + } + + + lastLabel = sabund->getLabel(); + + delete sabund; + sabund = (input->getSAbundVector()); + } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + if (sabund != NULL) { delete sabund; } + sabund = (input->getSAbundVector(lastLabel)); + + m->mothurOut(sabund->getLabel()); m->mothurOutEndLine(); + //create list of output files catchall will make + //datasetname_Analysis.csv + //datasetname_BestModelsAnalysis.csv + //datasetname_BestModelsFits.csv + //datasetname_BubblePlot.csv + //datasetname_Poisson_fits.csv + //datasetname_SingleExp_fits.csv + //datasetname_ThreeMixedExp_fits.csv + //datasetname_TwoMixedExp_fits.csv + + //create catchall input file from mothur's inputfile + string filename = process(sabund); + + //create system command + string catchAllCommand = catchAllCommandExe + filename + " " + path; +cout << catchAllCommand << endl; + //run catchall + system(catchAllCommand.c_str()); + + + delete sabund; + } + + delete read; + delete input; globaldata->ginput = NULL; + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + + + return 0; + } + catch(exception& e) { + m->errorOut(e, "CatchAllCommand", "execute"); + exit(1); + } +} +//********************************************************************************************************************** +string CatchAllCommand::process(SAbundVector* sabund) { + try { + string filename = outputDir + getRootName(getSimpleName(sabundfile)) + sabund->getLabel() + ".catchall"; + filename = getFullPathName(filename); + + ofstream out; + openOutputFile(filename, out); + + for (int i = 1; i <= sabund->getMaxRank(); i++) { + int temp = sabund->get(i); + + if (temp != 0) { + out << i << "," << temp << endl; + } + } + out.close(); + + return filename; + + } + catch(exception& e) { + m->errorOut(e, "CatchAllCommand", "process"); + exit(1); + } +} +/**************************************************************************************/ + + + diff --git a/catchallcommand.h b/catchallcommand.h new file mode 100644 index 0000000..ac9f115 --- /dev/null +++ b/catchallcommand.h @@ -0,0 +1,50 @@ +#ifndef CATCHALLCOMMAND_H +#define CATCHALLCOMMAND_H + +/* + * catchallcommand.h + * Mothur + * + * Created by westcott on 5/11/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "command.hpp" +#include "inputdata.h" +#include "readotu.h" +#include "sabundvector.hpp" + +/* + citation goes here + */ + +/****************************************************************************/ + +class CatchAllCommand : public Command { + +public: + + CatchAllCommand(string); + ~CatchAllCommand() {}; + int execute(); + void help(); + +private: + + GlobalData* globaldata; + ReadOTUFile* read; + InputData* input; + + string outputDir, sabundfile, rabundfile, listfile, format; + bool abort, allLines; + set labels; + + string process(SAbundVector*); +}; + +/****************************************************************************/ + +#endif + + diff --git a/clearcutcommand.cpp b/clearcutcommand.cpp index fc028a2..f8d389c 100644 --- a/clearcutcommand.cpp +++ b/clearcutcommand.cpp @@ -139,6 +139,7 @@ void ClearcutCommand::help(){ try { m->mothurOut("The clearcut command interfaces mothur with the clearcut program written by Initiative for Bioinformatics and Evolutionary Studies (IBEST) at the University of Idaho.\n"); m->mothurOut("For more information about clearcut refer to http://bioinformatics.hungry.com/clearcut/ \n"); + m->mothurOut("The clearcut executable must be in a folder called clearcut in the same folder as your mothur executable, similar to mothur's requirements for using blast. \n"); m->mothurOut("The clearcut command parameters are phylip, fasta, version, verbose, quiet, seed, norandom, shuffle, neighbor, expblen, expdist, ntrees, matrixout, stdout, kimura, jukes, protein, DNA, stdin. \n"); m->mothurOut("The phylip parameter allows you to enter your phylip formatted distance matrix. \n"); m->mothurOut("The fasta parameter allows you to enter your aligned fasta file, if you enter a fastafile you specify if the sequences are DNA or protein using the DNA or protein parameters. \n"); @@ -233,3 +234,7 @@ int ClearcutCommand::execute() { } } /**************************************************************************************/ + + + + diff --git a/collect.cpp b/collect.cpp index 62ed9f2..379424c 100644 --- a/collect.cpp +++ b/collect.cpp @@ -11,7 +11,7 @@ /***********************************************************************/ -int Collect::getCurve(int increment = 1){ +int Collect::getCurve(float percentFreq = 0.01){ try { RAbundVector* lookup = new RAbundVector(order->getNumBins()); SAbundVector* rank = new SAbundVector(order->getMaxRank()+1); @@ -21,7 +21,11 @@ int Collect::getCurve(int increment = 1){ for(int i=0;iregisterDisplay(displays[i]); //adds a display[i] to cdd displays[i]->init(label); //sets displays label - } + } + + //convert freq percentage to number + int increment = numSeqs * percentFreq; + for(int i=0;icontrol_pressed) { delete lookup; delete rank; delete ccd; return 1; } @@ -62,9 +66,9 @@ int Collect::getCurve(int increment = 1){ } /***********************************************************************/ -int Collect::getSharedCurve(int increment = 1){ +int Collect::getSharedCurve(float percentFreq = 0.01){ try { - globaldata = GlobalData::getInstance(); + globaldata = GlobalData::getInstance(); vector lookup; vector subset; @@ -96,6 +100,9 @@ try { else { displays[i]->init(groupLabel); } } + //convert freq percentage to number + int increment = numSeqs * percentFreq; + //sample all the members for(int i=0;igetNumSeqs()), sharedorder(sharedorder), displays(disp), label(sharedorder->getLabel()) { m = MothurOut::getInstance(); } ~Collect(){ }; - int getCurve(int); - int getSharedCurve(int); + int getCurve(float); + int getSharedCurve(float); private: SharedOrderVector* sharedorder; diff --git a/collectcommand.cpp b/collectcommand.cpp index cf8ba10..1e2bf2f 100644 --- a/collectcommand.cpp +++ b/collectcommand.cpp @@ -86,7 +86,7 @@ CollectCommand::CollectCommand(string option) { splitAtDash(calc, Estimators); string temp; - temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; } + temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "0.10"; } convert(temp, freq); temp = validParameter.validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; } @@ -110,6 +110,7 @@ void CollectCommand::help(){ m->mothurOut("The collect.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster.\n"); m->mothurOut("The collect.single command parameters are label, freq, calc and abund. No parameters are required. \n"); m->mothurOut("The collect.single command should be in the following format: \n"); + m->mothurOut("The freq parameter is used indicate when to output your data. It is a percentage of the number of sequences. By default it is set to 0.10, meaning 10%. \n"); m->mothurOut("collect.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n"); m->mothurOut("Example collect(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-chao-ace-jack).\n"); m->mothurOut("The default values for freq is 100, and calc are sobs-chao-ace-jack-shannon-npshannon-simpson.\n"); diff --git a/collectcommand.h b/collectcommand.h index 0df1bab..773bdae 100644 --- a/collectcommand.h +++ b/collectcommand.h @@ -50,7 +50,8 @@ private: Collect* cCurve; ValidCalculators* validCalculator; vector cDisplays; - int freq, abund, size; + int abund, size; + float freq; bool abort, allLines; set labels; //holds labels to be used diff --git a/collectsharedcommand.cpp b/collectsharedcommand.cpp index 28e58b1..78ce5c4 100644 --- a/collectsharedcommand.cpp +++ b/collectsharedcommand.cpp @@ -104,7 +104,7 @@ CollectSharedCommand::CollectSharedCommand(string option) { globaldata->Groups = Groups; string temp; - temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; } + temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "0.10"; } convert(temp, freq); temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; } @@ -208,6 +208,7 @@ void CollectSharedCommand::help(){ m->mothurOut("Example collect.shared(label=unique-.01-.03, freq=10, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n"); m->mothurOut("The default values for freq is 100 and calc are sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan.\n"); m->mothurOut("The default value for groups is all the groups in your groupfile.\n"); + m->mothurOut("The freq parameter is used indicate when to output your data. It is a percentage of the number of sequences. By default it is set to 0.10, meaning 10%. \n"); validCalculator->printCalc("shared", cout); m->mothurOut("The label parameter is used to analyze specific labels in your input.\n"); m->mothurOut("The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n"); diff --git a/collectsharedcommand.h b/collectsharedcommand.h index da43adc..b7a4941 100644 --- a/collectsharedcommand.h +++ b/collectsharedcommand.h @@ -42,7 +42,7 @@ private: ValidCalculators* validCalculator; Collect* cCurve; vector cDisplays; - int freq; + float freq; string format; bool abort, allLines, all; diff --git a/commandfactory.cpp b/commandfactory.cpp index 9917b3c..be85727 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -76,6 +76,7 @@ #include "makegroupcommand.h" #include "chopseqscommand.h" #include "clearcutcommand.h" +#include "catchallcommand.h" /*******************************************************/ @@ -159,6 +160,7 @@ CommandFactory::CommandFactory(){ commands["make.group"] = "make.group"; commands["chop.seqs"] = "chop.seqs"; commands["clearcut"] = "clearcut"; + commands["catchall"] = "catchall"; commands["classify.seqs"] = "MPIEnabled"; commands["dist.seqs"] = "MPIEnabled"; commands["filter.seqs"] = "MPIEnabled"; @@ -279,6 +281,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "make.group") { command = new MakeGroupCommand(optionString); } else if(commandName == "chop.seqs") { command = new ChopSeqsCommand(optionString); } else if(commandName == "clearcut") { command = new ClearcutCommand(optionString); } + else if(commandName == "catchall") { command = new CatchAllCommand(optionString); } else { command = new NoCommand(optionString); } return command; diff --git a/makefile b/makefile index 7c21475..de1b77c 100644 --- a/makefile +++ b/makefile @@ -148,7 +148,8 @@ mothur : \ ./helpcommand.o\ ./makegroupcommand.o\ ./chopseqscommand.o\ - ./clearcutcommand.o\ + ./clearcutcommand.o\ + ./catchallcommand.o\ ./inputdata.o\ ./jackknife.o\ ./kmer.o\ @@ -352,7 +353,8 @@ mothur : \ ./helpcommand.o\ ./makegroupcommand.o\ ./chopseqscommand.o\ - ./clearcutcommand.o\ + ./clearcutcommand.o\ + ./catchallcommand.o\ ./inputdata.o\ ./jackknife.o\ ./kmer.o\ @@ -559,7 +561,8 @@ clean : ./helpcommand.o\ ./makegroupcommand.o\ ./chopseqscommand.o\ - ./clearcutcommand.o\ + ./clearcutcommand.o\ + ./catchallcommand.o\ ./inputdata.o\ ./jackknife.o\ ./kmer.o\ @@ -1668,6 +1671,10 @@ install : mothur # Item # 203 -- clearcutcommand -- ./clearcutcommand.o : clearcutcommand.cpp - $(CC) $(CC_OPTIONS) clearcutcommand.cpp -c $(INCLUDE) -o ./clearcutcommand.o + $(CC) $(CC_OPTIONS) clearcutcommand.cpp -c $(INCLUDE) -o ./clearcutcommand.o + +# Item # 204 -- catchallcommand -- +./catchallcommand.o : catchallcommand.cpp + $(CC) $(CC_OPTIONS) catchallcommand.cpp -c $(INCLUDE) -o ./catchallcommand.o ##### END RUN #### diff --git a/phylodiversitycommand.cpp b/phylodiversitycommand.cpp index 978b188..7e0574e 100644 --- a/phylodiversitycommand.cpp +++ b/phylodiversitycommand.cpp @@ -41,7 +41,7 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option) { m->mothurOut("You must execute the read.tree command, before you may execute the phylo.diversity command."); m->mothurOutEndLine(); abort = true; } string temp; - temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; } + temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "0.10"; } convert(temp, freq); temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } @@ -69,7 +69,16 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option) { void PhyloDiversityCommand::help(){ try { - + m->mothurOut("The phylo.diversity command can only be executed after a successful read.tree command.\n"); + m->mothurOut("The phylo.diversity command parameters are groups, iters, freq and rarefy. No parameters are required.\n"); + m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n"); + m->mothurOut("The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n"); + m->mothurOut("The freq parameter is used indicate when to output your data. It is a percentage of the number of sequences. By default it is set to 0.10, meaning 10%. \n"); + m->mothurOut("The rarefy parameter allows you to create a rarefaction curve. The default is false.\n"); + m->mothurOut("The phylo.diversity command should be in the following format: phylo.diversity(groups=yourGroups, rarefy=yourRarefy, iters=yourIters).\n"); + m->mothurOut("Example phylo.diversity(groups=A-B-C, rarefy=T, iters=500).\n"); + m->mothurOut("The phylo.diversity command output two files: .phylo.diversity and if rarefy=T, .rarefaction.\n"); + m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n"); } catch(exception& e) { @@ -122,14 +131,17 @@ int PhyloDiversityCommand::execute(){ numLeafNodes = randomLeaf.size(); //reset the number of leaf nodes you are using + //convert freq percentage to number + int increment = numLeafNodes * freq; + //each group, each sampling, if no rarefy iters = 1; vector< vector > diversity; diversity.resize(globaldata->Groups.size()); //initialize sampling spots vector numSampledList; - for(int k = 0; k < numLeafNodes; k++){ if((k == 0) || (k+1) % freq == 0){ numSampledList.push_back(k); } } - if(numLeafNodes % freq != 0){ numSampledList.push_back(numLeafNodes); } + for(int k = 0; k < numLeafNodes; k++){ if((k == 0) || (k+1) % increment == 0){ numSampledList.push_back(k); } } + if(numLeafNodes % increment != 0){ numSampledList.push_back(numLeafNodes); } //initialize diversity for (int j = 0; j < diversity.size(); j++) { diversity[j].resize(numSampledList.size(), 0.0); } // 10sampled 20 sampled ... @@ -147,7 +159,7 @@ int PhyloDiversityCommand::execute(){ leavesSampled.push_back(randomLeaf[k]); - if((k == 0) || (k+1) % freq == 0){ //ready to calc? + if((k == 0) || (k+1) % increment == 0){ //ready to calc? data = phylo.getValues(trees[i], leavesSampled); @@ -158,7 +170,7 @@ int PhyloDiversityCommand::execute(){ } } - if(numLeafNodes % freq != 0){ + if(numLeafNodes % increment != 0){ data = phylo.getValues(trees[i], leavesSampled); diff --git a/phylodiversitycommand.h b/phylodiversitycommand.h index bd26173..8dd569b 100644 --- a/phylodiversitycommand.h +++ b/phylodiversitycommand.h @@ -24,8 +24,9 @@ class PhyloDiversityCommand : public Command { private: GlobalData* globaldata; - - int iters, freq; + + float freq; + int iters; bool abort, rarefy; string groups, outputDir; vector Groups, outputNames; //holds groups to be used, and outputFile names diff --git a/rarefact.cpp b/rarefact.cpp index f5e7952..76f6829 100644 --- a/rarefact.cpp +++ b/rarefact.cpp @@ -12,13 +12,16 @@ /***********************************************************************/ -int Rarefact::getCurve(int increment = 1, int nIters = 1000){ +int Rarefact::getCurve(float percentFreq = 0.01, int nIters = 1000){ try { RarefactionCurveData* rcd = new RarefactionCurveData(); for(int i=0;iregisterDisplay(displays[i]); } - + + //convert freq percentage to number + int increment = numSeqs * percentFreq; + for(int iter=0;iterjumble == false) { nIters = 1; } + //convert freq percentage to number + int increment = numSeqs * percentFreq; + for(int iter=0;itermothurOut("The rarefaction.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION.\n"); m->mothurOut("The rarefaction.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster.\n"); m->mothurOut("The rarefaction.single command parameters are label, iters, freq, calc and abund. No parameters are required. \n"); + m->mothurOut("The freq parameter is used indicate when to output your data. It is a percentage of the number of sequences. By default it is set to 0.10, meaning 10%. \n"); m->mothurOut("The rarefaction.single command should be in the following format: \n"); m->mothurOut("rarefaction.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n"); m->mothurOut("Example rarefaction.single(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson).\n"); diff --git a/rarefactcommand.h b/rarefactcommand.h index dce1a0d..f4492a2 100644 --- a/rarefactcommand.h +++ b/rarefactcommand.h @@ -36,8 +36,9 @@ private: InputData* input; ValidCalculators* validCalculator; Rarefact* rCurve; - int freq, nIters, abund; - + int nIters, abund; + float freq; + bool abort, allLines; set labels; //holds labels to be used string label, calc; diff --git a/rarefactsharedcommand.cpp b/rarefactsharedcommand.cpp index 37b54d1..357bc87 100644 --- a/rarefactsharedcommand.cpp +++ b/rarefactsharedcommand.cpp @@ -28,7 +28,7 @@ RareFactSharedCommand::RareFactSharedCommand(string option) { else { //valid paramters for this command - string Array[] = {"iters","label","calc","groups", "jumble","outputdir","inputdir"}; + string Array[] = {"iters","freq","label","calc","groups", "jumble","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -84,6 +84,9 @@ RareFactSharedCommand::RareFactSharedCommand(string option) { globaldata->Groups = Groups; string temp; + temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "0.10"; } + convert(temp, freq); + temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } convert(temp, nIters); @@ -130,6 +133,7 @@ void RareFactSharedCommand::help(){ m->mothurOut("The rarefaction.shared command parameters are label, iters, groups, jumble and calc. No parameters are required.\n"); m->mothurOut("The rarefaction command should be in the following format: \n"); m->mothurOut("rarefaction.shared(label=yourLabel, iters=yourIters, calc=yourEstimators, jumble=yourJumble, groups=yourGroups).\n"); + m->mothurOut("The freq parameter is used indicate when to output your data. It is a percentage of the number of sequences. By default it is set to 0.10, meaning 10%. \n"); m->mothurOut("Example rarefaction.shared(label=unique-0.01-0.03, iters=10000, groups=B-C, jumble=T, calc=sharedobserved).\n"); m->mothurOut("The default values for iters is 1000, freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness.\n"); m->mothurOut("The default value for groups is all the groups in your groupfile, and jumble is true.\n"); diff --git a/rarefactsharedcommand.h b/rarefactsharedcommand.h index da26d17..1bcddb5 100644 --- a/rarefactsharedcommand.h +++ b/rarefactsharedcommand.h @@ -34,9 +34,10 @@ private: ValidCalculators* validCalculator; Rarefact* rCurve; vector rDisplays; - int freq, nIters; + int nIters; string format; - + float freq; + bool abort, allLines, jumble; set labels; //holds labels to be used string label, calc, groups, outputDir;