From d1faab5efe1c28700890bdec5b4d8e817fa1dab2 Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Wed, 5 Mar 2014 14:32:31 -0500 Subject: [PATCH] added shannonrange calc. --- Mothur.xcodeproj/project.pbxproj | 2 +- collectcommand.cpp | 22 ++++++++-- collectcommand.h | 2 +- filterseqscommand.cpp | 3 +- mothur.h | 19 +++++++- rabundvector.cpp | 1 + rarefactcommand.cpp | 18 +++++++- rarefactcommand.h | 2 +- sabundvector.cpp | 3 +- shannonrange.cpp | 75 ++++++++++++++++++++++++-------- shannonrange.h | 13 +++++- summarycommand.cpp | 14 +++++- summarycommand.h | 2 +- validcalculator.cpp | 5 +++ 14 files changed, 144 insertions(+), 37 deletions(-) diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 418596f..80b05db 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -1723,6 +1723,7 @@ A7A09B0E18773BF700FAA081 /* shannonrange.h */, A7A09B0F18773C0E00FAA081 /* shannonrange.cpp */, A7E9B7E912D37EC400DA6239 /* sharedace.cpp */, + A7E9B7F412D37EC400DA6239 /* sharedjabund.cpp */, A7E9B7EA12D37EC400DA6239 /* sharedace.h */, A7E9B7EC12D37EC400DA6239 /* sharedanderbergs.cpp */, A7E9B7ED12D37EC400DA6239 /* sharedanderbergs.h */, @@ -1730,7 +1731,6 @@ A7E9B7EF12D37EC400DA6239 /* sharedbraycurtis.h */, A7E9B7F012D37EC400DA6239 /* sharedchao1.cpp */, A7E9B7F112D37EC400DA6239 /* sharedchao1.h */, - A7E9B7F412D37EC400DA6239 /* sharedjabund.cpp */, A7E9B7F512D37EC400DA6239 /* sharedjabund.h */, A7E9B7F612D37EC400DA6239 /* sharedjackknife.cpp */, A7E9B7F712D37EC400DA6239 /* sharedjackknife.h */, diff --git a/collectcommand.cpp b/collectcommand.cpp index b892a91..4b34447 100644 --- a/collectcommand.cpp +++ b/collectcommand.cpp @@ -33,6 +33,7 @@ #include "solow.h" #include "shen.h" #include "coverage.h" +#include "shannonrange.h" //********************************************************************************************************************** @@ -44,9 +45,10 @@ vector CollectCommand::setParameters(){ CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false,true); parameters.push_back(pshared); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq); - CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson", "", "", "","",true,false,true); parameters.push_back(pcalc); + CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson-shannonrange", "", "", "","",true,false,true); parameters.push_back(pcalc); CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund); - CommandParameter psize("size", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psize); + CommandParameter palpha("alpha", "Multiple", "0-1-2", "1", "", "", "","",false,false,true); parameters.push_back(palpha); + CommandParameter psize("size", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psize); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); @@ -64,12 +66,13 @@ string CollectCommand::getHelpString(){ try { string helpString = ""; ValidCalculators validCalculator; - helpString += "The collect.single command parameters are list, sabund, rabund, shared, label, freq, calc and abund. list, sabund, rabund or shared is required unless you have a valid current file. \n"; + helpString += "The collect.single command parameters are list, sabund, rabund, shared, label, freq, calc, alpha and abund. list, sabund, rabund or shared is required unless you have a valid current file. \n"; helpString += "The collect.single command should be in the following format: \n"; helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n"; helpString += "collect.single(label=yourLabel, freq=yourFreq, calc=yourEstimators).\n"; helpString += "Example collect(label=unique-.01-.03, freq=10, calc=sobs-chao-ace-jack).\n"; helpString += "The default values for freq is 100, and calc are sobs-chao-ace-jack-shannon-npshannon-simpson.\n"; + helpString += "The alpha parameter is used to set the alpha value for the shannonrange calculator.\n"; helpString += validCalculator.printCalc("single"); helpString += "The label parameter is used to analyze specific labels in your input.\n"; helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n"; @@ -93,6 +96,7 @@ string CollectCommand::getOutputPattern(string type) { else if (type == "jack") { pattern = "[filename],jack"; } else if (type == "shannon") { pattern = "[filename],shannon"; } else if (type == "shannoneven") { pattern = "[filename],shannoneven"; } + else if (type == "shannonrange"){ pattern = "[filename],shannonrange"; } else if (type == "npshannon") { pattern = "[filename],npshannon"; } else if (type == "heip") { pattern = "[filename],heip"; } else if (type == "smithwilson") { pattern = "[filename],smithwilson"; } @@ -133,6 +137,7 @@ CollectCommand::CollectCommand(){ outputTypes["jack"] = tempOutNames; outputTypes["shannon"] = tempOutNames; outputTypes["shannoneven"] = tempOutNames; + outputTypes["shannonrange"] = tempOutNames; outputTypes["npshannon"] = tempOutNames; outputTypes["heip"] = tempOutNames; outputTypes["smithwilson"] = tempOutNames; @@ -195,6 +200,7 @@ CollectCommand::CollectCommand(string option) { outputTypes["smithwilson"] = tempOutNames; outputTypes["simpson"] = tempOutNames; outputTypes["simpsoneven"] = tempOutNames; + outputTypes["shannonrange"] = tempOutNames; outputTypes["invsimpson"] = tempOutNames; outputTypes["bootstrap"] = tempOutNames; outputTypes["geometric"] = tempOutNames; @@ -319,7 +325,12 @@ CollectCommand::CollectCommand(string option) { string temp; temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; } - m->mothurConvert(temp, freq); + m->mothurConvert(temp, freq); + + temp = validParameter.validFile(parameters, "alpha", false); if (temp == "not found") { temp = "1"; } + m->mothurConvert(temp, alpha); + + if ((alpha != 0) && (alpha != 1) && (alpha != 2)) { m->mothurOut("[ERROR]: Not a valid alpha value. Valid values are 0, 1 and 2."); m->mothurOutEndLine(); abort=true; } temp = validParameter.validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; } m->mothurConvert(temp, abund); @@ -386,6 +397,9 @@ int CollectCommand::execute(){ }else if (Estimators[i] == "shannoneven") { cDisplays.push_back(new CollectDisplay(new ShannonEven(), new OneColumnFile(getOutputFileName("shannoneven", variables)))); outputNames.push_back(getOutputFileName("shannoneven", variables)); outputTypes["shannoneven"].push_back(getOutputFileName("shannoneven", variables)); + }else if (Estimators[i] == "shannonrange") { + cDisplays.push_back(new CollectDisplay(new RangeShannon(alpha), new ThreeColumnFile(getOutputFileName("shannonrange", variables)))); + outputNames.push_back(getOutputFileName("shannonrange", variables)); outputTypes["shannoneven"].push_back(getOutputFileName("shannonrange", variables)); }else if (Estimators[i] == "npshannon") { cDisplays.push_back(new CollectDisplay(new NPShannon(), new OneColumnFile(getOutputFileName("npshannon", variables)))); outputNames.push_back(getOutputFileName("npshannon", variables)); outputTypes["npshannon"].push_back(getOutputFileName("npshannon", variables)); diff --git a/collectcommand.h b/collectcommand.h index ee1b914..8423b7b 100644 --- a/collectcommand.h +++ b/collectcommand.h @@ -52,7 +52,7 @@ private: InputData* input; Collect* cCurve; vector cDisplays; - int abund, size; + int abund, size, alpha; float freq; vector outputNames; diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp index 8b68988..4ac3381 100644 --- a/filterseqscommand.cpp +++ b/filterseqscommand.cpp @@ -990,7 +990,8 @@ int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair* Sequence seq(in); m->gobble(in); if (seq.getName() != "") { - if (seq.getAligned().length() != alignmentLength) { m->mothurOut("Sequences are not all the same length, please correct."); m->mothurOutEndLine(); m->control_pressed = true; } + if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " length = " + toString(seq.getAligned().length())); m->mothurOutEndLine();} + if (seq.getAligned().length() != alignmentLength) { m->mothurOut("[ERROR]: Sequences are not all the same length, please correct."); m->mothurOutEndLine(); m->control_pressed = true; } if(trump != '*') { F.doTrump(seq); } if(m->isTrue(vertical) || soft != 0) { F.getFreqs(seq); } diff --git a/mothur.h b/mothur.h index 67c8ab3..102b44d 100644 --- a/mothur.h +++ b/mothur.h @@ -242,7 +242,24 @@ inline bool compareIndexes(PDistCell left, PDistCell right){ //******************************************************************************************************************** inline bool compareSpearman(spearmanRank left, spearmanRank right){ return (left.score < right.score); -} +} +//******************************************************************************************************************** +inline double max(double left, double right){ + if (left > right) { return left; } + else { return right; } +} +//******************************************************************************************************************** +inline double max(int left, double right){ + double value = left; + if (left > right) { return value; } + else { return right; } +} +//******************************************************************************************************************** +inline double max(double left, int right){ + double value = right; + if (left > value) { return left; } + else { return value; } +} //******************************************************************************************************************** //sorts highest to lowest inline bool compareSeqPriorityNodes(seqPriorityNode left, seqPriorityNode right){ diff --git a/rabundvector.cpp b/rabundvector.cpp index 6cbaa0d..4d985e9 100644 --- a/rabundvector.cpp +++ b/rabundvector.cpp @@ -76,6 +76,7 @@ RAbundVector::RAbundVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), f >> inputData; set(i, inputData); } + } catch(exception& e) { m->errorOut(e, "RAbundVector", "RAbundVector"); diff --git a/rarefactcommand.cpp b/rarefactcommand.cpp index 75a87ef..8146285 100644 --- a/rarefactcommand.cpp +++ b/rarefactcommand.cpp @@ -23,6 +23,7 @@ #include "shannon.h" #include "jackknife.h" #include "coverage.h" +#include "shannonrange.h" //********************************************************************************************************************** @@ -35,8 +36,9 @@ vector RareFactCommand::setParameters(){ CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); - CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap", "sobs", "", "", "","",true,false,true); parameters.push_back(pcalc); + CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-shannonrange", "sobs", "", "", "","",true,false,true); parameters.push_back(pcalc); CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund); + CommandParameter palpha("alpha", "Multiple", "0-1-2", "1", "", "", "","",false,false,true); parameters.push_back(palpha); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pgroupmode); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); @@ -63,6 +65,7 @@ string RareFactCommand::getHelpString(){ helpString += "rarefaction.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n"; helpString += "Example rarefaction.single(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson).\n"; helpString += "The default values for iters is 1000, freq is 100, and calc is rarefaction which calculates the rarefaction curve for the observed richness.\n"; + helpString += "The alpha parameter is used to set the alpha value for the shannonrange calculator.\n"; validCalculator.printCalc("rarefaction"); helpString += "If you are running rarefaction.single with a shared file and would like your results collated in one file, set groupmode=t. (Default=true).\n"; helpString += "The label parameter is used to analyze specific labels in your input.\n"; @@ -86,6 +89,7 @@ string RareFactCommand::getOutputPattern(string type) { else if (type == "r_shannoneven") { pattern = "[filename],r_shannoneven"; } else if (type == "r_smithwilson") { pattern = "[filename],r_smithwilson"; } else if (type == "r_npshannon") { pattern = "[filename],r_npshannon"; } + else if (type == "r_shannonrange"){ pattern = "[filename],r_shannonrange"; } else if (type == "r_simpson") { pattern = "[filename],r_simpson"; } else if (type == "r_simpsoneven") { pattern = "[filename],r_simpsoneven"; } else if (type == "r_invsimpson") { pattern = "[filename],r_invsimpson"; } @@ -114,6 +118,7 @@ RareFactCommand::RareFactCommand(){ outputTypes["r_jack"] = tempOutNames; outputTypes["r_shannon"] = tempOutNames; outputTypes["r_shannoneven"] = tempOutNames; + outputTypes["r_shannonrange"] = tempOutNames; outputTypes["r_heip"] = tempOutNames; outputTypes["r_smithwilson"] = tempOutNames; outputTypes["r_npshannon"] = tempOutNames; @@ -161,6 +166,7 @@ RareFactCommand::RareFactCommand(string option) { outputTypes["r_jack"] = tempOutNames; outputTypes["r_shannon"] = tempOutNames; outputTypes["r_shannoneven"] = tempOutNames; + outputTypes["r_shannonrange"] = tempOutNames; outputTypes["r_heip"] = tempOutNames; outputTypes["r_smithwilson"] = tempOutNames; outputTypes["r_npshannon"] = tempOutNames; @@ -291,6 +297,11 @@ RareFactCommand::RareFactCommand(string option) { temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); m->mothurConvert(temp, processors); + + temp = validParameter.validFile(parameters, "alpha", false); if (temp == "not found") { temp = "1"; } + m->mothurConvert(temp, alpha); + + if ((alpha != 0) && (alpha != 1) && (alpha != 2)) { m->mothurOut("[ERROR]: Not a valid alpha value. Valid values are 0, 1 and 2."); m->mothurOutEndLine(); abort=true; } temp = validParameter.validFile(parameters, "groupmode", false); if (temp == "not found") { temp = "T"; } groupMode = m->isTrue(temp); @@ -356,7 +367,10 @@ int RareFactCommand::execute(){ }else if (Estimators[i] == "heip") { rDisplays.push_back(new RareDisplay(new Heip(), new ThreeColumnFile(getOutputFileName("r_heip",variables)))); outputNames.push_back(getOutputFileName("r_heip",variables)); outputTypes["r_heip"].push_back(getOutputFileName("r_heip",variables)); - }else if (Estimators[i] == "smithwilson") { + }else if (Estimators[i] == "r_shannonrange") { + rDisplays.push_back(new RareDisplay(new RangeShannon(alpha), new ThreeColumnFile(getOutputFileName("r_shannonrange", variables)))); + outputNames.push_back(getOutputFileName("r_shannonrange", variables)); outputTypes["r_shannoneven"].push_back(getOutputFileName("r_shannonrange", variables)); + }else if (Estimators[i] == "smithwilson") { rDisplays.push_back(new RareDisplay(new SmithWilson(), new ThreeColumnFile(getOutputFileName("r_smithwilson",variables)))); outputNames.push_back(getOutputFileName("r_smithwilson",variables)); outputTypes["r_smithwilson"].push_back(getOutputFileName("r_smithwilson",variables)); }else if (Estimators[i] == "npshannon") { diff --git a/rarefactcommand.h b/rarefactcommand.h index 02fe6e3..09de39f 100644 --- a/rarefactcommand.h +++ b/rarefactcommand.h @@ -41,7 +41,7 @@ private: OrderVector* order; InputData* input; Rarefact* rCurve; - int nIters, abund, processors; + int nIters, abund, processors, alpha; float freq; bool abort, allLines, groupMode; diff --git a/sabundvector.cpp b/sabundvector.cpp index 1bceec2..0b3dde6 100644 --- a/sabundvector.cpp +++ b/sabundvector.cpp @@ -54,13 +54,12 @@ SAbundVector::SAbundVector(ifstream& f): DataVector(), maxRank(0), numBins(0), n try { int hold; f >> label >> hold; - + data.assign(hold+1, 0); int inputData; for(int i=1;i<=hold;i++){ f >> inputData; - set(i, inputData); } diff --git a/shannonrange.cpp b/shannonrange.cpp index 2b8468b..cbcacd8 100644 --- a/shannonrange.cpp +++ b/shannonrange.cpp @@ -17,35 +17,72 @@ EstOutput RangeShannon::getValues(SAbundVector* rank){ double commSize = 1e20; double sampleSize = rank->getNumSeqs(); - double aux = ceil(pow(sampleSize+1, (1/(double)3))); - double est0 = rank->get(1)+1; - if (aux > est0) { est0 = aux; } //est0 = max(rank->get(1)+1, aux) + vector freqx; + vector freqy; + for (int i = 1; i <=rank->getMaxRank(); i++) { + int abund = rank->get(i); + if (abund != 0) { + freqx.push_back(i); + freqy.push_back(abund); + } + } + + double aux = ceil(pow((sampleSize+1), (1/(double)3))); + double est0 = max(freqy[0]+1, aux); vector ests; double numr = 0.0; - for (int i = 1; i < rank->getNumBins()-1; i++) { + double denr = 0.0; + for (int i = 0; i < freqx.size()-1; i++) { if (m->control_pressed) { break; } - int abund = rank->get(i); - - if (abund != 0) { + if (freqx[i+1] == freqx[i]+1) { numr = max(freqy[i+1]+1, aux); } + else { numr = aux; } - int abundNext = rank->get(i+1); - if (abundNext == 0) { numr = aux; } - else { - if (abundNext+1 > aux) { numr = abundNext+1; } //numr = max(abundNext+1, aux) - else { numr = aux; } - } - double denr = aux; - if (abund > aux) { denr = abund; } //denr = max(abund, aux) - ests.push_back((abund+1)*numr/denr); - } + denr = max(freqy[i], aux); + ests.push_back((freqx[i]+1)*numr/(double)denr); } numr = aux; + denr = max(freqy[freqy.size()-1], aux); + ests.push_back((freqx[freqx.size()-1]+1)*numr/(double)denr); - - if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } + double sum = 0.0; + for (int i = 0; i < freqy.size(); i++) { sum += (ests[i]*freqy[i]); } + double nfac = est0 + sum; + est0 /= nfac; + + for (int i = 0; i < ests.size(); i++) { ests[i] /= nfac; } + + double abunup = 1 / commSize; + double nbrup = est0 / abunup; + double abunlow = ests[0]; + double nbrlow = est0 / abunlow; + + if (alpha == 1) { + double sum = 0.0; + for (int i = 0; i < freqy.size(); i++) { + if (m->control_pressed) { break; } + sum += (freqy[i] * ests[i] * log(ests[i])); + } + data[0] = -sum; + data[1] = exp(data[0]+nbrlow*(-abunlow*log(abunlow))); + data[2] = exp(data[0]+nbrup*(-abunup*log(abunup))); + }else { + for (int i = 0; i < freqy.size(); i++) { + if (m->control_pressed) { break; } + data[0] += (freqy[i] * (pow(ests[i],alpha))); + } + data[1] = pow(data[0]+nbrup*pow(abunup,alpha), (1/(1-alpha))); + data[2] = pow(data[0]+nbrlow*pow(abunlow,alpha), (1/(1-alpha))); + } + + //this calc has no data[0], just a lower and upper estimate. set data[0] to lower estimate. + data[0] = data[1]; + if (data[1] > data[2]) { data[1] = data[2]; data[2] = data[0]; } + data[0] = data[1]; + + if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } diff --git a/shannonrange.h b/shannonrange.h index 2123e05..c5acd69 100644 --- a/shannonrange.h +++ b/shannonrange.h @@ -6,6 +6,13 @@ // Copyright (c) 2014 Schloss Lab. All rights reserved. // +/* + 1] Haegeman, B., Hamelin, J., Moriarty, J., Neal, P., Dushoff, J., & Weitz, J. S. (2013). Robust estimation of microbial diversity in theory and in practice. The ISME journal, 7(6), 1092–1101. + [2] Hill, M. O. (1973). Diversity and evenness: A unifying notation and its consequences. Ecology, 54(2), 427–432. + [3] Orlitsky, A., Santhanam, N. P., & Zhang, J. (2003). Always Good Turing: Asymptoti- cally optimal probability estimation. Science, 302(5644), 427–431. + [4] Roesch, L. F., Fulthorpe, R. R., Riva, A., Casella, G., Hadwin, A. K., Kent, A. D., et al. (2007). Pyrosequencing enumerates and contrasts soil microbial diversity. The ISME Journal, 1(4), 283–290. + */ + #ifndef Mothur_shannonrange_h #define Mothur_shannonrange_h @@ -16,10 +23,12 @@ class RangeShannon : public Calculator { public: - RangeShannon() : Calculator("rangeshannon", 3, false) {}; + RangeShannon(int a) : alpha(a), Calculator("rangeshannon", 3, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; - string getCitation() { return "http://www.mothur.org/wiki/rangeshannon"; } + string getCitation() { return "Haegeman, B., Hamelin, J., Moriarty, J., Neal, P., Dushoff, J., & Weitz, J. S. (2013). Robust estimation of microbial diversity in theory and in practice. The ISME journal, 7(6), 1092–1101., http://www.mothur.org/wiki/rangeshannon"; } +private: + int alpha; }; /***********************************************************************/ diff --git a/summarycommand.cpp b/summarycommand.cpp index 43202e5..2bed467 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -34,6 +34,7 @@ #include "solow.h" #include "shen.h" #include "subsample.h" +#include "shannonrange.h" //********************************************************************************************************************** vector SummaryCommand::setParameters(){ @@ -45,8 +46,9 @@ vector SummaryCommand::setParameters(){ CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); - CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson", "", "", "","",true,false,true); parameters.push_back(pcalc); - CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund); + CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson-shannonrange", "", "", "","",true,false,true); parameters.push_back(pcalc); + CommandParameter palpha("alpha", "Multiple", "0-1-2", "1", "", "", "","",false,false,true); parameters.push_back(palpha); + CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund); CommandParameter psize("size", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psize); CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pgroupmode); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); @@ -75,6 +77,7 @@ string SummaryCommand::getHelpString(){ helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n"; helpString += "The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson\n"; helpString += "If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=true).\n"; + helpString += "The alpha parameter is used to set the alpha value for the shannonrange calculator.\n"; helpString += "The label parameter is used to analyze specific labels in your input.\n"; helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabels).\n"; return helpString; @@ -268,6 +271,11 @@ SummaryCommand::SummaryCommand(string option) { else { subsample = false; subsampleSize = -1; } } + temp = validParameter.validFile(parameters, "alpha", false); if (temp == "not found") { temp = "1"; } + m->mothurConvert(temp, alpha); + + if ((alpha != 0) && (alpha != 1) && (alpha != 2)) { m->mothurOut("[ERROR]: Not a valid alpha value. Valid values are 0, 1 and 2."); m->mothurOutEndLine(); abort=true; } + if (subsample == false) { iters = 0; } else { //if you did not set a samplesize and are not using a sharedfile @@ -346,6 +354,8 @@ int SummaryCommand::execute(){ sumCalculators.push_back(new Shannon()); }else if(Estimators[i] == "shannoneven"){ sumCalculators.push_back(new ShannonEven()); + }else if(Estimators[i] == "shannonrange"){ + sumCalculators.push_back(new RangeShannon(alpha)); }else if(Estimators[i] == "npshannon"){ sumCalculators.push_back(new NPShannon()); }else if(Estimators[i] == "heip"){ diff --git a/summarycommand.h b/summarycommand.h index 3c84207..891d157 100644 --- a/summarycommand.h +++ b/summarycommand.h @@ -39,7 +39,7 @@ private: vector sumCalculators; InputData* input; SAbundVector* sabund; - int abund, size, iters, subsampleSize; + int abund, size, iters, subsampleSize, alpha; bool abort, allLines, groupMode, subsample; set labels; //holds labels to be used diff --git a/validcalculator.cpp b/validcalculator.cpp index 98f0d9a..f5f6562 100644 --- a/validcalculator.cpp +++ b/validcalculator.cpp @@ -78,6 +78,7 @@ #include "sharednseqs.h" #include "sharedjsd.h" #include "sharedrjsd.h" +#include "shannonrange.h" /********************************************************************/ @@ -139,6 +140,7 @@ void ValidCalculators::printCitations(vector Estimators) { }else if (Estimators[i] == "jack") { Calculator* temp = new Jackknife(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp; }else if (Estimators[i] == "shannon") { Calculator* temp = new Shannon(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp; }else if (Estimators[i] == "shannoneven") { Calculator* temp = new ShannonEven(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp; + }else if (Estimators[i] == "shannonrange") { Calculator* temp = new RangeShannon(0); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp; }else if (Estimators[i] == "npshannon") { Calculator* temp = new NPShannon(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp; }else if (Estimators[i] == "heip") { Calculator* temp = new Heip(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp; @@ -392,6 +394,7 @@ void ValidCalculators::initialSingle() { single["shannon"] = "shannon"; single["npshannon"] = "npshannon"; single["shannoneven"] = "shannoneven"; + single["shannonrange"] = "shannonrange"; single["smithwilson"] = "smithwilson"; single["heip"] = "heip"; single["simpson"] = "simpson"; @@ -482,6 +485,7 @@ void ValidCalculators::initialRarefaction() { rarefaction["heip"] = "heip"; rarefaction["npshannon"] = "npshannon"; rarefaction["shannoneven"] = "shannoneven"; + rarefaction["shannonrange"] = "shannonrange"; rarefaction["simpson"] = "simpson"; rarefaction["invsimpson"] = "invsimpson"; rarefaction["simpsoneven"] = "simpsoneven"; @@ -510,6 +514,7 @@ void ValidCalculators::initialSummary() { summary["smithwilson"] = "smithwilson"; summary["invsimpson"] = "invsimpson"; summary["npshannon"] = "npshannon"; + summary["shannonrange"] = "shannonrange"; summary["simpson"] = "simpson"; summary["simpsoneven"] = "simpsoneven"; summary["bergerparker"] = "bergerparker"; -- 2.39.2