]> git.donarmstrong.com Git - mothur.git/commitdiff
added shannonrange calc.
authorSarah Westcott <mothur.westcott@gmail.com>
Wed, 5 Mar 2014 19:32:31 +0000 (14:32 -0500)
committerSarah Westcott <mothur.westcott@gmail.com>
Wed, 5 Mar 2014 19:32:31 +0000 (14:32 -0500)
14 files changed:
Mothur.xcodeproj/project.pbxproj
collectcommand.cpp
collectcommand.h
filterseqscommand.cpp
mothur.h
rabundvector.cpp
rarefactcommand.cpp
rarefactcommand.h
sabundvector.cpp
shannonrange.cpp
shannonrange.h
summarycommand.cpp
summarycommand.h
validcalculator.cpp

index 418596f842d89d64d087451443cabb3fc1d3ae15..80b05db9c02288a1c91fb0085c13426774d06fc7 100644 (file)
                                A7A09B0E18773BF700FAA081 /* shannonrange.h */,
                                A7A09B0F18773C0E00FAA081 /* shannonrange.cpp */,
                                A7E9B7E912D37EC400DA6239 /* sharedace.cpp */,
+                               A7E9B7F412D37EC400DA6239 /* sharedjabund.cpp */,
                                A7E9B7EA12D37EC400DA6239 /* sharedace.h */,
                                A7E9B7EC12D37EC400DA6239 /* sharedanderbergs.cpp */,
                                A7E9B7ED12D37EC400DA6239 /* sharedanderbergs.h */,
                                A7E9B7EF12D37EC400DA6239 /* sharedbraycurtis.h */,
                                A7E9B7F012D37EC400DA6239 /* sharedchao1.cpp */,
                                A7E9B7F112D37EC400DA6239 /* sharedchao1.h */,
-                               A7E9B7F412D37EC400DA6239 /* sharedjabund.cpp */,
                                A7E9B7F512D37EC400DA6239 /* sharedjabund.h */,
                                A7E9B7F612D37EC400DA6239 /* sharedjackknife.cpp */,
                                A7E9B7F712D37EC400DA6239 /* sharedjackknife.h */,
index b892a91bee89e9b6d01ac7bda8e40833fd4db3b0..4b34447480b9f20a6c06908b31ef075515a7e138 100644 (file)
@@ -33,6 +33,7 @@
 #include "solow.h"
 #include "shen.h"
 #include "coverage.h"
+#include "shannonrange.h"
 
 
 //**********************************************************************************************************************
@@ -44,9 +45,10 @@ vector<string> CollectCommand::setParameters(){
                CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false,true); parameters.push_back(pshared);
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
                CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq);
-               CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson", "", "", "","",true,false,true); parameters.push_back(pcalc);
+               CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson-shannonrange", "", "", "","",true,false,true); parameters.push_back(pcalc);
                CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund);
-               CommandParameter psize("size", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psize);
+        CommandParameter palpha("alpha", "Multiple", "0-1-2", "1", "", "", "","",false,false,true); parameters.push_back(palpha);
+        CommandParameter psize("size", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psize);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
@@ -64,12 +66,13 @@ string CollectCommand::getHelpString(){
        try {
                string helpString = "";
                ValidCalculators validCalculator;
-               helpString += "The collect.single command parameters are list, sabund, rabund, shared, label, freq, calc and abund.  list, sabund, rabund or shared is required unless you have a valid current file. \n";
+               helpString += "The collect.single command parameters are list, sabund, rabund, shared, label, freq, calc, alpha and abund.  list, sabund, rabund or shared is required unless you have a valid current file. \n";
                helpString += "The collect.single command should be in the following format: \n";
                helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
                helpString += "collect.single(label=yourLabel, freq=yourFreq, calc=yourEstimators).\n";
                helpString += "Example collect(label=unique-.01-.03, freq=10, calc=sobs-chao-ace-jack).\n";
                helpString += "The default values for freq is 100, and calc are sobs-chao-ace-jack-shannon-npshannon-simpson.\n";
+        helpString += "The alpha parameter is used to set the alpha value for the shannonrange calculator.\n";
                helpString += validCalculator.printCalc("single");
                helpString += "The label parameter is used to analyze specific labels in your input.\n";
                helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n";
@@ -93,6 +96,7 @@ string CollectCommand::getOutputPattern(string type) {
         else if (type == "jack")        {  pattern =  "[filename],jack";            }
         else if (type == "shannon")     {  pattern =  "[filename],shannon";         }
         else if (type == "shannoneven") {  pattern =  "[filename],shannoneven";     }
+        else if (type == "shannonrange"){  pattern =  "[filename],shannonrange";    }
         else if (type == "npshannon")   {  pattern =  "[filename],npshannon";       }
         else if (type == "heip")        {  pattern =  "[filename],heip";            }
         else if (type == "smithwilson") {  pattern =  "[filename],smithwilson";     }
@@ -133,6 +137,7 @@ CollectCommand::CollectCommand(){
                outputTypes["jack"] = tempOutNames;
                outputTypes["shannon"] = tempOutNames;
                outputTypes["shannoneven"] = tempOutNames;
+        outputTypes["shannonrange"] = tempOutNames;
                outputTypes["npshannon"] = tempOutNames;
                outputTypes["heip"] = tempOutNames;
                outputTypes["smithwilson"] = tempOutNames;
@@ -195,6 +200,7 @@ CollectCommand::CollectCommand(string option)  {
                        outputTypes["smithwilson"] = tempOutNames;
                        outputTypes["simpson"] = tempOutNames;
                        outputTypes["simpsoneven"] = tempOutNames;
+            outputTypes["shannonrange"] = tempOutNames;
                        outputTypes["invsimpson"] = tempOutNames;
                        outputTypes["bootstrap"] = tempOutNames;
                        outputTypes["geometric"] = tempOutNames;
@@ -319,7 +325,12 @@ CollectCommand::CollectCommand(string option)  {
 
                        string temp;
                        temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
-                       m->mothurConvert(temp, freq); 
+                       m->mothurConvert(temp, freq);
+            
+            temp = validParameter.validFile(parameters, "alpha", false);               if (temp == "not found") { temp = "1"; }
+                       m->mothurConvert(temp, alpha);
+            
+            if ((alpha != 0) && (alpha != 1) && (alpha != 2)) { m->mothurOut("[ERROR]: Not a valid alpha value. Valid values are 0, 1 and 2."); m->mothurOutEndLine(); abort=true; }
                        
                        temp = validParameter.validFile(parameters, "abund", false);            if (temp == "not found") { temp = "10"; }
                        m->mothurConvert(temp, abund); 
@@ -386,6 +397,9 @@ int CollectCommand::execute(){
                                        }else if (Estimators[i] == "shannoneven") { 
                                                cDisplays.push_back(new CollectDisplay(new ShannonEven(), new OneColumnFile(getOutputFileName("shannoneven", variables))));
                                                outputNames.push_back(getOutputFileName("shannoneven", variables)); outputTypes["shannoneven"].push_back(getOutputFileName("shannoneven", variables));
+                    }else if (Estimators[i] == "shannonrange") {
+                            cDisplays.push_back(new CollectDisplay(new RangeShannon(alpha), new ThreeColumnFile(getOutputFileName("shannonrange", variables))));
+                            outputNames.push_back(getOutputFileName("shannonrange", variables)); outputTypes["shannoneven"].push_back(getOutputFileName("shannonrange", variables));
                                        }else if (Estimators[i] == "npshannon") { 
                                                cDisplays.push_back(new CollectDisplay(new NPShannon(), new OneColumnFile(getOutputFileName("npshannon", variables))));
                                                outputNames.push_back(getOutputFileName("npshannon", variables)); outputTypes["npshannon"].push_back(getOutputFileName("npshannon", variables));
index ee1b914ee1594b4997a7706be60606e686fda305..8423b7b762233c549ce62283d5ecb800c7ceb08f 100644 (file)
@@ -52,7 +52,7 @@ private:
        InputData* input;
        Collect* cCurve;
        vector<Display*> cDisplays;
-       int abund, size;
+       int abund, size, alpha;
        float freq;
        vector<string> outputNames;
 
index 8b68988884acb408d0e72c5c3fa407a7b9f241b6..4ac3381d384edf1979f78c911381e040666b8afe 100644 (file)
@@ -990,7 +990,8 @@ int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair*
                                        
                        Sequence seq(in); m->gobble(in);
                        if (seq.getName() != "") {
-                                       if (seq.getAligned().length() != alignmentLength) { m->mothurOut("Sequences are not all the same length, please correct."); m->mothurOutEndLine(); m->control_pressed = true;  }
+                    if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " length = " + toString(seq.getAligned().length())); m->mothurOutEndLine();}
+                                       if (seq.getAligned().length() != alignmentLength) { m->mothurOut("[ERROR]: Sequences are not all the same length, please correct."); m->mothurOutEndLine(); m->control_pressed = true;  }
                                        
                                        if(trump != '*')                        {       F.doTrump(seq);         }
                                        if(m->isTrue(vertical) || soft != 0)    {       F.getFreqs(seq);        }
index 67c8ab3feaf7f7a82d611e2ed4b60c44fb86e86e..102b44d57030561244c8ba6205f7c02c831b9b36 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -242,7 +242,24 @@ inline bool compareIndexes(PDistCell left, PDistCell right){
 //********************************************************************************************************************
 inline bool compareSpearman(spearmanRank left, spearmanRank right){
        return (left.score < right.score);      
-} 
+}
+//********************************************************************************************************************
+inline double max(double left, double right){
+    if (left > right) { return left; }
+    else { return right; }
+}
+//********************************************************************************************************************
+inline double max(int left, double right){
+    double value = left;
+    if (left > right) { return value; }
+    else { return right; }
+}
+//********************************************************************************************************************
+inline double max(double left, int right){
+    double value = right;
+    if (left > value) { return left; }
+    else { return value; }
+}
 //********************************************************************************************************************
 //sorts highest to lowest
 inline bool compareSeqPriorityNodes(seqPriorityNode left, seqPriorityNode right){
index 6cbaa0d896ad7d47105873c67cdc6d003329b83a..4d985e9f8a5ead507841799f3fa24b095017e710 100644 (file)
@@ -76,6 +76,7 @@ RAbundVector::RAbundVector(ifstream& f) : DataVector(), maxRank(0), numBins(0),
                        f >> inputData;
                        set(i, inputData);
                }
+        
        }
        catch(exception& e) {
                m->errorOut(e, "RAbundVector", "RAbundVector");
index 75a87efa3a2f95259488b0abf15887fb39ee1955..8146285921fb271ee45b916ca5644886dbd3cf36 100644 (file)
@@ -23,6 +23,7 @@
 #include "shannon.h"
 #include "jackknife.h"
 #include "coverage.h"
+#include "shannonrange.h"
 
 
 //**********************************************************************************************************************
@@ -35,8 +36,9 @@ vector<string> RareFactCommand::setParameters(){
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
                CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq);
                CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
-               CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap", "sobs", "", "", "","",true,false,true); parameters.push_back(pcalc);
+               CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-shannonrange", "sobs", "", "", "","",true,false,true); parameters.push_back(pcalc);
                CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund);
+        CommandParameter palpha("alpha", "Multiple", "0-1-2", "1", "", "", "","",false,false,true); parameters.push_back(palpha);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
                CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pgroupmode);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
@@ -63,6 +65,7 @@ string RareFactCommand::getHelpString(){
                helpString += "rarefaction.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n";
                helpString += "Example rarefaction.single(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson).\n";
                helpString += "The default values for iters is 1000, freq is 100, and calc is rarefaction which calculates the rarefaction curve for the observed richness.\n";
+        helpString += "The alpha parameter is used to set the alpha value for the shannonrange calculator.\n";
                validCalculator.printCalc("rarefaction");
                helpString += "If you are running rarefaction.single with a shared file and would like your results collated in one file, set groupmode=t. (Default=true).\n";
                helpString += "The label parameter is used to analyze specific labels in your input.\n";
@@ -86,6 +89,7 @@ string RareFactCommand::getOutputPattern(string type) {
         else if (type == "r_shannoneven") {  pattern =  "[filename],r_shannoneven"; }
         else if (type == "r_smithwilson") {  pattern =  "[filename],r_smithwilson"; }
         else if (type == "r_npshannon") {  pattern =  "[filename],r_npshannon"; }
+        else if (type == "r_shannonrange"){  pattern =  "[filename],r_shannonrange";    }
         else if (type == "r_simpson") {  pattern =  "[filename],r_simpson"; }
         else if (type == "r_simpsoneven") {  pattern =  "[filename],r_simpsoneven"; }
         else if (type == "r_invsimpson") {  pattern =  "[filename],r_invsimpson"; }
@@ -114,6 +118,7 @@ RareFactCommand::RareFactCommand(){
                outputTypes["r_jack"] = tempOutNames;
                outputTypes["r_shannon"] = tempOutNames;
                outputTypes["r_shannoneven"] = tempOutNames;
+        outputTypes["r_shannonrange"] = tempOutNames;
                outputTypes["r_heip"] = tempOutNames;
                outputTypes["r_smithwilson"] = tempOutNames;
                outputTypes["r_npshannon"] = tempOutNames;
@@ -161,6 +166,7 @@ RareFactCommand::RareFactCommand(string option)  {
                        outputTypes["r_jack"] = tempOutNames;
                        outputTypes["r_shannon"] = tempOutNames;
                        outputTypes["r_shannoneven"] = tempOutNames;
+            outputTypes["r_shannonrange"] = tempOutNames;
                        outputTypes["r_heip"] = tempOutNames;
                        outputTypes["r_smithwilson"] = tempOutNames;
                        outputTypes["r_npshannon"] = tempOutNames;
@@ -291,6 +297,11 @@ RareFactCommand::RareFactCommand(string option)  {
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);
+            
+            temp = validParameter.validFile(parameters, "alpha", false);               if (temp == "not found") { temp = "1"; }
+                       m->mothurConvert(temp, alpha);
+            
+            if ((alpha != 0) && (alpha != 1) && (alpha != 2)) { m->mothurOut("[ERROR]: Not a valid alpha value. Valid values are 0, 1 and 2."); m->mothurOutEndLine(); abort=true; }
                        
                        temp = validParameter.validFile(parameters, "groupmode", false);                if (temp == "not found") { temp = "T"; }
                        groupMode = m->isTrue(temp);
@@ -356,7 +367,10 @@ int RareFactCommand::execute(){
                                        }else if (Estimators[i] == "heip") { 
                                                rDisplays.push_back(new RareDisplay(new Heip(), new ThreeColumnFile(getOutputFileName("r_heip",variables))));
                                                outputNames.push_back(getOutputFileName("r_heip",variables)); outputTypes["r_heip"].push_back(getOutputFileName("r_heip",variables));
-                                       }else if (Estimators[i] == "smithwilson") { 
+                    }else if (Estimators[i] == "r_shannonrange") {
+                        rDisplays.push_back(new RareDisplay(new RangeShannon(alpha), new ThreeColumnFile(getOutputFileName("r_shannonrange", variables))));
+                        outputNames.push_back(getOutputFileName("r_shannonrange", variables)); outputTypes["r_shannoneven"].push_back(getOutputFileName("r_shannonrange", variables));
+                                       }else if (Estimators[i] == "smithwilson") {
                                                rDisplays.push_back(new RareDisplay(new SmithWilson(), new ThreeColumnFile(getOutputFileName("r_smithwilson",variables))));
                                                outputNames.push_back(getOutputFileName("r_smithwilson",variables)); outputTypes["r_smithwilson"].push_back(getOutputFileName("r_smithwilson",variables));
                                        }else if (Estimators[i] == "npshannon") { 
index 02fe6e3aa4355a107e76f06d8f8f9a7f0df19118..09de39fddfe89bc48fd9397a4e0fffc5e32d495a 100644 (file)
@@ -41,7 +41,7 @@ private:
        OrderVector* order;
        InputData* input;
        Rarefact* rCurve;
-       int nIters, abund, processors;
+       int nIters, abund, processors, alpha;
        float freq;
        
        bool abort, allLines, groupMode;
index 1bceec210779221ce0dfce33886b177e5250efd8..0b3dde6fe9312b72d2f08a716b5b61ee01cca64d 100644 (file)
@@ -54,13 +54,12 @@ SAbundVector::SAbundVector(ifstream& f): DataVector(), maxRank(0), numBins(0), n
        try {
                int hold;
                f >> label >> hold;
-       
+        
                data.assign(hold+1, 0);
                int inputData;
        
                for(int i=1;i<=hold;i++){
                        f >> inputData;
-
                        set(i, inputData);
                }
 
index 2b8468bd2d096f71c6c33289d371829d0a69dcba..cbcacd8c0a21e8fc635bae96782a3d6e12ee5643 100644 (file)
@@ -17,35 +17,72 @@ EstOutput RangeShannon::getValues(SAbundVector* rank){
         double commSize = 1e20;
         double sampleSize = rank->getNumSeqs();
         
-        double aux = ceil(pow(sampleSize+1, (1/(double)3)));
-        double est0 = rank->get(1)+1;
-        if (aux > est0) { est0 = aux; } //est0 = max(rank->get(1)+1, aux)
+        vector<int> freqx;
+        vector<int> freqy;
+        for (int i = 1; i <=rank->getMaxRank(); i++) {
+            int abund = rank->get(i);
+            if (abund != 0) {
+                freqx.push_back(i);
+                freqy.push_back(abund);
+            }
+        }
+        
+        double aux = ceil(pow((sampleSize+1), (1/(double)3)));
+        double est0 = max(freqy[0]+1, aux);
         
         vector<double> ests;
         double numr = 0.0;
-        for (int i = 1; i < rank->getNumBins()-1; i++) {
+        double denr = 0.0;
+        for (int i = 0; i < freqx.size()-1; i++) {
             
             if (m->control_pressed) { break; }
             
-            int abund = rank->get(i);
-            
-            if (abund != 0) {
+            if (freqx[i+1] == freqx[i]+1)   { numr = max(freqy[i+1]+1, aux);    }
+            else                            { numr = aux;                       }
             
-                int abundNext = rank->get(i+1);
-                if (abundNext == 0) {  numr = aux;  }
-                else {
-                    if (abundNext+1 > aux) { numr = abundNext+1; } //numr = max(abundNext+1, aux)
-                    else { numr = aux; }
-                }
-                double denr = aux;
-                if (abund > aux) { denr = abund; } //denr = max(abund, aux)
-                ests.push_back((abund+1)*numr/denr);
-             }
+            denr = max(freqy[i], aux);
+            ests.push_back((freqx[i]+1)*numr/(double)denr);
         }
         numr = aux;
+        denr = max(freqy[freqy.size()-1], aux);
+        ests.push_back((freqx[freqx.size()-1]+1)*numr/(double)denr);
         
-               
-               if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }
+        double sum = 0.0;
+        for (int i = 0; i < freqy.size(); i++) {  sum += (ests[i]*freqy[i]); }
+        double nfac = est0 + sum;
+        est0 /= nfac;
+        
+        for (int i = 0; i < ests.size(); i++) {  ests[i] /= nfac;   }
+        
+        double abunup = 1 / commSize;
+        double nbrup = est0 / abunup;
+        double abunlow = ests[0];
+        double nbrlow = est0 / abunlow;
+        
+        if (alpha == 1) {
+            double sum = 0.0;
+            for (int i = 0; i < freqy.size(); i++) {
+                if (m->control_pressed) { break; }
+                sum += (freqy[i] * ests[i] * log(ests[i]));
+            }
+            data[0] = -sum;
+            data[1] = exp(data[0]+nbrlow*(-abunlow*log(abunlow)));
+            data[2] = exp(data[0]+nbrup*(-abunup*log(abunup)));
+        }else {
+            for (int i = 0; i < freqy.size(); i++) {
+                if (m->control_pressed) { break; }
+                data[0] += (freqy[i] * (pow(ests[i],alpha)));
+            }
+            data[1] = pow(data[0]+nbrup*pow(abunup,alpha), (1/(1-alpha)));
+            data[2] = pow(data[0]+nbrlow*pow(abunlow,alpha), (1/(1-alpha)));
+        }
+        
+        //this calc has no data[0], just a lower and upper estimate. set data[0] to lower estimate.
+        data[0] = data[1];
+        if (data[1] > data[2]) { data[1] = data[2]; data[2] = data[0]; }
+        data[0] = data[1];
+        
+               if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }
                
                return data;
        }
index 2123e05928c0be0bef9db1f18dbcffae0d3940fe..c5acd69096ba04cd3e9c5edece2047f2ff9f90ba 100644 (file)
@@ -6,6 +6,13 @@
 //  Copyright (c) 2014 Schloss Lab. All rights reserved.
 //
 
+/*
+ 1] Haegeman, B., Hamelin, J., Moriarty, J., Neal, P., Dushoff, J., & Weitz, J. S. (2013). Robust estimation of microbial diversity in theory and in practice. The ISME journal, 7(6), 1092–1101.
+ [2] Hill, M. O. (1973). Diversity and evenness: A unifying notation and its consequences. Ecology, 54(2), 427–432.
+ [3] Orlitsky, A., Santhanam, N. P., & Zhang, J. (2003). Always Good Turing: Asymptoti- cally optimal probability estimation. Science, 302(5644), 427–431.
+ [4] Roesch, L. F., Fulthorpe, R. R., Riva, A., Casella, G., Hadwin, A. K., Kent, A. D., et al. (2007). Pyrosequencing enumerates and contrasts soil microbial diversity. The ISME Journal, 1(4), 283–290.
+ */
+
 #ifndef Mothur_shannonrange_h
 #define Mothur_shannonrange_h
 
 class RangeShannon : public Calculator  {
        
 public:
-       RangeShannon() : Calculator("rangeshannon", 3, false) {};
+       RangeShannon(int a) : alpha(a), Calculator("rangeshannon", 3, false) {};
        EstOutput getValues(SAbundVector*);
        EstOutput getValues(vector<SharedRAbundVector*>) {return data;};
-       string getCitation() { return "http://www.mothur.org/wiki/rangeshannon"; }
+       string getCitation() { return "Haegeman, B., Hamelin, J., Moriarty, J., Neal, P., Dushoff, J., & Weitz, J. S. (2013). Robust estimation of microbial diversity in theory and in practice. The ISME journal, 7(6), 1092–1101., http://www.mothur.org/wiki/rangeshannon"; }
+private:
+    int alpha;
 };
 
 /***********************************************************************/
index 43202e56cd2b86d7224a8b46b26526a511c6d55a..2bed467066da5033680af7a3b4a5c0cc7cf83a42 100644 (file)
@@ -34,6 +34,7 @@
 #include "solow.h"
 #include "shen.h"
 #include "subsample.h"
+#include "shannonrange.h"
 
 //**********************************************************************************************************************
 vector<string> SummaryCommand::setParameters(){        
@@ -45,8 +46,9 @@ vector<string> SummaryCommand::setParameters(){
         CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
         CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
-               CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson", "", "", "","",true,false,true); parameters.push_back(pcalc);
-               CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund);
+               CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson-shannonrange", "", "", "","",true,false,true); parameters.push_back(pcalc);
+               CommandParameter palpha("alpha", "Multiple", "0-1-2", "1", "", "", "","",false,false,true); parameters.push_back(palpha);
+        CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund);
                CommandParameter psize("size", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psize);
                CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pgroupmode);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
@@ -75,6 +77,7 @@ string SummaryCommand::getHelpString(){
         helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
                helpString += "The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson\n";
                helpString += "If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=true).\n";
+        helpString += "The alpha parameter is used to set the alpha value for the shannonrange calculator.\n";
                helpString += "The label parameter is used to analyze specific labels in your input.\n";
                helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabels).\n";
                return helpString;
@@ -268,6 +271,11 @@ SummaryCommand::SummaryCommand(string option)  {
                 else { subsample = false; subsampleSize = -1; }
             }
             
+            temp = validParameter.validFile(parameters, "alpha", false);               if (temp == "not found") { temp = "1"; }
+                       m->mothurConvert(temp, alpha);
+            
+            if ((alpha != 0) && (alpha != 1) && (alpha != 2)) { m->mothurOut("[ERROR]: Not a valid alpha value. Valid values are 0, 1 and 2."); m->mothurOutEndLine(); abort=true; }
+            
             if (subsample == false) { iters = 0; }
             else {
                 //if you did not set a samplesize and are not using a sharedfile
@@ -346,6 +354,8 @@ int SummaryCommand::execute(){
                                                sumCalculators.push_back(new Shannon());
                                        }else if(Estimators[i] == "shannoneven"){
                                                sumCalculators.push_back(new ShannonEven());
+                    }else if(Estimators[i] == "shannonrange"){
+                                               sumCalculators.push_back(new RangeShannon(alpha));
                                        }else if(Estimators[i] == "npshannon"){
                                                sumCalculators.push_back(new NPShannon());
                                        }else if(Estimators[i] == "heip"){
index 3c8420795550e1f39ef5529ad235d13e11e5e7a5..891d1570e3e55fd24f4d58260c023924584d7756 100644 (file)
@@ -39,7 +39,7 @@ private:
        vector<Calculator*> sumCalculators;     
        InputData* input;
        SAbundVector* sabund;
-       int abund, size, iters, subsampleSize;
+       int abund, size, iters, subsampleSize, alpha;
 
        bool abort, allLines, groupMode, subsample;
        set<string> labels; //holds labels to be used
index 98f0d9a2a83f4a5c903756e454cd006354ca721d..f5f6562f37b0d9f16bed2271f92a6b497a822d4b 100644 (file)
@@ -78,6 +78,7 @@
 #include "sharednseqs.h"
 #include "sharedjsd.h"
 #include "sharedrjsd.h"
+#include "shannonrange.h"
 
 
 /********************************************************************/
@@ -139,6 +140,7 @@ void ValidCalculators::printCitations(vector<string> Estimators) {
                                }else if (Estimators[i] == "jack") { Calculator* temp = new Jackknife(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
                                }else if (Estimators[i] == "shannon") { Calculator* temp = new Shannon(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
                                }else if (Estimators[i] == "shannoneven") { Calculator* temp = new ShannonEven(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
+                }else if (Estimators[i] == "shannonrange") { Calculator* temp = new RangeShannon(0); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
                                }else if (Estimators[i] == "npshannon") { Calculator* temp = new NPShannon(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
                                }else if (Estimators[i] == "heip") { Calculator* temp = new Heip(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp; 
                                
@@ -392,6 +394,7 @@ void ValidCalculators::initialSingle() {
                single["shannon"]           = "shannon";
                single["npshannon"]     = "npshannon";
                single["shannoneven"]   = "shannoneven";
+        single["shannonrange"] = "shannonrange";
                single["smithwilson"]   = "smithwilson";
                single["heip"]                  = "heip";
                single["simpson"]           = "simpson";
@@ -482,6 +485,7 @@ void ValidCalculators::initialRarefaction() {
                rarefaction["heip"]                     = "heip";
                rarefaction["npshannon"]        = "npshannon";
                rarefaction["shannoneven"]      = "shannoneven";
+        rarefaction["shannonrange"]    = "shannonrange";
                rarefaction["simpson"]          = "simpson";
                rarefaction["invsimpson"]       = "invsimpson";
                rarefaction["simpsoneven"]      = "simpsoneven";
@@ -510,6 +514,7 @@ void ValidCalculators::initialSummary() {
                summary["smithwilson"]  = "smithwilson";
                summary["invsimpson"]   = "invsimpson";
                summary["npshannon"]    = "npshannon";
+        summary["shannonrange"]        = "shannonrange";
                summary["simpson"]              = "simpson";
                summary["simpsoneven"]  = "simpsoneven";
                summary["bergerparker"] = "bergerparker";