]> git.donarmstrong.com Git - mothur.git/commitdiff
modified some calculators and other stuff - pds
authorpschloss <pschloss>
Wed, 22 Apr 2009 22:09:36 +0000 (22:09 +0000)
committerpschloss <pschloss>
Wed, 22 Apr 2009 22:09:36 +0000 (22:09 +0000)
16 files changed:
ace.cpp
bergerparker.cpp
calculator.cpp
calculator.h
chao1.cpp
collectcommand.cpp
geom.cpp
geom.h
logsd.cpp
logsd.h
npshannon.cpp
qstat.h
rarefactcommand.cpp
shannon.cpp
summarycommand.cpp
validcalculator.cpp

diff --git a/ace.cpp b/ace.cpp
index 17049bdc414f7e4cb0371332b3e2aef7ac2c4f41..54d0946eab9cf0b2230ef45ef8749b1feecb9da0 100644 (file)
--- a/ace.cpp
+++ b/ace.cpp
@@ -42,7 +42,7 @@ EstOutput Ace::getValues(SAbundVector* rank) {
        
                if(denom <= 0.0){       term1=0.0000;   } else {        term1 = (double)(srare * numsum)/(double)denom - 1.0;   }
                if(term1 >= 0.0){       gamace = term1; } else {        gamace = 0.0;                                                                                   }
-
+               
                if(gamace >= 0.64){
                        gamace = gamace * (1 + (nrare * (1 - Cace) * numsum) / denom);
                        if(gamace<0){                   gamace = 0;                     }
index 893f2a95dad68e6f9ba1638eb29be43071a711aa..c9f3565f985c52f1e744f4e320207243a6bba11d 100644 (file)
@@ -15,8 +15,7 @@ EstOutput BergerParker::getValues(SAbundVector* rank){
        try {
                data.resize(1,0);
                //Berger-Parker index
-               double BP = (double)rank->getNumSeqs()/(double)rank->getMaxRank();
-               //cout << "BP index = " << 1/BP << "\n\n";
+               double BP = (double)rank->getMaxRank() / (double)rank->getNumSeqs();
                
                data[0] = BP;
                if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }
index ffd452a479c16c9df3e133d09576eac67d33c5fa..71aa99cfb1c6f4f887389e530c1ceeed9dbfeef3 100644 (file)
@@ -746,96 +746,7 @@ void KS2SampleTest::doKSTest(vector<double> abun1, vector<double> abun2)//abun1
 }
 
 /***********************************************************************/
-double logS(double num)
-{
-       return -(1-num)*log(1-num)/num;
-}
 
-/***********************************************************************/
-/*void LogSD::doLogSD(vector<double> indVec, vector<double> specVec) //indVec = individuals vector, specVec = species vector
-{      try {
-               VecCalc vecCalc;
-               double numSpec = vecCalc.sumElements(specVec);//numSpec = The total number of species
-               cout << "number of species = " << numSpec << "\n";
-               double numInd = vecCalc.sumElements(vecCalc.multVecs(indVec, specVec));
-               double snRatio = numSpec/numInd;
-               double x = .5;
-               double step = .4999999999;
-               while(fabs(snRatio - logS(x)) > .00001) //This uses a binary search to find the value of x.
-               {
-                       if(logS(x) > snRatio)
-                               x += step;
-                       else
-                               x -= step;
-                       step /= 2;
-               }
-               double alpha = numInd*(1-x)/x;
-       
-               int ind;
-               cout << "Number of individuals:"; //Ask the user for the number of individuals.
-               cin >> ind;
-               double spec = alpha*pow(x, ind)/ind;
-               cout << "Number of species expected = " << spec << "\n" << "X value = " << x << "\n" << "Alpha value= " << alpha << "\n";//Outputs the number of species expected with the given number of individuals.
-       
-               vector<double> obsSpec;
-               vector<double> cObsSpec;
-               vector<double> expSpec;
-               vector<double> cExpSpec;
-               vector<double> cDiff;
-       
-               // Generates the cumulative observed species vector.
-               int oct = 1;
-               double octSumObs = 0;
-               for(int y = 0; y < specVec.size(); y++)
-               {
-                       if(indVec.at(y) - .5 < pow(2.0, oct))
-                               octSumObs += specVec.at(y);
-                       else
-                       {
-                               obsSpec.push_back(octSumObs);
-                               octSumObs = specVec.at(y);
-                               oct++;
-                       }
-                       if(y == specVec.size()-1)
-                               obsSpec.push_back(octSumObs);
-               }
-               cObsSpec = vecCalc.genCVec(obsSpec);
-               cObsSpec = vecCalc.add(cObsSpec,-.5);
-       
-               // Generates the cumulative expected species vector.
-               oct = 1;
-               double octSumExp = 0;
-               for(int g = 1; g <= indVec.at(indVec.size()-1); g++)
-               {
-                       if(g - .5 < pow(2.0, oct))
-                               octSumExp += alpha*pow(x,g)/(g);
-                       else
-                       {
-                               expSpec.push_back(octSumExp);
-                               octSumExp = alpha*pow(x,g)/(g);
-                               oct++;
-                       }
-                       if(g == indVec.at(indVec.size()-1))
-                               expSpec.push_back(octSumExp);
-               }
-               cExpSpec = vecCalc.genCVec(expSpec);
-       
-               // Statistical Analysis
-               double dTStat = vecCalc.findDStat(cObsSpec, cExpSpec, numSpec);
-               cout << "D Test Statistic = " << dTStat << "\n";
-               cout << ".05 confidence value = " << .89196/sqrt(numSpec) << "\n";
-               cout << ".01 confidence value = " << 1.0471/sqrt(numSpec) << "\n\n";
-       }
-       catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the LogSD class Function doLogSD. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the LogSD class function doLogSD. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }       
-}*/
-/***********************************************************************/
 void QStatistic::doQStat(vector<double> vec)//vec = The data vector.
 {      try {
                VecCalc vecCalc;
index 53e5c5a52578e7babcff8c487329cf320a5aa360..4cc32806ff7536908c5f75a233e255dd321dd72a 100644 (file)
@@ -79,27 +79,6 @@ class VecCalc
                vector<string> getSData(char[]);//This takes a file name as a parameter and reads all of the data in the file into a <string> vector.
 };
 
-/**************************************************************************************************/
-/*This Class contains methods that return the B Diverstiy of two sets
-of data. The four methods are the Whittaker's measure, the Marczewski-Stainhaus distance,
-the Sorensen quantitative index, and the Morisita-Horn index.
-The main method takes a number of columns of data and performs all 4 methods on each
-combination of columns. It prints a table for every method that shows the B Diverstiy for 
-each combination. It also calculates the overall diversity for Whittaker's measure and 
-the Marczewski-Steinhaus distance.*/
-
-
-/*class BDiversity
-{
-       public:
-               void doBD(vector<double>, double);//Main method
-               double getWhitt(vector<double>,vector<double>);//Whittacker's measure
-               double getMS(vector<double>, vector<double>);//Marczewski-Stainhaus distance
-               double getSor(vector<double>, vector<double>);//Sorensen quantitative index
-               double getMor(vector<double>, vector<double>);//Morisita-Horn index
-               void printD(vector<vector<double> >, int);//This prints a table that represents the given 2D vector, the second paramter specifies which method is to be used (1 for Whitt, 2 for MS, 3 for Sor, and 4 for Mor)
-};*/
-
 /**************************************************************************************************/
 
 /*This Class is similar to the GeometricSeries.h class. It calculates
@@ -143,16 +122,6 @@ class KS2SampleTest
        public:
                void doKSTest(vector<double>, vector<double>);
 };
-/**************************************************************************************************/
-/*This Class calculates the Log Series Distribution for the data.
-It then generates a D-Statistic and prints the D-Statistic and
-the critical values for the Kolmogorov-Smirnov 1 sample test.*/
-
-/*class LogSD
-{
-       public:
-               void doLogSD(vector<double>, vector<double>);
-};*/
 
 /**************************************************************************************************/
 //This Class calculates and prints the Q-Statistic for the data.
index d938eb4ab71504f66f72eec2c91f9bfbb243a212..c05bdef2b7179a01f722cf497ab67f5b72ed7e3c 100644 (file)
--- a/chao1.cpp
+++ b/chao1.cpp
@@ -20,28 +20,34 @@ EstOutput Chao1::getValues(SAbundVector* rank){
                double doubles = (double)rank->get(2);
                double chaovar = 0.0000;
        
-               double chao = sobs + pow(singles,2)/(2*(doubles+1)) - (singles*doubles/(2*pow(doubles+1,2)));
+               double chao = sobs + singles*(singles-1)/(2*(doubles+1));
        
-               if(doubles==0){chaovar=0;}
-               else{
-                       double g=singles/doubles;
-                       chaovar = doubles*(0.25*pow(g,4)+pow(g,3)+0.5*pow(g,2));
+               if(singles > 0 && doubles > 0){
+                       chaovar = singles*(singles-1)/(2*(doubles+1))
+                                       + singles*pow(2*singles-1, 2)/(4*pow(doubles+1,2))
+                                       + pow(singles, 2)*doubles*pow(singles-1, 2)/(4*pow(doubles+1,4));
                }
-
-       
+               else if(singles > 0 && doubles == 0){
+                       chaovar = singles*(singles-1)/2 + singles*pow(2*singles-1, 2)/4 - pow(singles, 4)/(4*chao); 
+               }
+               else if(singles == 0){
+                       chaovar = sobs*exp(-1*rank->getNumSeqs()/sobs)*(1-exp(-1*rank->getNumSeqs()/sobs));
+               }
+                       
                double chaohci, chaolci;
        
-               if(chao==sobs){
-                       double ci = 1.96*pow(chaovar,0.5);
-                       chaolci = chao-ci;//chao lci
-                       chaohci = chao+ci;//chao hci
-               }
-               else{
+               if(singles>0){
                        double denom = pow(chao-sobs,2);
                        double c = exp(1.96*pow((log(1+chaovar/denom)),0.5));
                        chaolci = sobs+(chao-sobs)/c;//chao lci
                        chaohci = sobs+(chao-sobs)*c;//chao hci
                }
+               else{
+                       double p = exp(-1*rank->getNumSeqs()/sobs);
+                       chaolci = sobs/(1-p)-1.96*pow(sobs*p/(1-p), 0.5);
+                       chaohci = sobs/(1-p)+1.96*pow(sobs*p/(1-p), 0.5);
+                       if(chaolci < sobs){     chaolci = sobs; }
+               }
                
                data[0] = chao;
                data[1] = chaolci;
index 62258a85661b4983ee787262f655440992087f6e..2687f9249fe3aa54b3641b0e4430e9e25da65813 100644 (file)
@@ -22,7 +22,7 @@
 #include "logsd.h"
 #include "bergerparker.h"
 #include "bstick.h"
-
+#include "coverage.h"
 
 //**********************************************************************************************************************
 CollectCommand::CollectCommand(){
@@ -40,6 +40,8 @@ CollectCommand::CollectCommand(){
                                        cDisplays.push_back(new CollectDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"chao")));
                                }else if (globaldata->Estimators[i] == "nseqs") { 
                                        cDisplays.push_back(new CollectDisplay(new NSeqs(), new OneColumnFile(fileNameRoot+"nseqs")));
+                               }else if (globaldata->Estimators[i] == "coverage") { 
+                                       cDisplays.push_back(new CollectDisplay(new Coverage(), new OneColumnFile(fileNameRoot+"coverage")));
                                }else if (globaldata->Estimators[i] == "ace") { 
                                        convert(globaldata->getAbund(), abund);
                                        cDisplays.push_back(new CollectDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"ace")));
@@ -53,12 +55,12 @@ CollectCommand::CollectCommand(){
                                        cDisplays.push_back(new CollectDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"simpson")));
                                }else if (globaldata->Estimators[i] == "bootstrap") { 
                                        cDisplays.push_back(new CollectDisplay(new Bootstrap(), new OneColumnFile(fileNameRoot+"bootstrap")));
-                               }else if (globaldata->Estimators[i] == "geom") { 
-                                       cDisplays.push_back(new CollectDisplay(new Geom(), new OneColumnFile(fileNameRoot+"geom")));
+                               }else if (globaldata->Estimators[i] == "geometric") { 
+                                       cDisplays.push_back(new CollectDisplay(new Geom(), new OneColumnFile(fileNameRoot+"geometric")));
                                }else if (globaldata->Estimators[i] == "qstat") { 
                                        cDisplays.push_back(new CollectDisplay(new QStat(), new OneColumnFile(fileNameRoot+"qstat")));
-                               }else if (globaldata->Estimators[i] == "logsd") { 
-                                       cDisplays.push_back(new CollectDisplay(new LogSD(), new OneColumnFile(fileNameRoot+"logsd")));
+                               }else if (globaldata->Estimators[i] == "logseries") { 
+                                       cDisplays.push_back(new CollectDisplay(new LogSD(), new OneColumnFile(fileNameRoot+"logseries")));
                                }else if (globaldata->Estimators[i] == "bergerparker") { 
                                        cDisplays.push_back(new CollectDisplay(new BergerParker(), new OneColumnFile(fileNameRoot+"bergerparker")));
                                }else if (globaldata->Estimators[i] == "bstick") { 
index 1b8c4ec4c8ad69fa4112daf8a05c192b5a3044f1..31580db15b7bbb08b648cd10ff3c0bcf4cd98a10 100644 (file)
--- a/geom.cpp
+++ b/geom.cpp
@@ -42,7 +42,7 @@ RAbundVector Geom::getRAbundVector(SAbundVector* rank){
 /***********************************************************************************/
 EstOutput Geom::getValues(SAbundVector* rank){
        try {
-               data.resize(2,0);
+               data.resize(3,0);
                
                rdata = getRAbundVector(rank);
                int numInd = rdata.getNumSeqs();
@@ -64,14 +64,15 @@ EstOutput Geom::getValues(SAbundVector* rank){
                double sumExp = 0;
                double sumObs = 0;
                double maxDiff = 0;
-
-               for(int i = 0; i < rdata.size(); i++)
+               
+               for(int i = 0; i < numSpec; i++)
                {
                        sumObs += rdata.get(i);
                        sumExp += numInd*cK*k*pow(1-k, i);
+                       
                        double diff = fabs(sumObs-sumExp);
-                       if(diff > maxDiff)
-                               maxDiff = diff;
+                       if(diff > maxDiff)      {       maxDiff = diff;         }
+                       
                }
 
                double DStatistic = maxDiff/numInd;
@@ -81,7 +82,8 @@ EstOutput Geom::getValues(SAbundVector* rank){
                cout << "Critical value for 95% confidence interval = ";*/
                if(rdata.size() > 20)
                {
-                       critVal = .886/sqrt(rdata.size());
+                       data[1] = 1.031/sqrt(rdata.size());
+                       data[2] = 0.886/sqrt(rdata.size());
                }
                else
                {
@@ -92,10 +94,10 @@ EstOutput Geom::getValues(SAbundVector* rank){
                cout << "If D-Statistic is less than the critical value then the data fits the Geometric Series model w/ 95% confidence.\n\n";*/
                
                data[0] = DStatistic;
-               data[1] = critVal;
                
                if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }
                if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; }
+               if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; }
                
                return data;
        }
diff --git a/geom.h b/geom.h
index b9da73f06594fd6fd41e62328698081d6213a278..05fc0d76d3e6cd89b1c8b53b3875fcd2634c6f60 100644 (file)
--- a/geom.h
+++ b/geom.h
@@ -19,7 +19,7 @@ It is a child of the calculator class. */
 class Geom : public Calculator  {
        
 public:
-       Geom() : Calculator("geom", 3) {};
+       Geom() : Calculator("geometric", 3) {};
        EstOutput getValues(SAbundVector*);
        EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;};
 
index fddee4f5e94e977bd417e0d32a82f6f37894d232..14d2e442fa2bea0a6e153dc310b59d687b51b642 100644 (file)
--- a/logsd.cpp
+++ b/logsd.cpp
@@ -28,7 +28,7 @@ EstOutput LogSD::getValues(SAbundVector* rank){
                SAbundVector rankw = SAbundVector(dvec, mr,nb,ns);
                SAbundVector *rank = &rankw;*/
                
-               data.resize(2,0);
+               data.resize(3,0);
                int numInd = rank->getNumSeqs();
                int numSpec = rank->getNumBins();
                double snRatio = (double)numSpec/numInd;
diff --git a/logsd.h b/logsd.h
index 02c2ca5c95b7839372036dcaee044c40361e2e5d..7bb12c3ce8791461bc8e904fc04d688ffb5856b6 100644 (file)
--- a/logsd.h
+++ b/logsd.h
@@ -19,9 +19,9 @@ It is a child of the calculator class.*/
 class LogSD : public Calculator  {
        
 public:
-       LogSD() : Calculator("logsd", 3) {};
+       LogSD() : Calculator("logseries", 3) {};
        EstOutput getValues(SAbundVector*);
-       EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;};
+       EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) { return data; };
 
 private:
        double logS(double);
index 80c83728adccbcc691d1a082d95e4c07521a263f..faabc9874dbcd4b08739dc66cf2e8728c710fa48 100644 (file)
@@ -28,6 +28,7 @@ EstOutput NPShannon::getValues(SAbundVector* rank){
                                double ChatPi = Chat*pi;
                                if(ChatPi>0){
                                        npShannon += rank->get(i) * ChatPi*log(ChatPi)/(1-pow(1-ChatPi,(double)sampled));
+                                       cout << ChatPi << '\t' << rank->get(i) * ChatPi*log(ChatPi)/(1-pow(1-ChatPi,(double)sampled)) << endl;
                                }
                        }
                        npShannon = -npShannon;
diff --git a/qstat.h b/qstat.h
index 16b8b4d2f23d3dbe053c68ec1e9abf2214c9610d..b0c985a88a45d7ad88a6a868db0aee7097998f2c 100644 (file)
--- a/qstat.h
+++ b/qstat.h
@@ -10,7 +10,7 @@
  */
 #include "calculator.h"
 
-/*This class implements the LogSD estimator on single group. 
+/*This class implements the q statistic on single group. 
 It is a child of the calculator class.*/ 
 
 /***********************************************************************/
@@ -18,7 +18,7 @@ It is a child of the calculator class.*/
 class QStat : public Calculator  {
        
 public:
-       QStat() : Calculator("qstat", 3) {};
+       QStat() : Calculator("qstat", 1) {};
        EstOutput getValues(SAbundVector*);
        EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;};
 
index 3c7975749e126645b91b7b0b53755e5f63e35789..6e4197f7f8b4a9029709a9d4ab31f51c246d840b 100644 (file)
@@ -17,6 +17,7 @@
 #include "npshannon.h"
 #include "shannon.h"
 #include "jackknife.h"
+#include "coverage.h"
 
 //**********************************************************************************************************************
 
@@ -50,6 +51,8 @@ RareFactCommand::RareFactCommand(){
                                        rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"r_simpson")));
                                }else if (globaldata->Estimators[i] == "bootstrap") { 
                                        rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(fileNameRoot+"r_bootstrap")));
+                               }else if (globaldata->Estimators[i] == "coverage") { 
+                                       rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(fileNameRoot+"r_coverage")));
                                }else if (globaldata->Estimators[i] == "nseqs") { 
                                        rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(fileNameRoot+"r_nseqs")));
                                }
index 5045954a425db955349f1dea26ba1da18cf2f2c9..0d54b7b138d4cf861318126840f4208c2e842bca 100644 (file)
@@ -30,7 +30,7 @@ EstOutput Shannon::getValues(SAbundVector* rank){
                }
                shannon = -shannon;
     
-               double hvar = (hvara-pow(shannon,2))/(double)sampled+(double)sobs/(double)(2*sampled*sampled);
+               double hvar = (hvara-pow(shannon,2))/(double)sampled+(double)(sobs-1)/(double)(2*sampled*sampled);
     
                double ci = 0;
        
index 8632ad4a42c9c9dba91bcccf6528e7ced436f70b..39972cef649a1c34443b0ae36117a48f8b776cbd 100644 (file)
@@ -22,6 +22,7 @@
 #include "qstat.h"
 #include "bergerparker.h"
 #include "bstick.h"
+#include "coverage.h"
 
 //**********************************************************************************************************************
 
@@ -37,9 +38,11 @@ SummaryCommand::SummaryCommand(){
                                        sumCalculators.push_back(new Sobs());
                                }else if(globaldata->Estimators[i] == "chao"){
                                        sumCalculators.push_back(new Chao1());
-                               }else if(globaldata->Estimators[i] == "geom"){
+                               }else if(globaldata->Estimators[i] == "coverage"){
+                                       sumCalculators.push_back(new Coverage());
+                               }else if(globaldata->Estimators[i] == "geometric"){
                                        sumCalculators.push_back(new Geom());
-                               }else if(globaldata->Estimators[i] == "logsd"){
+                               }else if(globaldata->Estimators[i] == "logseries"){
                                        sumCalculators.push_back(new LogSD());
                                }else if(globaldata->Estimators[i] == "qstat"){
                                        sumCalculators.push_back(new QStat());
index 7d3477f68c2c8eb71c5c42ab27b1aceb7eb81472..25cc5c5868e46a9659fc88e924a6f03baf48b7da 100644 (file)
@@ -182,11 +182,13 @@ void ValidCalculators::initialSingle() {
                single["simpson"]           = "simpson";
                single["bergerparker"]  = "bergerparker";
                single["bootstrap"]     = "bootstrap";
-               single["geom"]          = "geom";
-               single["logsd"]         = "logsd";
+               single["geometric"]     = "geometric";
+               single["logseries"]         = "logseries";
                single["qstat"]         = "qstat";
                single["bstick"]        = "bstick";
                single["nseqs"]                 = "nseqs";
+               single["coverage"]              = "coverage";
+               
                single["default"]           = "default";
        }
        catch(exception& e) {
@@ -247,6 +249,7 @@ void ValidCalculators::initialRarefaction() {
                rarefaction["simpson"]          = "simpson";
                rarefaction["bootstrap"]        = "bootstrap";
                rarefaction["nseqs"]            = "nseqs";
+               rarefaction["coverage"]         = "coverage";
                rarefaction["default"]      = "default";
        }
        catch(exception& e) {
@@ -271,12 +274,13 @@ void ValidCalculators::initialSummary() {
                summary["npshannon"]    = "npshannon";
                summary["simpson"]              = "simpson";
                summary["bergerparker"] = "bergerparker";
-               summary["geom"]         = "geom";
+               summary["geometric"]    = "geometric";
                summary["bootstrap"]    = "bootstrap";
                summary["logsd"]        = "logsd";
                summary["qstat"]        = "qstat";
                summary["bstick"]       = "bstick";
                summary["nseqs"]                = "nseqs";
+               summary["coverage"]             = "coverage";
                summary["default"]          = "default";
        }
        catch(exception& e) {