From e4827e0945cbda536064e5a345996b2a7dfcbb81 Mon Sep 17 00:00:00 2001 From: pschloss Date: Wed, 22 Apr 2009 22:09:36 +0000 Subject: [PATCH] modified some calculators and other stuff - pds --- ace.cpp | 2 +- bergerparker.cpp | 3 +- calculator.cpp | 89 --------------------------------------------- calculator.h | 31 ---------------- chao1.cpp | 32 +++++++++------- collectcommand.cpp | 12 +++--- geom.cpp | 16 ++++---- geom.h | 2 +- logsd.cpp | 2 +- logsd.h | 4 +- npshannon.cpp | 1 + qstat.h | 4 +- rarefactcommand.cpp | 3 ++ shannon.cpp | 2 +- summarycommand.cpp | 7 +++- validcalculator.cpp | 10 +++-- 16 files changed, 60 insertions(+), 160 deletions(-) diff --git a/ace.cpp b/ace.cpp index 17049bd..54d0946 100644 --- a/ace.cpp +++ b/ace.cpp @@ -42,7 +42,7 @@ EstOutput Ace::getValues(SAbundVector* rank) { if(denom <= 0.0){ term1=0.0000; } else { term1 = (double)(srare * numsum)/(double)denom - 1.0; } if(term1 >= 0.0){ gamace = term1; } else { gamace = 0.0; } - + if(gamace >= 0.64){ gamace = gamace * (1 + (nrare * (1 - Cace) * numsum) / denom); if(gamace<0){ gamace = 0; } diff --git a/bergerparker.cpp b/bergerparker.cpp index 893f2a9..c9f3565 100644 --- a/bergerparker.cpp +++ b/bergerparker.cpp @@ -15,8 +15,7 @@ EstOutput BergerParker::getValues(SAbundVector* rank){ try { data.resize(1,0); //Berger-Parker index - double BP = (double)rank->getNumSeqs()/(double)rank->getMaxRank(); - //cout << "BP index = " << 1/BP << "\n\n"; + double BP = (double)rank->getMaxRank() / (double)rank->getNumSeqs(); data[0] = BP; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } diff --git a/calculator.cpp b/calculator.cpp index ffd452a..71aa99c 100644 --- a/calculator.cpp +++ b/calculator.cpp @@ -746,96 +746,7 @@ void KS2SampleTest::doKSTest(vector abun1, vector abun2)//abun1 } /***********************************************************************/ -double logS(double num) -{ - return -(1-num)*log(1-num)/num; -} -/***********************************************************************/ -/*void LogSD::doLogSD(vector indVec, vector specVec) //indVec = individuals vector, specVec = species vector -{ try { - VecCalc vecCalc; - double numSpec = vecCalc.sumElements(specVec);//numSpec = The total number of species - cout << "number of species = " << numSpec << "\n"; - double numInd = vecCalc.sumElements(vecCalc.multVecs(indVec, specVec)); - double snRatio = numSpec/numInd; - double x = .5; - double step = .4999999999; - while(fabs(snRatio - logS(x)) > .00001) //This uses a binary search to find the value of x. - { - if(logS(x) > snRatio) - x += step; - else - x -= step; - step /= 2; - } - double alpha = numInd*(1-x)/x; - - int ind; - cout << "Number of individuals:"; //Ask the user for the number of individuals. - cin >> ind; - double spec = alpha*pow(x, ind)/ind; - cout << "Number of species expected = " << spec << "\n" << "X value = " << x << "\n" << "Alpha value= " << alpha << "\n";//Outputs the number of species expected with the given number of individuals. - - vector obsSpec; - vector cObsSpec; - vector expSpec; - vector cExpSpec; - vector cDiff; - - // Generates the cumulative observed species vector. - int oct = 1; - double octSumObs = 0; - for(int y = 0; y < specVec.size(); y++) - { - if(indVec.at(y) - .5 < pow(2.0, oct)) - octSumObs += specVec.at(y); - else - { - obsSpec.push_back(octSumObs); - octSumObs = specVec.at(y); - oct++; - } - if(y == specVec.size()-1) - obsSpec.push_back(octSumObs); - } - cObsSpec = vecCalc.genCVec(obsSpec); - cObsSpec = vecCalc.add(cObsSpec,-.5); - - // Generates the cumulative expected species vector. - oct = 1; - double octSumExp = 0; - for(int g = 1; g <= indVec.at(indVec.size()-1); g++) - { - if(g - .5 < pow(2.0, oct)) - octSumExp += alpha*pow(x,g)/(g); - else - { - expSpec.push_back(octSumExp); - octSumExp = alpha*pow(x,g)/(g); - oct++; - } - if(g == indVec.at(indVec.size()-1)) - expSpec.push_back(octSumExp); - } - cExpSpec = vecCalc.genCVec(expSpec); - - // Statistical Analysis - double dTStat = vecCalc.findDStat(cObsSpec, cExpSpec, numSpec); - cout << "D Test Statistic = " << dTStat << "\n"; - cout << ".05 confidence value = " << .89196/sqrt(numSpec) << "\n"; - cout << ".01 confidence value = " << 1.0471/sqrt(numSpec) << "\n\n"; - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the LogSD class Function doLogSD. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the LogSD class function doLogSD. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } -}*/ -/***********************************************************************/ void QStatistic::doQStat(vector vec)//vec = The data vector. { try { VecCalc vecCalc; diff --git a/calculator.h b/calculator.h index 53e5c5a..4cc3280 100644 --- a/calculator.h +++ b/calculator.h @@ -79,27 +79,6 @@ class VecCalc vector getSData(char[]);//This takes a file name as a parameter and reads all of the data in the file into a vector. }; -/**************************************************************************************************/ -/*This Class contains methods that return the B Diverstiy of two sets -of data. The four methods are the Whittaker's measure, the Marczewski-Stainhaus distance, -the Sorensen quantitative index, and the Morisita-Horn index. -The main method takes a number of columns of data and performs all 4 methods on each -combination of columns. It prints a table for every method that shows the B Diverstiy for -each combination. It also calculates the overall diversity for Whittaker's measure and -the Marczewski-Steinhaus distance.*/ - - -/*class BDiversity -{ - public: - void doBD(vector, double);//Main method - double getWhitt(vector,vector);//Whittacker's measure - double getMS(vector, vector);//Marczewski-Stainhaus distance - double getSor(vector, vector);//Sorensen quantitative index - double getMor(vector, vector);//Morisita-Horn index - void printD(vector >, int);//This prints a table that represents the given 2D vector, the second paramter specifies which method is to be used (1 for Whitt, 2 for MS, 3 for Sor, and 4 for Mor) -};*/ - /**************************************************************************************************/ /*This Class is similar to the GeometricSeries.h class. It calculates @@ -143,16 +122,6 @@ class KS2SampleTest public: void doKSTest(vector, vector); }; -/**************************************************************************************************/ -/*This Class calculates the Log Series Distribution for the data. -It then generates a D-Statistic and prints the D-Statistic and -the critical values for the Kolmogorov-Smirnov 1 sample test.*/ - -/*class LogSD -{ - public: - void doLogSD(vector, vector); -};*/ /**************************************************************************************************/ //This Class calculates and prints the Q-Statistic for the data. diff --git a/chao1.cpp b/chao1.cpp index d938eb4..c05bdef 100644 --- a/chao1.cpp +++ b/chao1.cpp @@ -20,28 +20,34 @@ EstOutput Chao1::getValues(SAbundVector* rank){ double doubles = (double)rank->get(2); double chaovar = 0.0000; - double chao = sobs + pow(singles,2)/(2*(doubles+1)) - (singles*doubles/(2*pow(doubles+1,2))); + double chao = sobs + singles*(singles-1)/(2*(doubles+1)); - if(doubles==0){chaovar=0;} - else{ - double g=singles/doubles; - chaovar = doubles*(0.25*pow(g,4)+pow(g,3)+0.5*pow(g,2)); + if(singles > 0 && doubles > 0){ + chaovar = singles*(singles-1)/(2*(doubles+1)) + + singles*pow(2*singles-1, 2)/(4*pow(doubles+1,2)) + + pow(singles, 2)*doubles*pow(singles-1, 2)/(4*pow(doubles+1,4)); } - - + else if(singles > 0 && doubles == 0){ + chaovar = singles*(singles-1)/2 + singles*pow(2*singles-1, 2)/4 - pow(singles, 4)/(4*chao); + } + else if(singles == 0){ + chaovar = sobs*exp(-1*rank->getNumSeqs()/sobs)*(1-exp(-1*rank->getNumSeqs()/sobs)); + } + double chaohci, chaolci; - if(chao==sobs){ - double ci = 1.96*pow(chaovar,0.5); - chaolci = chao-ci;//chao lci - chaohci = chao+ci;//chao hci - } - else{ + if(singles>0){ double denom = pow(chao-sobs,2); double c = exp(1.96*pow((log(1+chaovar/denom)),0.5)); chaolci = sobs+(chao-sobs)/c;//chao lci chaohci = sobs+(chao-sobs)*c;//chao hci } + else{ + double p = exp(-1*rank->getNumSeqs()/sobs); + chaolci = sobs/(1-p)-1.96*pow(sobs*p/(1-p), 0.5); + chaohci = sobs/(1-p)+1.96*pow(sobs*p/(1-p), 0.5); + if(chaolci < sobs){ chaolci = sobs; } + } data[0] = chao; data[1] = chaolci; diff --git a/collectcommand.cpp b/collectcommand.cpp index 62258a8..2687f92 100644 --- a/collectcommand.cpp +++ b/collectcommand.cpp @@ -22,7 +22,7 @@ #include "logsd.h" #include "bergerparker.h" #include "bstick.h" - +#include "coverage.h" //********************************************************************************************************************** CollectCommand::CollectCommand(){ @@ -40,6 +40,8 @@ CollectCommand::CollectCommand(){ cDisplays.push_back(new CollectDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"chao"))); }else if (globaldata->Estimators[i] == "nseqs") { cDisplays.push_back(new CollectDisplay(new NSeqs(), new OneColumnFile(fileNameRoot+"nseqs"))); + }else if (globaldata->Estimators[i] == "coverage") { + cDisplays.push_back(new CollectDisplay(new Coverage(), new OneColumnFile(fileNameRoot+"coverage"))); }else if (globaldata->Estimators[i] == "ace") { convert(globaldata->getAbund(), abund); cDisplays.push_back(new CollectDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"ace"))); @@ -53,12 +55,12 @@ CollectCommand::CollectCommand(){ cDisplays.push_back(new CollectDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"simpson"))); }else if (globaldata->Estimators[i] == "bootstrap") { cDisplays.push_back(new CollectDisplay(new Bootstrap(), new OneColumnFile(fileNameRoot+"bootstrap"))); - }else if (globaldata->Estimators[i] == "geom") { - cDisplays.push_back(new CollectDisplay(new Geom(), new OneColumnFile(fileNameRoot+"geom"))); + }else if (globaldata->Estimators[i] == "geometric") { + cDisplays.push_back(new CollectDisplay(new Geom(), new OneColumnFile(fileNameRoot+"geometric"))); }else if (globaldata->Estimators[i] == "qstat") { cDisplays.push_back(new CollectDisplay(new QStat(), new OneColumnFile(fileNameRoot+"qstat"))); - }else if (globaldata->Estimators[i] == "logsd") { - cDisplays.push_back(new CollectDisplay(new LogSD(), new OneColumnFile(fileNameRoot+"logsd"))); + }else if (globaldata->Estimators[i] == "logseries") { + cDisplays.push_back(new CollectDisplay(new LogSD(), new OneColumnFile(fileNameRoot+"logseries"))); }else if (globaldata->Estimators[i] == "bergerparker") { cDisplays.push_back(new CollectDisplay(new BergerParker(), new OneColumnFile(fileNameRoot+"bergerparker"))); }else if (globaldata->Estimators[i] == "bstick") { diff --git a/geom.cpp b/geom.cpp index 1b8c4ec..31580db 100644 --- a/geom.cpp +++ b/geom.cpp @@ -42,7 +42,7 @@ RAbundVector Geom::getRAbundVector(SAbundVector* rank){ /***********************************************************************************/ EstOutput Geom::getValues(SAbundVector* rank){ try { - data.resize(2,0); + data.resize(3,0); rdata = getRAbundVector(rank); int numInd = rdata.getNumSeqs(); @@ -64,14 +64,15 @@ EstOutput Geom::getValues(SAbundVector* rank){ double sumExp = 0; double sumObs = 0; double maxDiff = 0; - - for(int i = 0; i < rdata.size(); i++) + + for(int i = 0; i < numSpec; i++) { sumObs += rdata.get(i); sumExp += numInd*cK*k*pow(1-k, i); + double diff = fabs(sumObs-sumExp); - if(diff > maxDiff) - maxDiff = diff; + if(diff > maxDiff) { maxDiff = diff; } + } double DStatistic = maxDiff/numInd; @@ -81,7 +82,8 @@ EstOutput Geom::getValues(SAbundVector* rank){ cout << "Critical value for 95% confidence interval = ";*/ if(rdata.size() > 20) { - critVal = .886/sqrt(rdata.size()); + data[1] = 1.031/sqrt(rdata.size()); + data[2] = 0.886/sqrt(rdata.size()); } else { @@ -92,10 +94,10 @@ EstOutput Geom::getValues(SAbundVector* rank){ cout << "If D-Statistic is less than the critical value then the data fits the Geometric Series model w/ 95% confidence.\n\n";*/ data[0] = DStatistic; - data[1] = critVal; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } + if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } return data; } diff --git a/geom.h b/geom.h index b9da73f..05fc0d7 100644 --- a/geom.h +++ b/geom.h @@ -19,7 +19,7 @@ It is a child of the calculator class. */ class Geom : public Calculator { public: - Geom() : Calculator("geom", 3) {}; + Geom() : Calculator("geometric", 3) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;}; diff --git a/logsd.cpp b/logsd.cpp index fddee4f..14d2e44 100644 --- a/logsd.cpp +++ b/logsd.cpp @@ -28,7 +28,7 @@ EstOutput LogSD::getValues(SAbundVector* rank){ SAbundVector rankw = SAbundVector(dvec, mr,nb,ns); SAbundVector *rank = &rankw;*/ - data.resize(2,0); + data.resize(3,0); int numInd = rank->getNumSeqs(); int numSpec = rank->getNumBins(); double snRatio = (double)numSpec/numInd; diff --git a/logsd.h b/logsd.h index 02c2ca5..7bb12c3 100644 --- a/logsd.h +++ b/logsd.h @@ -19,9 +19,9 @@ It is a child of the calculator class.*/ class LogSD : public Calculator { public: - LogSD() : Calculator("logsd", 3) {}; + LogSD() : Calculator("logseries", 3) {}; EstOutput getValues(SAbundVector*); - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;}; + EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) { return data; }; private: double logS(double); diff --git a/npshannon.cpp b/npshannon.cpp index 80c8372..faabc98 100644 --- a/npshannon.cpp +++ b/npshannon.cpp @@ -28,6 +28,7 @@ EstOutput NPShannon::getValues(SAbundVector* rank){ double ChatPi = Chat*pi; if(ChatPi>0){ npShannon += rank->get(i) * ChatPi*log(ChatPi)/(1-pow(1-ChatPi,(double)sampled)); + cout << ChatPi << '\t' << rank->get(i) * ChatPi*log(ChatPi)/(1-pow(1-ChatPi,(double)sampled)) << endl; } } npShannon = -npShannon; diff --git a/qstat.h b/qstat.h index 16b8b4d..b0c985a 100644 --- a/qstat.h +++ b/qstat.h @@ -10,7 +10,7 @@ */ #include "calculator.h" -/*This class implements the LogSD estimator on single group. +/*This class implements the q statistic on single group. It is a child of the calculator class.*/ /***********************************************************************/ @@ -18,7 +18,7 @@ It is a child of the calculator class.*/ class QStat : public Calculator { public: - QStat() : Calculator("qstat", 3) {}; + QStat() : Calculator("qstat", 1) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;}; diff --git a/rarefactcommand.cpp b/rarefactcommand.cpp index 3c79757..6e4197f 100644 --- a/rarefactcommand.cpp +++ b/rarefactcommand.cpp @@ -17,6 +17,7 @@ #include "npshannon.h" #include "shannon.h" #include "jackknife.h" +#include "coverage.h" //********************************************************************************************************************** @@ -50,6 +51,8 @@ RareFactCommand::RareFactCommand(){ rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"r_simpson"))); }else if (globaldata->Estimators[i] == "bootstrap") { rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(fileNameRoot+"r_bootstrap"))); + }else if (globaldata->Estimators[i] == "coverage") { + rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(fileNameRoot+"r_coverage"))); }else if (globaldata->Estimators[i] == "nseqs") { rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(fileNameRoot+"r_nseqs"))); } diff --git a/shannon.cpp b/shannon.cpp index 5045954..0d54b7b 100644 --- a/shannon.cpp +++ b/shannon.cpp @@ -30,7 +30,7 @@ EstOutput Shannon::getValues(SAbundVector* rank){ } shannon = -shannon; - double hvar = (hvara-pow(shannon,2))/(double)sampled+(double)sobs/(double)(2*sampled*sampled); + double hvar = (hvara-pow(shannon,2))/(double)sampled+(double)(sobs-1)/(double)(2*sampled*sampled); double ci = 0; diff --git a/summarycommand.cpp b/summarycommand.cpp index 8632ad4..39972ce 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -22,6 +22,7 @@ #include "qstat.h" #include "bergerparker.h" #include "bstick.h" +#include "coverage.h" //********************************************************************************************************************** @@ -37,9 +38,11 @@ SummaryCommand::SummaryCommand(){ sumCalculators.push_back(new Sobs()); }else if(globaldata->Estimators[i] == "chao"){ sumCalculators.push_back(new Chao1()); - }else if(globaldata->Estimators[i] == "geom"){ + }else if(globaldata->Estimators[i] == "coverage"){ + sumCalculators.push_back(new Coverage()); + }else if(globaldata->Estimators[i] == "geometric"){ sumCalculators.push_back(new Geom()); - }else if(globaldata->Estimators[i] == "logsd"){ + }else if(globaldata->Estimators[i] == "logseries"){ sumCalculators.push_back(new LogSD()); }else if(globaldata->Estimators[i] == "qstat"){ sumCalculators.push_back(new QStat()); diff --git a/validcalculator.cpp b/validcalculator.cpp index 7d3477f..25cc5c5 100644 --- a/validcalculator.cpp +++ b/validcalculator.cpp @@ -182,11 +182,13 @@ void ValidCalculators::initialSingle() { single["simpson"] = "simpson"; single["bergerparker"] = "bergerparker"; single["bootstrap"] = "bootstrap"; - single["geom"] = "geom"; - single["logsd"] = "logsd"; + single["geometric"] = "geometric"; + single["logseries"] = "logseries"; single["qstat"] = "qstat"; single["bstick"] = "bstick"; single["nseqs"] = "nseqs"; + single["coverage"] = "coverage"; + single["default"] = "default"; } catch(exception& e) { @@ -247,6 +249,7 @@ void ValidCalculators::initialRarefaction() { rarefaction["simpson"] = "simpson"; rarefaction["bootstrap"] = "bootstrap"; rarefaction["nseqs"] = "nseqs"; + rarefaction["coverage"] = "coverage"; rarefaction["default"] = "default"; } catch(exception& e) { @@ -271,12 +274,13 @@ void ValidCalculators::initialSummary() { summary["npshannon"] = "npshannon"; summary["simpson"] = "simpson"; summary["bergerparker"] = "bergerparker"; - summary["geom"] = "geom"; + summary["geometric"] = "geometric"; summary["bootstrap"] = "bootstrap"; summary["logsd"] = "logsd"; summary["qstat"] = "qstat"; summary["bstick"] = "bstick"; summary["nseqs"] = "nseqs"; + summary["coverage"] = "coverage"; summary["default"] = "default"; } catch(exception& e) { -- 2.39.2