if(denom <= 0.0){ term1=0.0000; } else { term1 = (double)(srare * numsum)/(double)denom - 1.0; }
if(term1 >= 0.0){ gamace = term1; } else { gamace = 0.0; }
-
+
if(gamace >= 0.64){
gamace = gamace * (1 + (nrare * (1 - Cace) * numsum) / denom);
if(gamace<0){ gamace = 0; }
try {
data.resize(1,0);
//Berger-Parker index
- double BP = (double)rank->getNumSeqs()/(double)rank->getMaxRank();
- //cout << "BP index = " << 1/BP << "\n\n";
+ double BP = (double)rank->getMaxRank() / (double)rank->getNumSeqs();
data[0] = BP;
if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }
}
/***********************************************************************/
-double logS(double num)
-{
- return -(1-num)*log(1-num)/num;
-}
-/***********************************************************************/
-/*void LogSD::doLogSD(vector<double> indVec, vector<double> specVec) //indVec = individuals vector, specVec = species vector
-{ try {
- VecCalc vecCalc;
- double numSpec = vecCalc.sumElements(specVec);//numSpec = The total number of species
- cout << "number of species = " << numSpec << "\n";
- double numInd = vecCalc.sumElements(vecCalc.multVecs(indVec, specVec));
- double snRatio = numSpec/numInd;
- double x = .5;
- double step = .4999999999;
- while(fabs(snRatio - logS(x)) > .00001) //This uses a binary search to find the value of x.
- {
- if(logS(x) > snRatio)
- x += step;
- else
- x -= step;
- step /= 2;
- }
- double alpha = numInd*(1-x)/x;
-
- int ind;
- cout << "Number of individuals:"; //Ask the user for the number of individuals.
- cin >> ind;
- double spec = alpha*pow(x, ind)/ind;
- cout << "Number of species expected = " << spec << "\n" << "X value = " << x << "\n" << "Alpha value= " << alpha << "\n";//Outputs the number of species expected with the given number of individuals.
-
- vector<double> obsSpec;
- vector<double> cObsSpec;
- vector<double> expSpec;
- vector<double> cExpSpec;
- vector<double> cDiff;
-
- // Generates the cumulative observed species vector.
- int oct = 1;
- double octSumObs = 0;
- for(int y = 0; y < specVec.size(); y++)
- {
- if(indVec.at(y) - .5 < pow(2.0, oct))
- octSumObs += specVec.at(y);
- else
- {
- obsSpec.push_back(octSumObs);
- octSumObs = specVec.at(y);
- oct++;
- }
- if(y == specVec.size()-1)
- obsSpec.push_back(octSumObs);
- }
- cObsSpec = vecCalc.genCVec(obsSpec);
- cObsSpec = vecCalc.add(cObsSpec,-.5);
-
- // Generates the cumulative expected species vector.
- oct = 1;
- double octSumExp = 0;
- for(int g = 1; g <= indVec.at(indVec.size()-1); g++)
- {
- if(g - .5 < pow(2.0, oct))
- octSumExp += alpha*pow(x,g)/(g);
- else
- {
- expSpec.push_back(octSumExp);
- octSumExp = alpha*pow(x,g)/(g);
- oct++;
- }
- if(g == indVec.at(indVec.size()-1))
- expSpec.push_back(octSumExp);
- }
- cExpSpec = vecCalc.genCVec(expSpec);
-
- // Statistical Analysis
- double dTStat = vecCalc.findDStat(cObsSpec, cExpSpec, numSpec);
- cout << "D Test Statistic = " << dTStat << "\n";
- cout << ".05 confidence value = " << .89196/sqrt(numSpec) << "\n";
- cout << ".01 confidence value = " << 1.0471/sqrt(numSpec) << "\n\n";
- }
- catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the LogSD class Function doLogSD. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
- catch(...) {
- cout << "An unknown error has occurred in the LogSD class function doLogSD. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
-}*/
-/***********************************************************************/
void QStatistic::doQStat(vector<double> vec)//vec = The data vector.
{ try {
VecCalc vecCalc;
vector<string> getSData(char[]);//This takes a file name as a parameter and reads all of the data in the file into a <string> vector.
};
-/**************************************************************************************************/
-/*This Class contains methods that return the B Diverstiy of two sets
-of data. The four methods are the Whittaker's measure, the Marczewski-Stainhaus distance,
-the Sorensen quantitative index, and the Morisita-Horn index.
-The main method takes a number of columns of data and performs all 4 methods on each
-combination of columns. It prints a table for every method that shows the B Diverstiy for
-each combination. It also calculates the overall diversity for Whittaker's measure and
-the Marczewski-Steinhaus distance.*/
-
-
-/*class BDiversity
-{
- public:
- void doBD(vector<double>, double);//Main method
- double getWhitt(vector<double>,vector<double>);//Whittacker's measure
- double getMS(vector<double>, vector<double>);//Marczewski-Stainhaus distance
- double getSor(vector<double>, vector<double>);//Sorensen quantitative index
- double getMor(vector<double>, vector<double>);//Morisita-Horn index
- void printD(vector<vector<double> >, int);//This prints a table that represents the given 2D vector, the second paramter specifies which method is to be used (1 for Whitt, 2 for MS, 3 for Sor, and 4 for Mor)
-};*/
-
/**************************************************************************************************/
/*This Class is similar to the GeometricSeries.h class. It calculates
public:
void doKSTest(vector<double>, vector<double>);
};
-/**************************************************************************************************/
-/*This Class calculates the Log Series Distribution for the data.
-It then generates a D-Statistic and prints the D-Statistic and
-the critical values for the Kolmogorov-Smirnov 1 sample test.*/
-
-/*class LogSD
-{
- public:
- void doLogSD(vector<double>, vector<double>);
-};*/
/**************************************************************************************************/
//This Class calculates and prints the Q-Statistic for the data.
double doubles = (double)rank->get(2);
double chaovar = 0.0000;
- double chao = sobs + pow(singles,2)/(2*(doubles+1)) - (singles*doubles/(2*pow(doubles+1,2)));
+ double chao = sobs + singles*(singles-1)/(2*(doubles+1));
- if(doubles==0){chaovar=0;}
- else{
- double g=singles/doubles;
- chaovar = doubles*(0.25*pow(g,4)+pow(g,3)+0.5*pow(g,2));
+ if(singles > 0 && doubles > 0){
+ chaovar = singles*(singles-1)/(2*(doubles+1))
+ + singles*pow(2*singles-1, 2)/(4*pow(doubles+1,2))
+ + pow(singles, 2)*doubles*pow(singles-1, 2)/(4*pow(doubles+1,4));
}
-
-
+ else if(singles > 0 && doubles == 0){
+ chaovar = singles*(singles-1)/2 + singles*pow(2*singles-1, 2)/4 - pow(singles, 4)/(4*chao);
+ }
+ else if(singles == 0){
+ chaovar = sobs*exp(-1*rank->getNumSeqs()/sobs)*(1-exp(-1*rank->getNumSeqs()/sobs));
+ }
+
double chaohci, chaolci;
- if(chao==sobs){
- double ci = 1.96*pow(chaovar,0.5);
- chaolci = chao-ci;//chao lci
- chaohci = chao+ci;//chao hci
- }
- else{
+ if(singles>0){
double denom = pow(chao-sobs,2);
double c = exp(1.96*pow((log(1+chaovar/denom)),0.5));
chaolci = sobs+(chao-sobs)/c;//chao lci
chaohci = sobs+(chao-sobs)*c;//chao hci
}
+ else{
+ double p = exp(-1*rank->getNumSeqs()/sobs);
+ chaolci = sobs/(1-p)-1.96*pow(sobs*p/(1-p), 0.5);
+ chaohci = sobs/(1-p)+1.96*pow(sobs*p/(1-p), 0.5);
+ if(chaolci < sobs){ chaolci = sobs; }
+ }
data[0] = chao;
data[1] = chaolci;
#include "logsd.h"
#include "bergerparker.h"
#include "bstick.h"
-
+#include "coverage.h"
//**********************************************************************************************************************
CollectCommand::CollectCommand(){
cDisplays.push_back(new CollectDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"chao")));
}else if (globaldata->Estimators[i] == "nseqs") {
cDisplays.push_back(new CollectDisplay(new NSeqs(), new OneColumnFile(fileNameRoot+"nseqs")));
+ }else if (globaldata->Estimators[i] == "coverage") {
+ cDisplays.push_back(new CollectDisplay(new Coverage(), new OneColumnFile(fileNameRoot+"coverage")));
}else if (globaldata->Estimators[i] == "ace") {
convert(globaldata->getAbund(), abund);
cDisplays.push_back(new CollectDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"ace")));
cDisplays.push_back(new CollectDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"simpson")));
}else if (globaldata->Estimators[i] == "bootstrap") {
cDisplays.push_back(new CollectDisplay(new Bootstrap(), new OneColumnFile(fileNameRoot+"bootstrap")));
- }else if (globaldata->Estimators[i] == "geom") {
- cDisplays.push_back(new CollectDisplay(new Geom(), new OneColumnFile(fileNameRoot+"geom")));
+ }else if (globaldata->Estimators[i] == "geometric") {
+ cDisplays.push_back(new CollectDisplay(new Geom(), new OneColumnFile(fileNameRoot+"geometric")));
}else if (globaldata->Estimators[i] == "qstat") {
cDisplays.push_back(new CollectDisplay(new QStat(), new OneColumnFile(fileNameRoot+"qstat")));
- }else if (globaldata->Estimators[i] == "logsd") {
- cDisplays.push_back(new CollectDisplay(new LogSD(), new OneColumnFile(fileNameRoot+"logsd")));
+ }else if (globaldata->Estimators[i] == "logseries") {
+ cDisplays.push_back(new CollectDisplay(new LogSD(), new OneColumnFile(fileNameRoot+"logseries")));
}else if (globaldata->Estimators[i] == "bergerparker") {
cDisplays.push_back(new CollectDisplay(new BergerParker(), new OneColumnFile(fileNameRoot+"bergerparker")));
}else if (globaldata->Estimators[i] == "bstick") {
/***********************************************************************************/
EstOutput Geom::getValues(SAbundVector* rank){
try {
- data.resize(2,0);
+ data.resize(3,0);
rdata = getRAbundVector(rank);
int numInd = rdata.getNumSeqs();
double sumExp = 0;
double sumObs = 0;
double maxDiff = 0;
-
- for(int i = 0; i < rdata.size(); i++)
+
+ for(int i = 0; i < numSpec; i++)
{
sumObs += rdata.get(i);
sumExp += numInd*cK*k*pow(1-k, i);
+
double diff = fabs(sumObs-sumExp);
- if(diff > maxDiff)
- maxDiff = diff;
+ if(diff > maxDiff) { maxDiff = diff; }
+
}
double DStatistic = maxDiff/numInd;
cout << "Critical value for 95% confidence interval = ";*/
if(rdata.size() > 20)
{
- critVal = .886/sqrt(rdata.size());
+ data[1] = 1.031/sqrt(rdata.size());
+ data[2] = 0.886/sqrt(rdata.size());
}
else
{
cout << "If D-Statistic is less than the critical value then the data fits the Geometric Series model w/ 95% confidence.\n\n";*/
data[0] = DStatistic;
- data[1] = critVal;
if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }
if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; }
+ if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; }
return data;
}
class Geom : public Calculator {
public:
- Geom() : Calculator("geom", 3) {};
+ Geom() : Calculator("geometric", 3) {};
EstOutput getValues(SAbundVector*);
EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;};
SAbundVector rankw = SAbundVector(dvec, mr,nb,ns);
SAbundVector *rank = &rankw;*/
- data.resize(2,0);
+ data.resize(3,0);
int numInd = rank->getNumSeqs();
int numSpec = rank->getNumBins();
double snRatio = (double)numSpec/numInd;
class LogSD : public Calculator {
public:
- LogSD() : Calculator("logsd", 3) {};
+ LogSD() : Calculator("logseries", 3) {};
EstOutput getValues(SAbundVector*);
- EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;};
+ EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) { return data; };
private:
double logS(double);
double ChatPi = Chat*pi;
if(ChatPi>0){
npShannon += rank->get(i) * ChatPi*log(ChatPi)/(1-pow(1-ChatPi,(double)sampled));
+ cout << ChatPi << '\t' << rank->get(i) * ChatPi*log(ChatPi)/(1-pow(1-ChatPi,(double)sampled)) << endl;
}
}
npShannon = -npShannon;
*/
#include "calculator.h"
-/*This class implements the LogSD estimator on single group.
+/*This class implements the q statistic on single group.
It is a child of the calculator class.*/
/***********************************************************************/
class QStat : public Calculator {
public:
- QStat() : Calculator("qstat", 3) {};
+ QStat() : Calculator("qstat", 1) {};
EstOutput getValues(SAbundVector*);
EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;};
#include "npshannon.h"
#include "shannon.h"
#include "jackknife.h"
+#include "coverage.h"
//**********************************************************************************************************************
rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"r_simpson")));
}else if (globaldata->Estimators[i] == "bootstrap") {
rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(fileNameRoot+"r_bootstrap")));
+ }else if (globaldata->Estimators[i] == "coverage") {
+ rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(fileNameRoot+"r_coverage")));
}else if (globaldata->Estimators[i] == "nseqs") {
rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(fileNameRoot+"r_nseqs")));
}
}
shannon = -shannon;
- double hvar = (hvara-pow(shannon,2))/(double)sampled+(double)sobs/(double)(2*sampled*sampled);
+ double hvar = (hvara-pow(shannon,2))/(double)sampled+(double)(sobs-1)/(double)(2*sampled*sampled);
double ci = 0;
#include "qstat.h"
#include "bergerparker.h"
#include "bstick.h"
+#include "coverage.h"
//**********************************************************************************************************************
sumCalculators.push_back(new Sobs());
}else if(globaldata->Estimators[i] == "chao"){
sumCalculators.push_back(new Chao1());
- }else if(globaldata->Estimators[i] == "geom"){
+ }else if(globaldata->Estimators[i] == "coverage"){
+ sumCalculators.push_back(new Coverage());
+ }else if(globaldata->Estimators[i] == "geometric"){
sumCalculators.push_back(new Geom());
- }else if(globaldata->Estimators[i] == "logsd"){
+ }else if(globaldata->Estimators[i] == "logseries"){
sumCalculators.push_back(new LogSD());
}else if(globaldata->Estimators[i] == "qstat"){
sumCalculators.push_back(new QStat());
single["simpson"] = "simpson";
single["bergerparker"] = "bergerparker";
single["bootstrap"] = "bootstrap";
- single["geom"] = "geom";
- single["logsd"] = "logsd";
+ single["geometric"] = "geometric";
+ single["logseries"] = "logseries";
single["qstat"] = "qstat";
single["bstick"] = "bstick";
single["nseqs"] = "nseqs";
+ single["coverage"] = "coverage";
+
single["default"] = "default";
}
catch(exception& e) {
rarefaction["simpson"] = "simpson";
rarefaction["bootstrap"] = "bootstrap";
rarefaction["nseqs"] = "nseqs";
+ rarefaction["coverage"] = "coverage";
rarefaction["default"] = "default";
}
catch(exception& e) {
summary["npshannon"] = "npshannon";
summary["simpson"] = "simpson";
summary["bergerparker"] = "bergerparker";
- summary["geom"] = "geom";
+ summary["geometric"] = "geometric";
summary["bootstrap"] = "bootstrap";
summary["logsd"] = "logsd";
summary["qstat"] = "qstat";
summary["bstick"] = "bstick";
summary["nseqs"] = "nseqs";
+ summary["coverage"] = "coverage";
summary["default"] = "default";
}
catch(exception& e) {