From: westcott Date: Mon, 18 May 2009 18:24:13 +0000 (+0000) Subject: added boneh, efron, and solow calculators X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=37a2c0b7dc68bfa53c8f38a713259929c4c46a9f added boneh, efron, and solow calculators --- diff --git a/boneh.cpp b/boneh.cpp new file mode 100644 index 0000000..7b52683 --- /dev/null +++ b/boneh.cpp @@ -0,0 +1,87 @@ +/* + * boneh.cpp + * Mothur + * + * Created by Thomas Ryabin on 5/13/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "boneh.h" +#include + +/***********************************************************************/ +double Boneh::getV(double f1, double n, double rs) { + + //cout << "f1 = " << f1 << "\nn = " << n << "\nrs = " << rs << "\n\n"; + + double accuracy = .0001; + double v = 100000.0; + double step = v/2; + double ls = v * (1 - pow((1 - f1/(n*v)), n)); + + //cout << "ls = " << ls << "\n"; + + while(abs(ls - rs) > accuracy) { + if(ls > rs) + v -= step; + else + v += step; + + ls = v* (1 - pow((1 - f1/(n*v)), n)); + step /= 2; + + // cout << "ls = " << ls << "\n"; + } + + return v; +} + +/***********************************************************************/ +EstOutput Boneh::getValues(SAbundVector* rank){ + + try { + data.resize(1,0); + + bool valid = false; + double sum = 0; + double n = (double)rank->size() - 1; + double f1 = (double)rank->get(1); + + for(int i = 1; i < rank->size(); i++) + sum += (double)rank->get(i) * exp(-i); + + if(rank->get(1) > sum) + valid = true; + + sum = 0; + if(valid) { + for(int j = 1; j < rank->size(); j++) + sum += rank->get(j) * pow((1 - (double)j / n), n); + + double v = getV(f1, n, sum); + + // cout << "v = " << v << "\n"; + + sum = 0; + for(int j = 1; j < rank->size(); j++) + sum += pow(1 - (double)rank->get(j) / n, n) * (1 - pow(1 - (double)rank->get(j) / n, m)) + v * pow(1 - f1/(n*v), n) * (1 - pow(1 - f1/(n*v), m)); + + } + + data[0] = sum; + + return data; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the Coverage class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the Coverage class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +}; + + +/***********************************************************************/ diff --git a/boneh.h b/boneh.h new file mode 100644 index 0000000..919200c --- /dev/null +++ b/boneh.h @@ -0,0 +1,34 @@ +#ifndef BONEH_H +#define BONEH_H + +/* + * boneh.h + * Mothur + * + * Created by Thomas Ryabin on 5/13/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "calculator.h" + +/* This class implements the boneh calculator on single group. + It is a child of the calculator class. */ + +/***********************************************************************/ + +class Boneh : public Calculator { + +public: + Boneh(int size) : m(size), Calculator("boneh", 1, false) {}; + EstOutput getValues(SAbundVector*); + EstOutput getValues(vector) {return data;}; +private: + double getV(double, double, double); + int m; +}; + + +/***********************************************************************/ + +#endif diff --git a/collectcommand.cpp b/collectcommand.cpp index 233b7be..e429dff 100644 --- a/collectcommand.cpp +++ b/collectcommand.cpp @@ -23,7 +23,9 @@ #include "bergerparker.h" #include "bstick.h" #include "goodscoverage.h" - +#include "efron.h" +#include "boneh.h" +#include "solow.h" #include "coverage.h" @@ -71,6 +73,15 @@ CollectCommand::CollectCommand(){ cDisplays.push_back(new CollectDisplay(new BStick(), new ThreeColumnFile(fileNameRoot+"bstick"))); }else if (globaldata->Estimators[i] == "goodscoverage") { cDisplays.push_back(new CollectDisplay(new GoodsCoverage(), new OneColumnFile(fileNameRoot+"goodscoverage"))); + }else if (globaldata->Estimators[i] == "efron") { + convert(globaldata->getSize(), size); + cDisplays.push_back(new CollectDisplay(new Efron(size), new OneColumnFile(fileNameRoot+"efron"))); + }else if (globaldata->Estimators[i] == "boneh") { + convert(globaldata->getSize(), size); + cDisplays.push_back(new CollectDisplay(new Boneh(size), new OneColumnFile(fileNameRoot+"boneh"))); + }else if (globaldata->Estimators[i] == "solow") { + convert(globaldata->getSize(), size); + cDisplays.push_back(new CollectDisplay(new Solow(size), new OneColumnFile(fileNameRoot+"solow"))); } } } diff --git a/collectcommand.h b/collectcommand.h index 016f37e..8057f9f 100644 --- a/collectcommand.h +++ b/collectcommand.h @@ -49,7 +49,7 @@ private: Collect* cCurve; ValidCalculators* validCalculator; vector cDisplays; - int freq, abund; + int freq, abund, size; }; diff --git a/collectsharedcommand.cpp b/collectsharedcommand.cpp index 0ac767d..582628d 100644 --- a/collectsharedcommand.cpp +++ b/collectsharedcommand.cpp @@ -30,7 +30,7 @@ #include "sharedmorisitahorn.h" #include "sharedbraycurtis.h" #include "sharedjackknife.h" -#include "sharedwhittaker.h" +#include "whittaker.h" diff --git a/efron.cpp b/efron.cpp new file mode 100644 index 0000000..e843641 --- /dev/null +++ b/efron.cpp @@ -0,0 +1,44 @@ +/* + * efron.cpp + * Mothur + * + * Created by Thomas Ryabin on 5/13/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "efron.h" +#include + +/***********************************************************************/ +EstOutput Efron::getValues(SAbundVector* rank){ + + try { + data.resize(1,0); + + if(m > rank->size()-1) { + cout << "Error in the 'efron' calculator. 'size' must be less than the length of the smalled sabund vector.\n"; + data[0] = 0; + return data; + } + + double sum = 0; + for(int i = 1; i < rank->size(); i++) + sum += pow(-1, (double)(i+1)) * pow(((double)m / (double)(rank->size()-1)), i) * (double)(rank->get(i)); + + data[0] = sum; + + return data; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the Coverage class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the Coverage class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +}; + + +/***********************************************************************/ diff --git a/efron.h b/efron.h new file mode 100644 index 0000000..652159d --- /dev/null +++ b/efron.h @@ -0,0 +1,33 @@ +#ifndef EFRON_H +#define EFRON_H + +/* + * efron.h + * Mothur + * + * Created by Thomas Ryabin on 5/13/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "calculator.h" + +/* This class implements the efron calculator on single group. + It is a child of the calculator class. */ + +/***********************************************************************/ + +class Efron : public Calculator { + +public: + Efron(int size) : m(size), Calculator("efron", 1, false) {}; + EstOutput getValues(SAbundVector*); + EstOutput getValues(vector) {return data;}; +private: + int m; +}; + + +/***********************************************************************/ + +#endif diff --git a/errorchecking.cpp b/errorchecking.cpp index dfe9607..b460987 100644 --- a/errorchecking.cpp +++ b/errorchecking.cpp @@ -118,6 +118,8 @@ bool ErrorCheck::checkInput(string input) { if (parameter == "scale" ) { scale = value; } if (parameter == "ends" ) { ends = value; } if (parameter == "processors" ) { processors = value; } + if (parameter == "size" ) { size = value; } + if (parameter == "template") { templatefile = value; } if (parameter == "search") { search = value; } if (parameter == "ksize") { ksize = value; } @@ -167,6 +169,7 @@ bool ErrorCheck::checkInput(string input) { if (parameter == "scale" ) { scale = value; } if (parameter == "ends" ) { ends = value; } if (parameter == "processors" ) { processors = value; } + if (parameter == "size" ) { size = value; } if (parameter == "template") { templatefile = value; } if (parameter == "search") { search = value; } if (parameter == "ksize") { ksize = value; } diff --git a/errorchecking.h b/errorchecking.h index d6388f7..5057ab0 100644 --- a/errorchecking.h +++ b/errorchecking.h @@ -35,7 +35,7 @@ class ErrorCheck { void clear(); void refresh(); string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, nexusfile, clustalfile, treefile, sharedfile, cutoff, format; - string precision, method, fileroot, label, line, iters, jumble, freq, single, rarefaction, shared, summary, randomtree, abund, sorted, trump, soft, filter, scale, ends, processors; + string precision, method, fileroot, label, line, iters, jumble, freq, single, rarefaction, shared, summary, randomtree, abund, sorted, trump, soft, filter, scale, ends, processors, size; string templatefile, search, ksize, align, match, mismatch, gapopen, gapextend; string commandName, optionText; bool errorFree; diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp index 6170d00..f4cc6d8 100644 --- a/filterseqscommand.cpp +++ b/filterseqscommand.cpp @@ -14,7 +14,7 @@ void FilterSeqsCommand::doTrump() { trump = globaldata->getTrump(); for(int i = 0; i < db->size(); i++) { Sequence cur = db->get(i); - string curAligned = cur.getUnaligned(); + string curAligned = cur.getAligned(); for(int j = 0; j < curAligned.length(); j++) { string curChar = curAligned.substr(j, 1); if(curChar.compare(trump) == 0) @@ -37,7 +37,7 @@ void FilterSeqsCommand::doSoft() { for(int i = 0; i < db->size(); i++) { Sequence cur = db->get(i); - string curAligned = cur.getUnaligned(); + string curAligned = cur.getAligned(); for(int j = 0; j < curAligned.length(); j++) { string curChar = curAligned.substr(j, 1); @@ -112,7 +112,7 @@ int FilterSeqsCommand::execute() { db = readSeqs->getDB(); //for(int i = 0; i < db->size(); i++) { -// cout << db->get(i).getLength() << "\n" << db->get(i).getName() << ": " << db->get(i).getUnaligned() << "\n\n"; +// cout << db->get(i).getLength() << "\n" << db->get(i).getName() << ": " << db->get(i).getAligned() << "\n\n"; // } for(int i = 0; i < db->get(0).getLength(); i++) @@ -140,7 +140,7 @@ int FilterSeqsCommand::execute() { SequenceDB newDB; for(int i = 0; i < db->size(); i++) { Sequence curSeq = db->get(i); - string curAligned = curSeq.getUnaligned(); + string curAligned = curSeq.getAligned(); string curName = curSeq.getName(); string newAligned = ""; for(int j = 0; j < curAligned.length(); j++) diff --git a/globaldata.cpp b/globaldata.cpp index b4239f3..88c5a13 100644 --- a/globaldata.cpp +++ b/globaldata.cpp @@ -85,6 +85,11 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "scale") { scale = value; } if (key == "ends" ) { ends = value; } if (key == "processors" ) { processors = value; } + if (key == "size" ) { size = value; } + + + + if (key == "template") { templatefile = value; } if (key == "search") { search = value; } if (key == "ksize") { ksize = value; } @@ -155,6 +160,8 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "scale") { scale = value; } if (key == "ends" ) { ends = value; } if (key == "processors" ) { processors = value; } + if (key == "size" ) { size = value; } + if (key == "template") { templatefile = value; } if (key == "search") { search = value; } if (key == "ksize") { ksize = value; } @@ -196,7 +203,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ //input defaults for calculators if (commandName == "collect.single") { - if ((calc == "default") || (calc == "")) { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } + if ((calc == "default") || (calc == "")) { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson-efron-boneh-solow"; } Estimators.clear(); splitAtDash(calc, Estimators); } @@ -212,7 +219,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ splitAtDash(calc, Estimators); } if (commandName == "summary.single") { - if ((calc == "default") || (calc == "")) { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } + if ((calc == "default") || (calc == "")) { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson-efron-boneh-solow"; } Estimators.clear(); splitAtDash(calc, Estimators); } @@ -300,6 +307,13 @@ string GlobalData::getFilter() { return filter; } string GlobalData::getScale() { return scale; } string GlobalData::getEnds() { return ends; } string GlobalData::getProcessors() { return processors; } +string GlobalData::getSize() { return size; } + +void GlobalData::setListFile(string file) { listfile = file; inputFileName = file;} +void GlobalData::setRabundFile(string file) { rabundfile = file; inputFileName = file;} +void GlobalData::setSabundFile(string file) { sabundfile = file; inputFileName = file;} +void GlobalData::setPhylipFile(string file) { phylipfile = file; inputFileName = file;} +void GlobalData::setColumnFile(string file) { columnfile = file; inputFileName = file;} string GlobalData::getTemplateFile() { return templatefile;} string GlobalData::getSearch() { return search; } string GlobalData::getKSize() { return ksize; } @@ -309,12 +323,7 @@ string GlobalData::getMismatch() { return mismatch; } string GlobalData::getGapopen() { return gapopen; } string GlobalData::getGapextend() { return gapextend; } -void GlobalData::setListFile(string file) { listfile = file; inputFileName = file;} void GlobalData::setGroupFile(string file) { groupfile = file; } -void GlobalData::setRabundFile(string file) { rabundfile = file; inputFileName = file;} -void GlobalData::setSabundFile(string file) { sabundfile = file; inputFileName = file;} -void GlobalData::setPhylipFile(string file) { phylipfile = file; inputFileName = file;} -void GlobalData::setColumnFile(string file) { columnfile = file; inputFileName = file;} void GlobalData::setSharedFile(string file) { sharedfile = file; inputFileName = file; fileroot = file;} void GlobalData::setNameFile(string file) { namefile = file; } void GlobalData::setFormat(string Format) { format = Format; } @@ -377,6 +386,7 @@ void GlobalData::clear() { scale = "log10"; ends = "T"; //yes processors = "1"; + size = "1000"; search = "suffix"; ksize = "7"; align = "blast"; @@ -384,8 +394,6 @@ void GlobalData::clear() { mismatch = "-1.0"; gapopen = "-5.0"; gapextend = "-2.0"; - - } //*******************************************************/ @@ -407,6 +415,7 @@ void GlobalData::reset() { form = "integral"; ends = "T"; processors = "1"; + size = "1000"; search = "suffix"; ksize = "7"; align = "blast"; diff --git a/globaldata.hpp b/globaldata.hpp index cb9aff6..d427aba 100644 --- a/globaldata.hpp +++ b/globaldata.hpp @@ -77,6 +77,7 @@ public: string getSorted(); string getEnds(); string getProcessors(); + string getSize(); string getTemplateFile(); string getSearch(); string getKSize(); @@ -123,7 +124,7 @@ public: private: string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, nexusfile, clustalfile, treefile, sharedfile, line, label, randomtree, groups; - string cutoff, format, precision, method, fileroot, iters, jumble, freq, calc, abund, step, form, sorted, trump, soft, filter, scale, ends, processors, templatefile, search, ksize, align, match; + string cutoff, format, precision, method, fileroot, iters, jumble, freq, calc, abund, step, form, sorted, trump, soft, filter, scale, ends, processors, templatefile, search, ksize, align, match, size; string mismatch, gapopen, gapextend; diff --git a/solow.cpp b/solow.cpp new file mode 100644 index 0000000..3a15fdf --- /dev/null +++ b/solow.cpp @@ -0,0 +1,39 @@ +/* + * solow.cpp + * Mothur + * + * Created by Thomas Ryabin on 5/13/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "solow.h" +#include + + +/***********************************************************************/ +EstOutput Solow::getValues(SAbundVector* rank){ + + try { + data.resize(1,0); + + double n = (double)rank->size() - 1; + double f1 = (double)rank->get(1); + double f2 = (double)rank->get(2); + + data[0] = f1*f1/2/f2 * (1 - pow(1 - 2*f2/n/f1, m)); + + return data; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the Coverage class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the Coverage class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +}; + + +/***********************************************************************/ diff --git a/solow.h b/solow.h new file mode 100644 index 0000000..6d58df2 --- /dev/null +++ b/solow.h @@ -0,0 +1,33 @@ +#ifndef SOLOW_H +#define SOLOW_H + +/* + * solow.h + * Mothur + * + * Created by Thomas Ryabin on 5/13/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "calculator.h" + +/* This class implements the solow calculator on single group. + It is a child of the calculator class. */ + +/***********************************************************************/ + +class Solow : public Calculator { + +public: + Solow(int size) : m(size), Calculator("solow", 1, false) {}; + EstOutput getValues(SAbundVector*); + EstOutput getValues(vector) {return data;}; +private: + int m; +}; + + +/***********************************************************************/ + +#endif diff --git a/summarycommand.cpp b/summarycommand.cpp index bd8deb1..d0124d8 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -24,6 +24,9 @@ #include "bstick.h" #include "goodscoverage.h" #include "coverage.h" +#include "efron.h" +#include "boneh.h" +#include "solow.h" //********************************************************************************************************************** @@ -70,6 +73,15 @@ SummaryCommand::SummaryCommand(){ sumCalculators.push_back(new NSeqs()); }else if (globaldata->Estimators[i] == "goodscoverage") { sumCalculators.push_back(new GoodsCoverage()); + }else if (globaldata->Estimators[i] == "efron") { + convert(globaldata->getSize(), size); + sumCalculators.push_back(new Efron(size)); + }else if (globaldata->Estimators[i] == "boneh") { + convert(globaldata->getSize(), size); + sumCalculators.push_back(new Boneh(size)); + }else if (globaldata->Estimators[i] == "solow") { + convert(globaldata->getSize(), size); + sumCalculators.push_back(new Solow(size)); } } } diff --git a/summarycommand.h b/summarycommand.h index 8f42f45..135c402 100644 --- a/summarycommand.h +++ b/summarycommand.h @@ -46,6 +46,6 @@ private: SAbundVector* sabund; string outputFileName; ofstream outputFileHandle; - int abund; + int abund, size; }; #endif diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp index 486e6c4..46055c0 100644 --- a/summarysharedcommand.cpp +++ b/summarysharedcommand.cpp @@ -30,7 +30,7 @@ #include "sharedmorisitahorn.h" #include "sharedbraycurtis.h" #include "sharedjackknife.h" -#include "sharedwhittaker.h" +#include "whittaker.h" //********************************************************************************************************************** diff --git a/validcalculator.cpp b/validcalculator.cpp index 14fa775..7a07fd1 100644 --- a/validcalculator.cpp +++ b/validcalculator.cpp @@ -201,6 +201,9 @@ void ValidCalculators::initialSingle() { single["goodscoverage"] = "goodscoverage"; single["nseqs"] = "nseqs"; single["coverage"] = "coverage"; + single["efron"] = "efron"; + single["boneh"] = "boneh"; + single["solow"] = "solow"; single["default"] = "default"; } catch(exception& e) { @@ -294,6 +297,9 @@ void ValidCalculators::initialSummary() { summary["nseqs"] = "nseqs"; summary["goodscoverage"]= "goodscoverage"; summary["coverage"] = "coverage"; + summary["efron"] = "efron"; + summary["boneh"] = "boneh"; + summary["solow"] = "solow"; summary["default"] = "default"; } catch(exception& e) { diff --git a/validparameter.cpp b/validparameter.cpp index e79a2db..1a1555d 100644 --- a/validparameter.cpp +++ b/validparameter.cpp @@ -225,7 +225,7 @@ void ValidParameters::initCommandParameters() { string deconvoluteArray[] = {"fasta"}; commandParameters["deconvolute"] = addParameters(deconvoluteArray, sizeof(deconvoluteArray)/sizeof(string)); - string collectsingleArray[] = {"freq","line","label","calc","abund"}; + string collectsingleArray[] = {"freq","line","label","calc","abund","size"}; commandParameters["collect.single"] = addParameters(collectsingleArray, sizeof(collectsingleArray)/sizeof(string)); string collectsharedArray[] = {"freq","line","label","calc","groups"}; @@ -249,7 +249,7 @@ void ValidParameters::initCommandParameters() { string libshuffArray[] = {"iters","groups","step","form","cutoff"}; commandParameters["libshuff"] = addParameters(libshuffArray, sizeof(libshuffArray)/sizeof(string)); - string summarysingleArray[] = {"line","label","calc","abund"}; + string summarysingleArray[] = {"line","label","calc","abund","size"}; commandParameters["summary.single"] = addParameters(summarysingleArray, sizeof(summarysingleArray)/sizeof(string)); string summarysharedArray[] = {"line","label","calc","groups"}; @@ -346,6 +346,9 @@ void ValidParameters::initParameterRanges() { string softArray[] = {">=","0", "<=","100", "between"}; parameterRanges["soft"] = addParameters(softArray, rangeSize); + + string sizeArray[] = {">=","1", "<","NA", "between"}; + parameterRanges["size"] = addParameters(sizeArray, rangeSize); } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ValidParameters class Function isValidParameter. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";