From 10e9f1ff74515ff887519b08e50c30c47d2472d2 Mon Sep 17 00:00:00 2001 From: pschloss Date: Mon, 23 Aug 2010 11:52:06 +0000 Subject: [PATCH] added a few evenness calculators and fixed a couple of bugs in filter.seqs and pre.cluster --- Mothur.xcodeproj/project.pbxproj | 20 +++++++++++++ collectcommand.cpp | 20 +++++++++++++ heip.cpp | 39 +++++++++++++++++++++++++ heip.h | 27 +++++++++++++++++ invsimpson.cpp | 42 +++++++++++++++++++++++++++ invsimpson.h | 29 ++++++++++++++++++ preclustercommand.cpp | 6 ++-- rarefactcommand.cpp | 20 +++++++++++++ shannoneven.cpp | 40 +++++++++++++++++++++++++ shannoneven.h | 27 +++++++++++++++++ simpsoneven.cpp | 34 ++++++++++++++++++++++ simpsoneven.h | 28 ++++++++++++++++++ smithwilson.cpp | 50 ++++++++++++++++++++++++++++++++ smithwilson.h | 27 +++++++++++++++++ summarycommand.cpp | 15 ++++++++++ validcalculator.cpp | 17 ++++++++++- 16 files changed, 437 insertions(+), 4 deletions(-) create mode 100644 heip.cpp create mode 100644 heip.h create mode 100644 invsimpson.cpp create mode 100644 invsimpson.h create mode 100644 shannoneven.cpp create mode 100644 shannoneven.h create mode 100644 simpsoneven.cpp create mode 100644 simpsoneven.h create mode 100644 smithwilson.cpp create mode 100644 smithwilson.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index e319668..9966314 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -7,10 +7,20 @@ objects = { /* Begin PBXFileReference section */ + 7E4EBD43122018FB00D85E7B /* simpsoneven.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simpsoneven.h; sourceTree = ""; }; + 7E4EBD44122018FB00D85E7B /* simpsoneven.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = simpsoneven.cpp; sourceTree = ""; }; + 7E5B28DC121FEFCC0005339C /* shannoneven.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = shannoneven.h; sourceTree = ""; }; + 7E5B28DD121FEFCC0005339C /* shannoneven.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = shannoneven.cpp; sourceTree = ""; }; + 7E5B2917121FF53C0005339C /* heip.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = heip.h; sourceTree = ""; }; + 7E5B2918121FF53C0005339C /* heip.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = heip.cpp; sourceTree = ""; }; + 7E5B294A121FFADC0005339C /* smithwilson.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = smithwilson.h; sourceTree = ""; }; + 7E5B294B121FFADC0005339C /* smithwilson.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = smithwilson.cpp; sourceTree = ""; }; 7E84528511EF4BEB00564975 /* seqerrorcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seqerrorcommand.h; sourceTree = ""; }; 7E84528611EF4BEB00564975 /* seqerrorcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = seqerrorcommand.cpp; sourceTree = ""; }; 7E85BD1C11EB5E9B00FD37C0 /* qualityscores.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = qualityscores.h; sourceTree = ""; }; 7E85BD1D11EB5E9B00FD37C0 /* qualityscores.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = qualityscores.cpp; sourceTree = ""; }; + 7E962A40121F76B1007464B5 /* invsimpson.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = invsimpson.h; sourceTree = ""; }; + 7E962A41121F76B1007464B5 /* invsimpson.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = invsimpson.cpp; sourceTree = ""; }; 7EA299BA11E384940022D8D3 /* sensspeccommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sensspeccommand.h; sourceTree = ""; }; 7EA299BB11E384940022D8D3 /* sensspeccommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sensspeccommand.cpp; sourceTree = ""; }; 7EC61A0911BEA6AF00F668D9 /* weightedlinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = weightedlinkage.cpp; sourceTree = ""; }; @@ -480,6 +490,8 @@ 08FB7794FE84155DC02AAC07 /* mothur */ = { isa = PBXGroup; children = ( + 7E5B28DC121FEFCC0005339C /* shannoneven.h */, + 7E5B28DD121FEFCC0005339C /* shannoneven.cpp */, A7639F8D1175DF35008F5578 /* makefile */, A7DA1FF0113FECD400BF472F /* alignment.cpp */, A7DA1FF1113FECD400BF472F /* alignment.hpp */, @@ -570,6 +582,12 @@ A7CB593B11402EF90010EB83 /* calculators */ = { isa = PBXGroup; children = ( + 7E4EBD43122018FB00D85E7B /* simpsoneven.h */, + 7E4EBD44122018FB00D85E7B /* simpsoneven.cpp */, + 7E5B294A121FFADC0005339C /* smithwilson.h */, + 7E5B294B121FFADC0005339C /* smithwilson.cpp */, + 7E962A40121F76B1007464B5 /* invsimpson.h */, + 7E962A41121F76B1007464B5 /* invsimpson.cpp */, A7DA200B113FECD400BF472F /* calculator.cpp */, A7DA200C113FECD400BF472F /* calculator.h */, A7DA1FED113FECD400BF472F /* ace.h */, @@ -684,6 +702,8 @@ A7CB593E11402F110010EB83 /* commands */ = { isa = PBXGroup; children = ( + 7E5B2917121FF53C0005339C /* heip.h */, + 7E5B2918121FF53C0005339C /* heip.cpp */, A7DA202B113FECD400BF472F /* command.hpp */, A7DA1FEF113FECD400BF472F /* aligncommand.h */, A7DA1FEE113FECD400BF472F /* aligncommand.cpp */, diff --git a/collectcommand.cpp b/collectcommand.cpp index 8c1565b..ca48d75 100644 --- a/collectcommand.cpp +++ b/collectcommand.cpp @@ -14,8 +14,13 @@ #include "chao1.h" #include "bootstrap.h" #include "simpson.h" +#include "simpsoneven.h" +#include "invsimpson.h" #include "npshannon.h" #include "shannon.h" +#include "smithwilson.h" +#include "heip.h" +#include "shannoneven.h" #include "jackknife.h" #include "geom.h" #include "qstat.h" @@ -178,12 +183,27 @@ int CollectCommand::execute(){ }else if (Estimators[i] == "shannon") { cDisplays.push_back(new CollectDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"shannon"))); outputNames.push_back(fileNameRoot+"shannon"); + }else if (Estimators[i] == "shannoneven") { + cDisplays.push_back(new CollectDisplay(new ShannonEven(), new OneColumnFile(fileNameRoot+"shannoneven"))); + outputNames.push_back(fileNameRoot+"shannoneven"); }else if (Estimators[i] == "npshannon") { cDisplays.push_back(new CollectDisplay(new NPShannon(), new OneColumnFile(fileNameRoot+"np_shannon"))); outputNames.push_back(fileNameRoot+"np_shannon"); + }else if (Estimators[i] == "heip") { + cDisplays.push_back(new CollectDisplay(new Heip(), new OneColumnFile(fileNameRoot+"heip"))); + outputNames.push_back(fileNameRoot+"heip"); + }else if (Estimators[i] == "smithwilson") { + cDisplays.push_back(new CollectDisplay(new SmithWilson(), new OneColumnFile(fileNameRoot+"smithwilson"))); + outputNames.push_back(fileNameRoot+"smithwilson"); }else if (Estimators[i] == "simpson") { cDisplays.push_back(new CollectDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"simpson"))); outputNames.push_back(fileNameRoot+"simpson"); + }else if (Estimators[i] == "simpsoneven") { + cDisplays.push_back(new CollectDisplay(new SimpsonEven(), new OneColumnFile(fileNameRoot+"simpsoneven"))); + outputNames.push_back(fileNameRoot+"simpsoneven"); + }else if (Estimators[i] == "invsimpson") { + cDisplays.push_back(new CollectDisplay(new InvSimpson(), new ThreeColumnFile(fileNameRoot+"invsimpson"))); + outputNames.push_back(fileNameRoot+"invsimpson"); }else if (Estimators[i] == "bootstrap") { cDisplays.push_back(new CollectDisplay(new Bootstrap(), new OneColumnFile(fileNameRoot+"bootstrap"))); outputNames.push_back(fileNameRoot+"bootstrap"); diff --git a/heip.cpp b/heip.cpp new file mode 100644 index 0000000..7ae285d --- /dev/null +++ b/heip.cpp @@ -0,0 +1,39 @@ +/* + * heip.cpp + * Mothur + * + * Created by Pat Schloss on 8/21/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "heip.h" +#include "shannon.h" + +/***********************************************************************/ + +EstOutput Heip::getValues(SAbundVector* rank){ + try { + data.resize(1,0.0000); + vector shanData(3,0); + Shannon* shannon = new Shannon(); + shanData = shannon->getValues(rank); + + long int sobs = rank->getNumBins(); + if(sobs > 1){ + data[0] = (exp(shanData[0])-1) / (sobs - 1);; + } + else{ + data[0] = 1; + } + + delete shannon; + return data; + } + catch(exception& e) { + m->errorOut(e, "Heip", "getValues"); + exit(1); + } +} + +/***********************************************************************/ diff --git a/heip.h b/heip.h new file mode 100644 index 0000000..3ae115f --- /dev/null +++ b/heip.h @@ -0,0 +1,27 @@ +#ifndef HEIP +#define HEIP + +/* + * heip.h + * Mothur + * + * Created by Pat Schloss on 8/21/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "calculator.h" + +/***********************************************************************/ + +class Heip : public Calculator { + +public: + Heip() : Calculator("heip", 1, false) {}; + EstOutput getValues(SAbundVector*); + EstOutput getValues(vector) {return data;}; +}; + +/***********************************************************************/ + +#endif diff --git a/invsimpson.cpp b/invsimpson.cpp new file mode 100644 index 0000000..238dea6 --- /dev/null +++ b/invsimpson.cpp @@ -0,0 +1,42 @@ +/* + * invsimpson.cpp + * Mothur + * + * Created by Pat Schloss on 8/20/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "invsimpson.h" +#include "simpson.h" + +/***********************************************************************/ + +EstOutput InvSimpson::getValues(SAbundVector* rank){ + try { + //vector simpsonData(3,0); + data.resize(3,0); + vector simpData(3,0); + Simpson* simp = new Simpson(); + simpData = simp->getValues(rank); + + if(simpData[0] != 0){ + data[0] = 1/simpData[0]; + data[1] = 1/simpData[2]; + data[2] = 1/simpData[1]; + } + else{ + data.assign(3,1); + } + + delete simp; + + return data; + } + catch(exception& e) { + m->errorOut(e, "InvSimpson", "getValues"); + exit(1); + } +} + +/***********************************************************************/ diff --git a/invsimpson.h b/invsimpson.h new file mode 100644 index 0000000..e89d8a2 --- /dev/null +++ b/invsimpson.h @@ -0,0 +1,29 @@ +#ifndef INVSIMPSON +#define INVSIMPSON + +/* + * invsimpson.h + * Mothur + * + * Created by Pat Schloss on 8/20/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + + + +#include "calculator.h" + +/***********************************************************************/ + +class InvSimpson : public Calculator { + +public: + InvSimpson() : Calculator("invsimpson", 3, false) {}; + EstOutput getValues(SAbundVector*); + EstOutput getValues(vector) {return data;}; +}; + +/***********************************************************************/ + +#endif diff --git a/preclustercommand.cpp b/preclustercommand.cpp index cab2963..cfa91bf 100644 --- a/preclustercommand.cpp +++ b/preclustercommand.cpp @@ -306,9 +306,9 @@ void PreClusterCommand::readNameFile(){ in >> firstCol >> secondCol; gobble(in); names[firstCol] = secondCol; int size = 1; - while (secondCol.find_first_of(',') != -1) { - size++; - secondCol = secondCol.substr(secondCol.find_first_of(',')+1, secondCol.length()); + + for(int i=0;i simpsonData(3,0); + data.resize(1,0); + vector shanData(3,0); + Shannon* shannon = new Shannon(); + shanData = shannon->getValues(rank); + + long int sobs = rank->getNumBins(); + if(sobs > 1){ + data[0] = shanData[0] / log(sobs); + } + else{ + data[0] = 1; + } + + delete shannon; + return data; + } + catch(exception& e) { + m->errorOut(e, "ShannonEven", "getValues"); + exit(1); + } +} + +/***********************************************************************/ diff --git a/shannoneven.h b/shannoneven.h new file mode 100644 index 0000000..5b03d72 --- /dev/null +++ b/shannoneven.h @@ -0,0 +1,27 @@ +#ifndef SHANNONEVEN +#define SHANNONEVEN + +/* + * shannoneven.h + * Mothur + * + * Created by Pat Schloss on 8/21/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "calculator.h" + +/***********************************************************************/ + +class ShannonEven : public Calculator { + +public: + ShannonEven() : Calculator("shannoneven", 1, false) {}; + EstOutput getValues(SAbundVector*); + EstOutput getValues(vector) {return data;}; +}; + +/***********************************************************************/ + +#endif diff --git a/simpsoneven.cpp b/simpsoneven.cpp new file mode 100644 index 0000000..e34534a --- /dev/null +++ b/simpsoneven.cpp @@ -0,0 +1,34 @@ +/* + * simpsoneven.cpp + * Mothur + * + * Created by Pat Schloss on 8/21/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "simpsoneven.h" +#include "invsimpson.h" + +/***********************************************************************/ + +EstOutput SimpsonEven::getValues(SAbundVector* rank){ + try { + data.resize(1,0); + + InvSimpson* simp = new InvSimpson(); + vector invSimpData = simp->getValues(rank); + + data[0] = invSimpData[0] / double(rank->getNumBins()); + + + return data; + } + catch(exception& e) { + m->errorOut(e, "SimpsonEven", "getValues"); + exit(1); + } +} + +/***********************************************************************/ + diff --git a/simpsoneven.h b/simpsoneven.h new file mode 100644 index 0000000..0fb8949 --- /dev/null +++ b/simpsoneven.h @@ -0,0 +1,28 @@ +#ifndef SIMPSONEVEN_H +#define SIMPSONEVEN_H + +/* + * simpsoneven.h + * Mothur + * + * Created by Pat Schloss on 8/21/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "calculator.h" + +/***********************************************************************/ + +class SimpsonEven : public Calculator { + +public: + SimpsonEven() : Calculator("simpsoneven", 1, false) {}; + EstOutput getValues(SAbundVector*); + EstOutput getValues(vector) {return data;}; +}; + +/***********************************************************************/ + +#endif + diff --git a/smithwilson.cpp b/smithwilson.cpp new file mode 100644 index 0000000..ec04196 --- /dev/null +++ b/smithwilson.cpp @@ -0,0 +1,50 @@ +/* + * smithwilson.cpp + * Mothur + * + * Created by Pat Schloss on 8/21/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "smithwilson.h" + +/***********************************************************************/ + +EstOutput SmithWilson::getValues(SAbundVector* rank){ + try { + + data.resize(1,0); + + double maxRank = rank->getMaxRank(); + double sobs = rank->getNumBins(); + + double innerSum = 0; + for(int i=1;i<=maxRank;i++){ + innerSum += rank->get(i) * log(i); + } + innerSum /= sobs; + + double outerSum = 0; + for(int i=1;i<=maxRank;i++){ + outerSum += rank->get(i) * (log(i) - innerSum) * (log(i) - innerSum); + } + outerSum /= sobs; + + if(outerSum > 0){ + data[0] = 1.0000 - 2.0000 / (3.14159 * atan(outerSum)); + } + else{ + data[0] = 1.0000; + } + + + return data; + } + catch(exception& e) { + m->errorOut(e, "InvSimpson", "getValues"); + exit(1); + } +} + +/***********************************************************************/ diff --git a/smithwilson.h b/smithwilson.h new file mode 100644 index 0000000..6e060d9 --- /dev/null +++ b/smithwilson.h @@ -0,0 +1,27 @@ +#ifndef SMITHWILSON +#define SMITHWILSON + +/* + * smithwilson.h + * Mothur + * + * Created by Pat Schloss on 8/21/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "calculator.h" + +/***********************************************************************/ + +class SmithWilson : public Calculator { + +public: + SmithWilson() : Calculator("smithwilson", 1, false) {}; + EstOutput getValues(SAbundVector*); + EstOutput getValues(vector) {return data;}; +}; + +/***********************************************************************/ + +#endif diff --git a/summarycommand.cpp b/summarycommand.cpp index 95f4262..087e63f 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -14,8 +14,13 @@ #include "chao1.h" #include "bootstrap.h" #include "simpson.h" +#include "simpsoneven.h" +#include "invsimpson.h" #include "npshannon.h" #include "shannon.h" +#include "heip.h" +#include "smithwilson.h" +#include "shannoneven.h" #include "jackknife.h" #include "geom.h" #include "logsd.h" @@ -193,10 +198,20 @@ int SummaryCommand::execute(){ sumCalculators.push_back(new Jackknife()); }else if(Estimators[i] == "shannon"){ sumCalculators.push_back(new Shannon()); + }else if(Estimators[i] == "shannoneven"){ + sumCalculators.push_back(new ShannonEven()); }else if(Estimators[i] == "npshannon"){ sumCalculators.push_back(new NPShannon()); + }else if(Estimators[i] == "heip"){ + sumCalculators.push_back(new Heip()); + }else if(Estimators[i] == "smithwilson"){ + sumCalculators.push_back(new SmithWilson()); }else if(Estimators[i] == "simpson"){ sumCalculators.push_back(new Simpson()); + }else if(Estimators[i] == "simpsoneven"){ + sumCalculators.push_back(new SimpsonEven()); + }else if(Estimators[i] == "invsimpson"){ + sumCalculators.push_back(new InvSimpson()); }else if(Estimators[i] == "bootstrap"){ sumCalculators.push_back(new Bootstrap()); }else if (Estimators[i] == "nseqs") { diff --git a/validcalculator.cpp b/validcalculator.cpp index 0ade873..bc8c4b8 100644 --- a/validcalculator.cpp +++ b/validcalculator.cpp @@ -209,11 +209,16 @@ void ValidCalculators::initialSingle() { single["jack"] = "jack"; single["shannon"] = "shannon"; single["npshannon"] = "npshannon"; + single["shannoneven"] = "shannoneven"; + single["smithwilson"] = "smithwilson"; + single["heip"] = "heip"; single["simpson"] = "simpson"; + single["simpsoneven"] = "simpsoneven"; + single["invsimpson"] = "invsimpson"; single["bergerparker"] = "bergerparker"; single["bootstrap"] = "bootstrap"; single["geometric"] = "geometric"; - single["logseries"] = "logseries"; + single["logseries"] = "logseries"; single["qstat"] = "qstat"; single["bstick"] = "bstick"; single["goodscoverage"] = "goodscoverage"; @@ -271,8 +276,13 @@ void ValidCalculators::initialRarefaction() { rarefaction["ace"] = "ace"; rarefaction["jack"] = "jack"; rarefaction["shannon"] = "shannon"; + rarefaction["smithwilson"] = "smithwilson"; + rarefaction["heip"] = "heip"; rarefaction["npshannon"] = "npshannon"; + rarefaction["shannoneven"] = "shannoneven"; rarefaction["simpson"] = "simpson"; + rarefaction["invsimpson"] = "invsimpson"; + rarefaction["simpsoneven"] = "simpsoneven"; rarefaction["bootstrap"] = "bootstrap"; rarefaction["nseqs"] = "nseqs"; rarefaction["coverage"] = "coverage"; @@ -293,8 +303,13 @@ void ValidCalculators::initialSummary() { summary["ace"] = "ace"; summary["jack"] = "jack"; summary["shannon"] = "shannon"; + summary["heip"] = "heip"; + summary["shannoneven"] = "shannoneven"; + summary["smithwilson"] = "smithwilson"; + summary["invsimpson"] = "invsimpson"; summary["npshannon"] = "npshannon"; summary["simpson"] = "simpson"; + summary["simpsoneven"] = "simpsoneven"; summary["bergerparker"] = "bergerparker"; summary["geometric"] = "geometric"; summary["bootstrap"] = "bootstrap"; -- 2.39.2