From: pschloss Date: Mon, 23 Aug 2010 11:52:06 +0000 (+0000) Subject: added a few evenness calculators and fixed a couple of bugs in filter.seqs and pre... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=10e9f1ff74515ff887519b08e50c30c47d2472d2 added a few evenness calculators and fixed a couple of bugs in filter.seqs and pre.cluster --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index e319668..9966314 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -7,10 +7,20 @@ objects = { /* Begin PBXFileReference section */ + 7E4EBD43122018FB00D85E7B /* simpsoneven.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simpsoneven.h; sourceTree = ""; }; + 7E4EBD44122018FB00D85E7B /* simpsoneven.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = simpsoneven.cpp; sourceTree = ""; }; + 7E5B28DC121FEFCC0005339C /* shannoneven.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = shannoneven.h; sourceTree = ""; }; + 7E5B28DD121FEFCC0005339C /* shannoneven.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = shannoneven.cpp; sourceTree = ""; }; + 7E5B2917121FF53C0005339C /* heip.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = heip.h; sourceTree = ""; }; + 7E5B2918121FF53C0005339C /* heip.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = heip.cpp; sourceTree = ""; }; + 7E5B294A121FFADC0005339C /* smithwilson.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = smithwilson.h; sourceTree = ""; }; + 7E5B294B121FFADC0005339C /* smithwilson.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = smithwilson.cpp; sourceTree = ""; }; 
7E84528511EF4BEB00564975 /* seqerrorcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seqerrorcommand.h; sourceTree = ""; }; 7E84528611EF4BEB00564975 /* seqerrorcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = seqerrorcommand.cpp; sourceTree = ""; }; 7E85BD1C11EB5E9B00FD37C0 /* qualityscores.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = qualityscores.h; sourceTree = ""; }; 7E85BD1D11EB5E9B00FD37C0 /* qualityscores.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = qualityscores.cpp; sourceTree = ""; }; + 7E962A40121F76B1007464B5 /* invsimpson.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = invsimpson.h; sourceTree = ""; }; + 7E962A41121F76B1007464B5 /* invsimpson.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = invsimpson.cpp; sourceTree = ""; }; 7EA299BA11E384940022D8D3 /* sensspeccommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sensspeccommand.h; sourceTree = ""; }; 7EA299BB11E384940022D8D3 /* sensspeccommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sensspeccommand.cpp; sourceTree = ""; }; 7EC61A0911BEA6AF00F668D9 /* weightedlinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = weightedlinkage.cpp; sourceTree = ""; }; @@ -480,6 +490,8 @@ 08FB7794FE84155DC02AAC07 /* mothur */ = { isa = PBXGroup; children = ( + 7E5B28DC121FEFCC0005339C /* shannoneven.h */, + 7E5B28DD121FEFCC0005339C /* shannoneven.cpp */, A7639F8D1175DF35008F5578 /* makefile */, A7DA1FF0113FECD400BF472F /* alignment.cpp */, A7DA1FF1113FECD400BF472F /* alignment.hpp */, @@ -570,6 +582,12 @@ A7CB593B11402EF90010EB83 /* calculators */ = { isa = 
PBXGroup; children = ( + 7E4EBD43122018FB00D85E7B /* simpsoneven.h */, + 7E4EBD44122018FB00D85E7B /* simpsoneven.cpp */, + 7E5B294A121FFADC0005339C /* smithwilson.h */, + 7E5B294B121FFADC0005339C /* smithwilson.cpp */, + 7E962A40121F76B1007464B5 /* invsimpson.h */, + 7E962A41121F76B1007464B5 /* invsimpson.cpp */, A7DA200B113FECD400BF472F /* calculator.cpp */, A7DA200C113FECD400BF472F /* calculator.h */, A7DA1FED113FECD400BF472F /* ace.h */, @@ -684,6 +702,8 @@ A7CB593E11402F110010EB83 /* commands */ = { isa = PBXGroup; children = ( + 7E5B2917121FF53C0005339C /* heip.h */, + 7E5B2918121FF53C0005339C /* heip.cpp */, A7DA202B113FECD400BF472F /* command.hpp */, A7DA1FEF113FECD400BF472F /* aligncommand.h */, A7DA1FEE113FECD400BF472F /* aligncommand.cpp */, diff --git a/collectcommand.cpp b/collectcommand.cpp index 8c1565b..ca48d75 100644 --- a/collectcommand.cpp +++ b/collectcommand.cpp @@ -14,8 +14,13 @@ #include "chao1.h" #include "bootstrap.h" #include "simpson.h" +#include "simpsoneven.h" +#include "invsimpson.h" #include "npshannon.h" #include "shannon.h" +#include "smithwilson.h" +#include "heip.h" +#include "shannoneven.h" #include "jackknife.h" #include "geom.h" #include "qstat.h" @@ -178,12 +183,27 @@ int CollectCommand::execute(){ }else if (Estimators[i] == "shannon") { cDisplays.push_back(new CollectDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"shannon"))); outputNames.push_back(fileNameRoot+"shannon"); + }else if (Estimators[i] == "shannoneven") { + cDisplays.push_back(new CollectDisplay(new ShannonEven(), new OneColumnFile(fileNameRoot+"shannoneven"))); + outputNames.push_back(fileNameRoot+"shannoneven"); }else if (Estimators[i] == "npshannon") { cDisplays.push_back(new CollectDisplay(new NPShannon(), new OneColumnFile(fileNameRoot+"np_shannon"))); outputNames.push_back(fileNameRoot+"np_shannon"); + }else if (Estimators[i] == "heip") { + cDisplays.push_back(new CollectDisplay(new Heip(), new OneColumnFile(fileNameRoot+"heip"))); + 
outputNames.push_back(fileNameRoot+"heip"); + }else if (Estimators[i] == "smithwilson") { + cDisplays.push_back(new CollectDisplay(new SmithWilson(), new OneColumnFile(fileNameRoot+"smithwilson"))); + outputNames.push_back(fileNameRoot+"smithwilson"); }else if (Estimators[i] == "simpson") { cDisplays.push_back(new CollectDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"simpson"))); outputNames.push_back(fileNameRoot+"simpson"); + }else if (Estimators[i] == "simpsoneven") { + cDisplays.push_back(new CollectDisplay(new SimpsonEven(), new OneColumnFile(fileNameRoot+"simpsoneven"))); + outputNames.push_back(fileNameRoot+"simpsoneven"); + }else if (Estimators[i] == "invsimpson") { + cDisplays.push_back(new CollectDisplay(new InvSimpson(), new ThreeColumnFile(fileNameRoot+"invsimpson"))); + outputNames.push_back(fileNameRoot+"invsimpson"); }else if (Estimators[i] == "bootstrap") { cDisplays.push_back(new CollectDisplay(new Bootstrap(), new OneColumnFile(fileNameRoot+"bootstrap"))); outputNames.push_back(fileNameRoot+"bootstrap"); diff --git a/heip.cpp b/heip.cpp new file mode 100644 index 0000000..7ae285d --- /dev/null +++ b/heip.cpp @@ -0,0 +1,39 @@ +/* + * heip.cpp + * Mothur + * + * Created by Pat Schloss on 8/21/10. + * Copyright 2010 Schloss Lab. All rights reserved. 
+ *
+ */
+
+#include "heip.h"
+#include "shannon.h"
+
+/***********************************************************************/
+// Heip evenness: (exp(H) - 1) / (sobs - 1), with H read from element 0 of the Shannon calculator's output.
+EstOutput Heip::getValues(SAbundVector* rank){
+	try {
+		data.resize(1,0.0000);
+		// automatic object instead of new/delete: nothing to release on any exit path
+		Shannon shannon;
+		EstOutput shanData = shannon.getValues(rank);
+
+		long int sobs = rank->getNumBins();
+		if(sobs > 1){
+			data[0] = (exp(shanData[0])-1) / (sobs - 1);
+		}
+		else{
+			// fewer than two bins would make the denominator <= 0, so 1 is reported instead
+			data[0] = 1;
+		}
+
+		return data;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "Heip", "getValues");
+		exit(1);
+	}
+}
+
+/***********************************************************************/
diff --git a/heip.h b/heip.h
new file mode 100644
index 0000000..3ae115f
--- /dev/null
+++ b/heip.h
@@ -0,0 +1,27 @@
+#ifndef HEIP
+#define HEIP
+
+/*
+ * heip.h
+ * Mothur
+ *
+ * Created by Pat Schloss on 8/21/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "calculator.h"
+
+/***********************************************************************/
+// Calculator constructed under the name "heip"; getValues(SAbundVector*) is defined in heip.cpp.
+class Heip : public Calculator {
+// NOTE(review): the vector overload below has no template argument in this copy of the patch - restore it from upstream.
+public:
+	Heip() : Calculator("heip", 1, false) {};
+	EstOutput getValues(SAbundVector*);
+	EstOutput getValues(vector) {return data;};
+};
+
+/***********************************************************************/
+
+#endif
diff --git a/invsimpson.cpp b/invsimpson.cpp
new file mode 100644
index 0000000..238dea6
--- /dev/null
+++ b/invsimpson.cpp
@@ -0,0 +1,42 @@
+/*
+ * invsimpson.cpp
+ * Mothur
+ *
+ * Created by Pat Schloss on 8/20/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "invsimpson.h"
+#include "simpson.h"
+
+/***********************************************************************/
+// Inverse Simpson: the three outputs are reciprocals of the Simpson calculator's three values, or all 1 when its first value is 0.
+EstOutput InvSimpson::getValues(SAbundVector* rank){
+	try {
+		data.resize(3,0);
+
+		// automatic object instead of new/delete: nothing to release on any exit path
+		Simpson simp;
+		EstOutput simpData = simp.getValues(rank);
+
+		// only simpData[0] is tested for zero before inverting
+		if(simpData[0] != 0){
+			data[0] = 1/simpData[0];
+			// NOTE(review): indices 1 and 2 are swapped - presumably because reciprocals reverse the order of the interval bounds; confirm against Simpson
+			data[1] = 1/simpData[2];
+			data[2] = 1/simpData[1];
+		}
+		else{
+			data.assign(3,1);
+		}
+
+		return data;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "InvSimpson", "getValues");
+		exit(1);
+	}
+}
+
+/***********************************************************************/
diff --git a/invsimpson.h b/invsimpson.h
new file mode 100644
index 0000000..e89d8a2
--- /dev/null
+++ b/invsimpson.h
@@ -0,0 +1,29 @@
+#ifndef INVSIMPSON
+#define INVSIMPSON
+
+/*
+ * invsimpson.h
+ * Mothur
+ *
+ * Created by Pat Schloss on 8/20/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+
+#include "calculator.h"
+
+/***********************************************************************/
+// Calculator constructed under the name "invsimpson"; getValues(SAbundVector*) is defined in invsimpson.cpp.
+class InvSimpson : public Calculator {
+
+public:
+	InvSimpson() : Calculator("invsimpson", 3, false) {};
+	EstOutput getValues(SAbundVector*);
+	EstOutput getValues(vector) {return data;};
+};
+
+/***********************************************************************/
+
+#endif
diff --git a/preclustercommand.cpp b/preclustercommand.cpp
index cab2963..cfa91bf 100644
--- a/preclustercommand.cpp
+++ b/preclustercommand.cpp
@@ -306,9 +306,9 @@ void PreClusterCommand::readNameFile(){
 			in >> firstCol >> secondCol; gobble(in);
 			names[firstCol] = secondCol;
 			int size = 1;
-			while (secondCol.find_first_of(',') != -1) {
-				size++;
-				secondCol = secondCol.substr(secondCol.find_first_of(',')+1, secondCol.length());
+
+			for(int i=0;i simpsonData(3,0);
+		data.resize(1,0);
+		vector shanData(3,0);
+		Shannon* shannon = new Shannon();
+		shanData = shannon->getValues(rank);
+
+		long int sobs = rank->getNumBins();
+		if(sobs > 1){
+			data[0] = shanData[0] / log(sobs);
+		}
+		else{
+			data[0] = 1;
+		}
+
+		delete shannon;
+		return data;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "ShannonEven", "getValues");
+		exit(1);
+	}
+}
+
+/***********************************************************************/
diff --git a/shannoneven.h b/shannoneven.h
new file mode 100644
index 0000000..5b03d72
--- /dev/null
+++ b/shannoneven.h
@@ -0,0 +1,27 @@
+#ifndef SHANNONEVEN
+#define SHANNONEVEN
+
+/*
+ * shannoneven.h
+ * Mothur
+ *
+ * Created by Pat Schloss on 8/21/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "calculator.h"
+
+/***********************************************************************/
+// Calculator constructed under the name "shannoneven"; getValues(SAbundVector*) is defined in shannoneven.cpp.
+class ShannonEven : public Calculator {
+
+public:
+	ShannonEven() : Calculator("shannoneven", 1, false) {};
+	EstOutput getValues(SAbundVector*);
+	EstOutput getValues(vector) {return data;};
+};
+
+/***********************************************************************/
+
+#endif
diff --git a/simpsoneven.cpp b/simpsoneven.cpp
new file mode 100644
index 0000000..e34534a
--- /dev/null
+++ b/simpsoneven.cpp
@@ -0,0 +1,34 @@
+/*
+ * simpsoneven.cpp
+ * Mothur
+ *
+ * Created by Pat Schloss on 8/21/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "simpsoneven.h"
+#include "invsimpson.h"
+
+/***********************************************************************/
+// Simpson evenness: element 0 of the InvSimpson calculator's output divided by the number of bins.
+EstOutput SimpsonEven::getValues(SAbundVector* rank){
+	try {
+		data.resize(1,0);
+
+		// automatic object: a heap-allocated InvSimpson here was never deleted and leaked on every call
+		InvSimpson simp;
+		EstOutput invSimpData = simp.getValues(rank);
+
+		data[0] = invSimpData[0] / double(rank->getNumBins());
+
+		return data;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SimpsonEven", "getValues");
+		exit(1);
+	}
+}
+
+/***********************************************************************/
+
diff --git a/simpsoneven.h b/simpsoneven.h
new file mode 100644
index 0000000..0fb8949
--- /dev/null
+++ b/simpsoneven.h
@@ -0,0 +1,28 @@
+#ifndef SIMPSONEVEN_H
+#define SIMPSONEVEN_H
+
+/*
+ * simpsoneven.h
+ * Mothur
+ *
+ * Created by Pat Schloss on 8/21/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "calculator.h"
+
+/***********************************************************************/
+// Calculator constructed under the name "simpsoneven"; getValues(SAbundVector*) is defined in simpsoneven.cpp.
+class SimpsonEven : public Calculator {
+
+public:
+	SimpsonEven() : Calculator("simpsoneven", 1, false) {};
+	EstOutput getValues(SAbundVector*);
+	EstOutput getValues(vector) {return data;};
+};
+
+/***********************************************************************/
+
+#endif
+
diff --git a/smithwilson.cpp b/smithwilson.cpp
new file mode 100644
index 0000000..ec04196
--- /dev/null
+++ b/smithwilson.cpp
@@ -0,0 +1,50 @@
+/*
+ * smithwilson.cpp
+ * Mothur
+ *
+ * Created by Pat Schloss on 8/21/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "smithwilson.h"
+
+/***********************************************************************/
+// Smith & Wilson evenness (Evar): 1 - (2/pi) * atan(V), where V is the variance of ln(abundance) across bins.
+EstOutput SmithWilson::getValues(SAbundVector* rank){
+	try {
+
+		data.resize(1,0);
+
+		double maxRank = rank->getMaxRank();
+		double sobs = rank->getNumBins();
+		// mean of ln(i), weighted by rank->get(i) (presumably the number of bins holding i sequences)
+		double innerSum = 0;
+		for(int i=1;i<=maxRank;i++){
+			innerSum += rank->get(i) * log(i);
+		}
+		innerSum /= sobs;
+		// variance of ln(i) about that mean, using the same weights
+		double outerSum = 0;
+		for(int i=1;i<=maxRank;i++){
+			outerSum += rank->get(i) * (log(i) - innerSum) * (log(i) - innerSum);
+		}
+		outerSum /= sobs;
+		// 2/pi rescales atan's [0, pi/2) range onto [0, 1); dividing by (pi * atan(V)) instead is unbounded as V -> 0
+		if(outerSum > 0){
+			data[0] = 1.0000 - 2.0000 / 3.14159265358979 * atan(outerSum);
+		}
+		else{
+			data[0] = 1.0000;
+		}
+
+
+		return data;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SmithWilson", "getValues");
+		exit(1);
+	}
+}
+
+/***********************************************************************/
diff --git a/smithwilson.h b/smithwilson.h
new file mode 100644
index 0000000..6e060d9
--- /dev/null
+++ b/smithwilson.h
@@ -0,0 +1,27 @@
+#ifndef SMITHWILSON
+#define SMITHWILSON
+
+/*
+ * smithwilson.h
+ * Mothur
+ *
+ * Created by Pat Schloss on 8/21/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "calculator.h"
+
+/***********************************************************************/
+// Calculator constructed under the name "smithwilson"; getValues(SAbundVector*) is only declared here.
+class SmithWilson : public Calculator {
+// The vector overload below returns the stored `data` member without computing anything.
+public:
+	SmithWilson() : Calculator("smithwilson", 1, false) {};
+	EstOutput getValues(SAbundVector*);
+	EstOutput getValues(vector) {return data;};
+};
+
+/***********************************************************************/
+
+#endif
diff --git a/summarycommand.cpp b/summarycommand.cpp
index 95f4262..087e63f 100644
--- a/summarycommand.cpp
+++ b/summarycommand.cpp
@@ -14,8 +14,13 @@
 #include "chao1.h"
 #include "bootstrap.h"
 #include "simpson.h"
+#include "simpsoneven.h"
+#include "invsimpson.h"
 #include "npshannon.h"
 #include "shannon.h"
+#include "heip.h"
+#include "smithwilson.h"
+#include "shannoneven.h"
 #include "jackknife.h"
 #include "geom.h"
 #include "logsd.h"
@@ -193,10 +198,20 @@ int SummaryCommand::execute(){
 				sumCalculators.push_back(new Jackknife());
 			}else if(Estimators[i] == "shannon"){
 				sumCalculators.push_back(new Shannon());
+			}else if(Estimators[i] == "shannoneven"){
+				sumCalculators.push_back(new ShannonEven());
 			}else if(Estimators[i] == "npshannon"){
 				sumCalculators.push_back(new NPShannon());
+			}else if(Estimators[i] == "heip"){
+				sumCalculators.push_back(new Heip());
+			}else if(Estimators[i] == "smithwilson"){
+				sumCalculators.push_back(new SmithWilson());
 			}else if(Estimators[i] == "simpson"){
 				sumCalculators.push_back(new Simpson());
+			}else if(Estimators[i] == "simpsoneven"){
+				sumCalculators.push_back(new SimpsonEven());
+			}else if(Estimators[i] == "invsimpson"){
+				sumCalculators.push_back(new InvSimpson());
 			}else if(Estimators[i] == "bootstrap"){
 				sumCalculators.push_back(new Bootstrap());
 			}else if (Estimators[i] == "nseqs") {
diff --git a/validcalculator.cpp b/validcalculator.cpp
index 0ade873..bc8c4b8 100644
--- a/validcalculator.cpp
+++ b/validcalculator.cpp
@@ -209,11 +209,16 @@ void ValidCalculators::initialSingle() {
 		single["jack"] = "jack";
 		single["shannon"] = "shannon";
single["npshannon"] = "npshannon"; + single["shannoneven"] = "shannoneven"; + single["smithwilson"] = "smithwilson"; + single["heip"] = "heip"; single["simpson"] = "simpson"; + single["simpsoneven"] = "simpsoneven"; + single["invsimpson"] = "invsimpson"; single["bergerparker"] = "bergerparker"; single["bootstrap"] = "bootstrap"; single["geometric"] = "geometric"; - single["logseries"] = "logseries"; + single["logseries"] = "logseries"; single["qstat"] = "qstat"; single["bstick"] = "bstick"; single["goodscoverage"] = "goodscoverage"; @@ -271,8 +276,13 @@ void ValidCalculators::initialRarefaction() { rarefaction["ace"] = "ace"; rarefaction["jack"] = "jack"; rarefaction["shannon"] = "shannon"; + rarefaction["smithwilson"] = "smithwilson"; + rarefaction["heip"] = "heip"; rarefaction["npshannon"] = "npshannon"; + rarefaction["shannoneven"] = "shannoneven"; rarefaction["simpson"] = "simpson"; + rarefaction["invsimpson"] = "invsimpson"; + rarefaction["simpsoneven"] = "simpsoneven"; rarefaction["bootstrap"] = "bootstrap"; rarefaction["nseqs"] = "nseqs"; rarefaction["coverage"] = "coverage"; @@ -293,8 +303,13 @@ void ValidCalculators::initialSummary() { summary["ace"] = "ace"; summary["jack"] = "jack"; summary["shannon"] = "shannon"; + summary["heip"] = "heip"; + summary["shannoneven"] = "shannoneven"; + summary["smithwilson"] = "smithwilson"; + summary["invsimpson"] = "invsimpson"; summary["npshannon"] = "npshannon"; summary["simpson"] = "simpson"; + summary["simpsoneven"] = "simpsoneven"; summary["bergerparker"] = "bergerparker"; summary["geometric"] = "geometric"; summary["bootstrap"] = "bootstrap";