-.DS_Store
-*.zip
+*.logfile
+*.o
*.pbxproj
-*.xcuserdata
\ No newline at end of file
+*.xcuserdata
+*.zip
+.DS_Store
+.idea
+build
+xcuserdata
A7128B1D16B7002A00723BE4 /* getdistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7128B1C16B7002600723BE4 /* getdistscommand.cpp */; };
A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; };
A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; };
+ A7190B221768E0DF00A9AFA6 /* lefsecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7190B201768E0DF00A9AFA6 /* lefsecommand.cpp */; };
A71CB160130B04A2001E7287 /* anosimcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71CB15E130B04A2001E7287 /* anosimcommand.cpp */; };
A71FE12C12EDF72400963CA7 /* mergegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */; };
A721765713BB9F7D0014DAAE /* referencedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721765613BB9F7D0014DAAE /* referencedb.cpp */; };
A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; };
A7386C231619CCE600651424 /* classifysharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C211619CCE600651424 /* classifysharedcommand.cpp */; };
A7386C251619E52300651424 /* abstractdecisiontree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C241619E52200651424 /* abstractdecisiontree.cpp */; };
- A7386C27161A0F9D00651424 /* abstractrandomforest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C26161A0F9C00651424 /* abstractrandomforest.cpp */; };
A7386C29161A110800651424 /* decisiontree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C28161A110700651424 /* decisiontree.cpp */; };
A73901081588C40900ED2ED6 /* loadlogfilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73901071588C40900ED2ED6 /* loadlogfilecommand.cpp */; };
A73DDBBA13C4A0D1006AAE38 /* clearmemorycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */; };
A73DDC3813C4BF64006AAE38 /* mothurmetastats.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */; };
+ A741744C175CD9B1007DF49B /* makelefsecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741744A175CD9B1007DF49B /* makelefsecommand.cpp */; };
A741FAD215D1688E0067BCC5 /* sequencecountparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741FAD115D1688E0067BCC5 /* sequencecountparser.cpp */; };
A7496D2E167B531B00CC7D7C /* kruskalwalliscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7496D2C167B531B00CC7D7C /* kruskalwalliscommand.cpp */; };
A74A9A9F148E881E00AB5E3E /* spline.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74A9A9E148E881E00AB5E3E /* spline.cpp */; };
A774104814696F320098E6AC /* myseqdist.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A774104614696F320098E6AC /* myseqdist.cpp */; };
A77410F614697C300098E6AC /* seqnoise.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77410F414697C300098E6AC /* seqnoise.cpp */; };
A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */; };
+ A77916E8176F7F7600EEFE18 /* designmap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77916E6176F7F7600EEFE18 /* designmap.cpp */; };
A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */; };
A77B7185173D2240002163C2 /* sparcccommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7184173D2240002163C2 /* sparcccommand.cpp */; };
A77B7188173D4042002163C2 /* randomnumber.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7186173D4041002163C2 /* randomnumber.cpp */; };
A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */; };
A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */; };
A7C7DAB915DA758B0059B0CF /* sffmultiplecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */; };
+ A7CFA4311755401800D9ED4D /* renameseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */; };
A7D755DA1535F679009BF21A /* treereader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D755D91535F679009BF21A /* treereader.cpp */; };
A7E0243D15B4520A00A5F046 /* sparsedistancematrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */; };
A7E6F69E17427D06006775E2 /* makelookupcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E6F69D17427D06006775E2 /* makelookupcommand.cpp */; };
A713EBAB12DC7613000092AC /* readphylipvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readphylipvector.cpp; sourceTree = "<group>"; };
A713EBEB12DC7C5E000092AC /* nmdscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = nmdscommand.h; sourceTree = "<group>"; };
A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = nmdscommand.cpp; sourceTree = "<group>"; };
+ A7190B201768E0DF00A9AFA6 /* lefsecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = lefsecommand.cpp; sourceTree = "<group>"; };
+ A7190B211768E0DF00A9AFA6 /* lefsecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = lefsecommand.h; sourceTree = "<group>"; };
A71CB15E130B04A2001E7287 /* anosimcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = anosimcommand.cpp; sourceTree = "<group>"; };
A71CB15F130B04A2001E7287 /* anosimcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = anosimcommand.h; sourceTree = "<group>"; };
A71FE12A12EDF72400963CA7 /* mergegroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mergegroupscommand.h; sourceTree = "<group>"; };
A727864212E9E28C00F86ABA /* removerarecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removerarecommand.h; sourceTree = "<group>"; };
A727864312E9E28C00F86ABA /* removerarecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removerarecommand.cpp; sourceTree = "<group>"; };
A7386C1B1619CACB00651424 /* abstractdecisiontree.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = abstractdecisiontree.hpp; sourceTree = "<group>"; };
- A7386C1C1619CACB00651424 /* abstractrandomforest.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = abstractrandomforest.hpp; sourceTree = "<group>"; };
A7386C1D1619CACB00651424 /* decisiontree.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = decisiontree.hpp; sourceTree = "<group>"; };
A7386C1E1619CACB00651424 /* macros.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = macros.h; sourceTree = "<group>"; };
A7386C1F1619CACB00651424 /* randomforest.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = randomforest.hpp; sourceTree = "<group>"; };
A7386C211619CCE600651424 /* classifysharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classifysharedcommand.cpp; sourceTree = "<group>"; };
A7386C221619CCE600651424 /* classifysharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classifysharedcommand.h; sourceTree = "<group>"; };
A7386C241619E52200651424 /* abstractdecisiontree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = abstractdecisiontree.cpp; sourceTree = "<group>"; };
- A7386C26161A0F9C00651424 /* abstractrandomforest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = abstractrandomforest.cpp; sourceTree = "<group>"; };
A7386C28161A110700651424 /* decisiontree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = decisiontree.cpp; sourceTree = "<group>"; };
A73901051588C3EF00ED2ED6 /* loadlogfilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = loadlogfilecommand.h; sourceTree = "<group>"; };
A73901071588C40900ED2ED6 /* loadlogfilecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = loadlogfilecommand.cpp; sourceTree = "<group>"; };
A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clearmemorycommand.cpp; sourceTree = "<group>"; };
A73DDC3613C4BF64006AAE38 /* mothurmetastats.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mothurmetastats.h; sourceTree = "<group>"; };
A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mothurmetastats.cpp; sourceTree = "<group>"; };
+ A741744A175CD9B1007DF49B /* makelefsecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makelefsecommand.cpp; sourceTree = "<group>"; };
+ A741744B175CD9B1007DF49B /* makelefsecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makelefsecommand.h; sourceTree = "<group>"; };
A741FAD115D1688E0067BCC5 /* sequencecountparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sequencecountparser.cpp; sourceTree = "<group>"; };
A741FAD415D168A00067BCC5 /* sequencecountparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sequencecountparser.h; sourceTree = "<group>"; };
A7496D2C167B531B00CC7D7C /* kruskalwalliscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = kruskalwalliscommand.cpp; sourceTree = "<group>"; };
A77410F514697C300098E6AC /* seqnoise.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seqnoise.h; sourceTree = "<group>"; };
A778FE69134CA6CA00C0BA33 /* getcommandinfocommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getcommandinfocommand.h; sourceTree = "<group>"; };
A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getcommandinfocommand.cpp; sourceTree = "<group>"; };
+ A77916E6176F7F7600EEFE18 /* designmap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = designmap.cpp; sourceTree = "<group>"; };
+ A77916E7176F7F7600EEFE18 /* designmap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = designmap.h; sourceTree = "<group>"; };
A77A221D139001B600B0BE70 /* deuniquetreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deuniquetreecommand.h; sourceTree = "<group>"; };
A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deuniquetreecommand.cpp; sourceTree = "<group>"; };
A77B7183173D222F002163C2 /* sparcccommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sparcccommand.h; sourceTree = "<group>"; };
A7C3DC0E14FE469500FE1924 /* trialswap2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = trialswap2.h; sourceTree = "<group>"; };
A7C7DAB615DA75760059B0CF /* sffmultiplecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sffmultiplecommand.h; sourceTree = "<group>"; };
A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sffmultiplecommand.cpp; sourceTree = "<group>"; };
+ A7CFA42F1755400500D9ED4D /* renameseqscommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = renameseqscommand.h; sourceTree = "<group>"; };
+ A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = renameseqscommand.cpp; sourceTree = "<group>"; };
A7D755D71535F665009BF21A /* treereader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treereader.h; sourceTree = "<group>"; };
A7D755D91535F679009BF21A /* treereader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treereader.cpp; sourceTree = "<group>"; };
A7DAAFA3133A254E003956EB /* commandparameter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = commandparameter.h; sourceTree = "<group>"; };
children = (
A7386C1B1619CACB00651424 /* abstractdecisiontree.hpp */,
A7386C241619E52200651424 /* abstractdecisiontree.cpp */,
- A7386C1C1619CACB00651424 /* abstractrandomforest.hpp */,
- A7386C26161A0F9C00651424 /* abstractrandomforest.cpp */,
A7386C1D1619CACB00651424 /* decisiontree.hpp */,
A7386C28161A110700651424 /* decisiontree.cpp */,
A7386C1E1619CACB00651424 /* macros.h */,
A7E9B6F612D37EC400DA6239 /* getlabelcommand.cpp */,
A7548FAB17142EA500B1F05A /* getmetacommunitycommand.h */,
A7548FAC17142EBC00B1F05A /* getmetacommunitycommand.cpp */,
- A70056E8156A93E300924A2D /* getotulabelscommand.h */,
- A70056E5156A93D000924A2D /* getotulabelscommand.cpp */,
A7E9B6F912D37EC400DA6239 /* getlineagecommand.h */,
A7E9B6F812D37EC400DA6239 /* getlineagecommand.cpp */,
A7E9B6FB12D37EC400DA6239 /* getlistcountcommand.h */,
A7E9B6FE12D37EC400DA6239 /* getoturepcommand.cpp */,
A7E9B70112D37EC400DA6239 /* getotuscommand.h */,
A7E9B70012D37EC400DA6239 /* getotuscommand.cpp */,
+ A70056E8156A93E300924A2D /* getotulabelscommand.h */,
+ A70056E5156A93D000924A2D /* getotulabelscommand.cpp */,
A7E9B70312D37EC400DA6239 /* getrabundcommand.h */,
A7E9B70212D37EC400DA6239 /* getrabundcommand.cpp */,
A7E9B70512D37EC400DA6239 /* getrelabundcommand.h */,
A7E9B72B12D37EC400DA6239 /* indicatorcommand.cpp */,
A7496D2D167B531B00CC7D7C /* kruskalwalliscommand.h */,
A7496D2C167B531B00CC7D7C /* kruskalwalliscommand.cpp */,
+ A7190B211768E0DF00A9AFA6 /* lefsecommand.h */,
+ A7190B201768E0DF00A9AFA6 /* lefsecommand.cpp */,
A7E9B73C12D37EC400DA6239 /* libshuffcommand.h */,
A7E9B73B12D37EC400DA6239 /* libshuffcommand.cpp */,
A7A0671C156294810095C8C5 /* listotulabelscommand.h */,
A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */,
A7E9B74412D37EC400DA6239 /* makegroupcommand.h */,
A7E9B74312D37EC400DA6239 /* makegroupcommand.cpp */,
+ A741744B175CD9B1007DF49B /* makelefsecommand.h */,
+ A741744A175CD9B1007DF49B /* makelefsecommand.cpp */,
A7E6F69C17427CF2006775E2 /* makelookupcommand.h */,
A7E6F69D17427D06006775E2 /* makelookupcommand.cpp */,
A7E9B74A12D37EC400DA6239 /* matrixoutputcommand.h */,
A727864312E9E28C00F86ABA /* removerarecommand.cpp */,
A7E9B7CA12D37EC400DA6239 /* removeseqscommand.h */,
A7E9B7C912D37EC400DA6239 /* removeseqscommand.cpp */,
+ A7CFA42F1755400500D9ED4D /* renameseqscommand.h */,
+ A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */,
A7E9B7CE12D37EC400DA6239 /* reversecommand.h */,
A7E9B7CD12D37EC400DA6239 /* reversecommand.cpp */,
A7E9B7D212D37EC400DA6239 /* screenseqscommand.h */,
A7E9B6BD12D37EC400DA6239 /* database.cpp */,
A7E9B6BE12D37EC400DA6239 /* database.hpp */,
A7E9B6BF12D37EC400DA6239 /* datavector.hpp */,
+ A77916E7176F7F7600EEFE18 /* designmap.h */,
+ A77916E6176F7F7600EEFE18 /* designmap.cpp */,
A7E9B6CE12D37EC400DA6239 /* distancedb.hpp */,
A7E9B6CD12D37EC400DA6239 /* distancedb.cpp */,
A7E9B6DE12D37EC400DA6239 /* fastamap.cpp */,
A7C7DAB915DA758B0059B0CF /* sffmultiplecommand.cpp in Sources */,
A7386C231619CCE600651424 /* classifysharedcommand.cpp in Sources */,
A7386C251619E52300651424 /* abstractdecisiontree.cpp in Sources */,
- A7386C27161A0F9D00651424 /* abstractrandomforest.cpp in Sources */,
A7386C29161A110800651424 /* decisiontree.cpp in Sources */,
A77E1938161B201E00DB1A2A /* randomforest.cpp in Sources */,
A77E193B161B289600DB1A2A /* rftreenode.cpp in Sources */,
A77B7188173D4042002163C2 /* randomnumber.cpp in Sources */,
A77B718B173D40E5002163C2 /* calcsparcc.cpp in Sources */,
A7E6F69E17427D06006775E2 /* makelookupcommand.cpp in Sources */,
+ A7CFA4311755401800D9ED4D /* renameseqscommand.cpp in Sources */,
+ A741744C175CD9B1007DF49B /* makelefsecommand.cpp in Sources */,
+ A7190B221768E0DF00A9AFA6 /* lefsecommand.cpp in Sources */,
+ A77916E8176F7F7600EEFE18 /* designmap.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
GCC_ENABLE_SSE3_EXTENSIONS = NO;
GCC_ENABLE_SSE41_EXTENSIONS = NO;
GCC_ENABLE_SSE42_EXTENSIONS = NO;
- GCC_OPTIMIZATION_LEVEL = 3;
+ GCC_OPTIMIZATION_LEVEL = s;
GCC_PREPROCESSOR_DEFINITIONS = (
"MOTHUR_FILES=\"\\\"../../release\\\"\"",
- "VERSION=\"\\\"1.29.2\\\"\"",
- "RELEASE_DATE=\"\\\"2/12/2013\\\"\"",
+ "VERSION=\"\\\"1.31.0\\\"\"",
+ "RELEASE_DATE=\"\\\"5/24/2013\\\"\"",
);
"GCC_VERSION[arch=*]" = "";
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
GCC_MODEL_TUNING = "";
- GCC_OPTIMIZATION_LEVEL = 3;
+ GCC_OPTIMIZATION_LEVEL = s;
GCC_PREPROCESSOR_DEFINITIONS = (
- "VERSION=\"\\\"1.30.0\\\"\"",
- "RELEASE_DATE=\"\\\"4/01/2013\\\"\"",
+ "VERSION=\"\\\"1.31.0\\\"\"",
+ "RELEASE_DATE=\"\\\"5/24/2013\\\"\"",
);
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
/**************************************************************************************************/
-AbstractDecisionTree::AbstractDecisionTree(vector<vector<int> >baseDataSet,
- vector<int> globalDiscardedFeatureIndices,
- OptimumFeatureSubsetSelector optimumFeatureSubsetSelector,
- string treeSplitCriterion) : baseDataSet(baseDataSet),
-numSamples((int)baseDataSet.size()),
-numFeatures((int)(baseDataSet[0].size() - 1)),
-numOutputClasses(0),
-rootNode(NULL),
-globalDiscardedFeatureIndices(globalDiscardedFeatureIndices),
-optimumFeatureSubsetSize(optimumFeatureSubsetSelector.getOptimumFeatureSubsetSize(numFeatures)),
-treeSplitCriterion(treeSplitCriterion) {
+AbstractDecisionTree::AbstractDecisionTree(vector<vector<int> >& baseDataSet,
+ vector<int> globalDiscardedFeatureIndices,
+ OptimumFeatureSubsetSelector optimumFeatureSubsetSelector,
+ string treeSplitCriterion)
+
+ : baseDataSet(baseDataSet),
+ numSamples((int)baseDataSet.size()),
+ numFeatures((int)(baseDataSet[0].size() - 1)),
+ numOutputClasses(0),
+ rootNode(NULL),
+ nodeIdCount(0),
+ globalDiscardedFeatureIndices(globalDiscardedFeatureIndices),
+ optimumFeatureSubsetSize(optimumFeatureSubsetSelector.getOptimumFeatureSubsetSize(numFeatures)),
+ treeSplitCriterion(treeSplitCriterion) {
try {
- // TODO: istead of calculating this for every DecisionTree
- // clacualte this once in the RandomForest class and pass the values
- m = MothurOut::getInstance();
- for (int i = 0; i < numSamples; i++) {
- if (m->control_pressed) { break; }
- int outcome = baseDataSet[i][numFeatures];
- vector<int>::iterator it = find(outputClasses.begin(), outputClasses.end(), outcome);
- if (it == outputClasses.end()){ // find() will return classes.end() if the element is not found
- outputClasses.push_back(outcome);
- numOutputClasses++;
+ // TODO: istead of calculating this for every DecisionTree
+ // clacualte this once in the RandomForest class and pass the values
+ m = MothurOut::getInstance();
+ for (int i = 0; i < numSamples; i++) {
+ if (m->control_pressed) { break; }
+ int outcome = baseDataSet[i][numFeatures];
+ vector<int>::iterator it = find(outputClasses.begin(), outputClasses.end(), outcome);
+ if (it == outputClasses.end()){ // find() will return classes.end() if the element is not found
+ outputClasses.push_back(outcome);
+ numOutputClasses++;
+ }
+ }
+
+ if (m->debug) {
+ //m->mothurOut("outputClasses = " + toStringVectorInt(outputClasses));
+ m->mothurOut("numOutputClasses = " + toString(numOutputClasses) + '\n');
}
- }
-
- if (m->debug) {
- //m->mothurOut("outputClasses = " + toStringVectorInt(outputClasses));
- m->mothurOut("numOutputClasses = " + toString(numOutputClasses) + '\n');
- }
}
catch(exception& e) {
/**************************************************************************************************/
int AbstractDecisionTree::createBootStrappedSamples(){
try {
- vector<bool> isInTrainingSamples(numSamples, false);
-
- for (int i = 0; i < numSamples; i++) {
- if (m->control_pressed) { return 0; }
- // TODO: optimize the rand() function call + double check if it's working properly
- int randomIndex = rand() % numSamples;
- bootstrappedTrainingSamples.push_back(baseDataSet[randomIndex]);
- isInTrainingSamples[randomIndex] = true;
- }
-
- for (int i = 0; i < numSamples; i++) {
- if (m->control_pressed) { return 0; }
- if (isInTrainingSamples[i]){ bootstrappedTrainingSampleIndices.push_back(i); }
- else{
- bootstrappedTestSamples.push_back(baseDataSet[i]);
- bootstrappedTestSampleIndices.push_back(i);
+ vector<bool> isInTrainingSamples(numSamples, false);
+
+ for (int i = 0; i < numSamples; i++) {
+ if (m->control_pressed) { return 0; }
+ // TODO: optimize the rand() function call + double check if it's working properly
+ int randomIndex = rand() % numSamples;
+ bootstrappedTrainingSamples.push_back(baseDataSet[randomIndex]);
+ isInTrainingSamples[randomIndex] = true;
}
- }
-
+
+ for (int i = 0; i < numSamples; i++) {
+ if (m->control_pressed) { return 0; }
+ if (isInTrainingSamples[i]){ bootstrappedTrainingSampleIndices.push_back(i); }
+ else{
+ bootstrappedTestSamples.push_back(baseDataSet[i]);
+ bootstrappedTestSampleIndices.push_back(i);
+ }
+ }
+
+ // do the transpose of Test Samples
+ for (int i = 0; i < bootstrappedTestSamples[0].size(); i++) {
+ if (m->control_pressed) { return 0; }
+
+ vector<int> tmpFeatureVector(bootstrappedTestSamples.size(), 0);
+ for (int j = 0; j < bootstrappedTestSamples.size(); j++) {
+ if (m->control_pressed) { return 0; }
+
+ tmpFeatureVector[j] = bootstrappedTestSamples[j][i];
+ }
+ testSampleFeatureVectors.push_back(tmpFeatureVector);
+ }
+
return 0;
}
catch(exception& e) {
}
}
/**************************************************************************************************/
-int AbstractDecisionTree::getMinEntropyOfFeature(vector<int> featureVector, vector<int> outputVector, double& minEntropy, int& featureSplitValue, double& intrinsicValue){
+int AbstractDecisionTree::getMinEntropyOfFeature(vector<int> featureVector,
+ vector<int> outputVector,
+ double& minEntropy,
+ int& featureSplitValue,
+ double& intrinsicValue){
try {
- vector< vector<int> > featureOutputPair(featureVector.size(), vector<int>(2, 0));
+ vector< pair<int, int> > featureOutputPair(featureVector.size(), pair<int, int>(0, 0));
+
for (int i = 0; i < featureVector.size(); i++) {
if (m->control_pressed) { return 0; }
- featureOutputPair[i][0] = featureVector[i];
- featureOutputPair[i][1] = outputVector[i];
+
+ featureOutputPair[i].first = featureVector[i];
+ featureOutputPair[i].second = outputVector[i];
}
- // TODO: using default behavior to sort(), need to specify the comparator for added safety and compiler portability
- sort(featureOutputPair.begin(), featureOutputPair.end());
+ // TODO: using default behavior to sort(), need to specify the comparator for added safety and compiler portability,
+ IntPairVectorSorter intPairVectorSorter;
+ sort(featureOutputPair.begin(), featureOutputPair.end(), intPairVectorSorter);
vector<int> splitPoints;
- vector<int> uniqueFeatureValues(1, featureOutputPair[0][0]);
+ vector<int> uniqueFeatureValues(1, featureOutputPair[0].first);
for (int i = 0; i < featureOutputPair.size(); i++) {
+
if (m->control_pressed) { return 0; }
- int featureValue = featureOutputPair[i][0];
+ int featureValue = featureOutputPair[i].first;
+
vector<int>::iterator it = find(uniqueFeatureValues.begin(), uniqueFeatureValues.end(), featureValue);
if (it == uniqueFeatureValues.end()){ // NOT FOUND
uniqueFeatureValues.push_back(featureValue);
featureSplitValue = -1; // OUTPUT
}else{
getBestSplitAndMinEntropy(featureOutputPair, splitPoints, minEntropy, bestSplitIndex, intrinsicValue); // OUTPUT
- featureSplitValue = featureOutputPair[splitPoints[bestSplitIndex]][0]; // OUTPUT
+ featureSplitValue = featureOutputPair[splitPoints[bestSplitIndex]].first; // OUTPUT
}
return 0;
}
}
/**************************************************************************************************/
-int AbstractDecisionTree::getBestSplitAndMinEntropy(vector< vector<int> > featureOutputPairs, vector<int> splitPoints,
- double& minEntropy, int& minEntropyIndex, double& relatedIntrinsicValue){
+
+int AbstractDecisionTree::getBestSplitAndMinEntropy(vector< pair<int, int> > featureOutputPairs, vector<int> splitPoints,
+ double& minEntropy, int& minEntropyIndex, double& relatedIntrinsicValue){
try {
int numSamples = (int)featureOutputPairs.size();
vector<double> intrinsicValues;
for (int i = 0; i < splitPoints.size(); i++) {
- if (m->control_pressed) { return 0; }
+ if (m->control_pressed) { return 0; }
int index = splitPoints[i];
- int valueAtSplitPoint = featureOutputPairs[index][0];
+ int valueAtSplitPoint = featureOutputPairs[index].first;
+
int numLessThanValueAtSplitPoint = 0;
int numGreaterThanValueAtSplitPoint = 0;
for (int j = 0; j < featureOutputPairs.size(); j++) {
- if (m->control_pressed) { return 0; }
- vector<int> record = featureOutputPairs[j];
- if (record[0] < valueAtSplitPoint){ numLessThanValueAtSplitPoint++; }
+ if (m->control_pressed) { return 0; }
+ pair<int, int> record = featureOutputPairs[j];
+ if (record.first < valueAtSplitPoint){ numLessThanValueAtSplitPoint++; }
else{ numGreaterThanValueAtSplitPoint++; }
}
}
/**************************************************************************************************/
-double AbstractDecisionTree::calcSplitEntropy(vector< vector<int> > featureOutputPairs, int splitIndex, int numOutputClasses, bool isUpperSplit = true) {
+double AbstractDecisionTree::calcSplitEntropy(vector< pair<int, int> > featureOutputPairs, int splitIndex, int numOutputClasses, bool isUpperSplit = true) {
try {
vector<int> classCounts(numOutputClasses, 0);
if (isUpperSplit) {
- for (int i = 0; i < splitIndex; i++) {
+ for (int i = 0; i < splitIndex; i++) {
if (m->control_pressed) { return 0; }
- classCounts[featureOutputPairs[i][1]]++;
+ classCounts[featureOutputPairs[i].second]++;
}
} else {
for (int i = splitIndex; i < featureOutputPairs.size(); i++) {
if (m->control_pressed) { return 0; }
- classCounts[featureOutputPairs[i][1]]++;
+ classCounts[featureOutputPairs[i].second]++;
}
}
if (m->control_pressed) { return 0; }
vector<int> sample = node->getBootstrappedTrainingSamples()[i];
if (m->control_pressed) { return 0; }
- if (sample[splitFeatureGlobalIndex] < node->getSplitFeatureValue()){ leftChildSamples.push_back(sample); }
- else{ rightChildSamples.push_back(sample); }
+
+ if (sample[splitFeatureGlobalIndex] < node->getSplitFeatureValue()) { leftChildSamples.push_back(sample); }
+ else { rightChildSamples.push_back(sample); }
}
return 0;
// Copyright (c) 2012 Schloss Lab. All rights reserved.
//
-#ifndef rrf_fs_prototype_abstractdecisiontree_hpp
-#define rrf_fs_prototype_abstractdecisiontree_hpp
+#ifndef RF_ABSTRACTDECISIONTREE_HPP
+#define RF_ABSTRACTDECISIONTREE_HPP
#include "mothurout.h"
#include "macros.h"
/**************************************************************************************************/
+struct IntPairVectorSorter{
+ bool operator() (const pair<int, int>& firstPair, const pair<int, int>& secondPair) {
+ return firstPair.first < secondPair.first;
+ }
+};
+
+/**************************************************************************************************/
+
class AbstractDecisionTree{
public:
- AbstractDecisionTree(vector<vector<int> >baseDataSet,
- vector<int> globalDiscardedFeatureIndices,
- OptimumFeatureSubsetSelector optimumFeatureSubsetSelector,
- string treeSplitCriterion);
+ AbstractDecisionTree(vector<vector<int> >& baseDataSet,
+ vector<int> globalDiscardedFeatureIndices,
+ OptimumFeatureSubsetSelector optimumFeatureSubsetSelector,
+ string treeSplitCriterion);
virtual ~AbstractDecisionTree(){}
virtual int createBootStrappedSamples();
virtual int getMinEntropyOfFeature(vector<int> featureVector, vector<int> outputVector, double& minEntropy, int& featureSplitValue, double& intrinsicValue);
- virtual int getBestSplitAndMinEntropy(vector< vector<int> > featureOutputPairs, vector<int> splitPoints, double& minEntropy, int& minEntropyIndex, double& relatedIntrinsicValue);
+ virtual int getBestSplitAndMinEntropy(vector< pair<int, int> > featureOutputPairs, vector<int> splitPoints, double& minEntropy, int& minEntropyIndex, double& relatedIntrinsicValue);
virtual double calcIntrinsicValue(int numLessThanValueAtSplitPoint, int numGreaterThanValueAtSplitPoint, int numSamples);
- virtual double calcSplitEntropy(vector< vector<int> > featureOutputPairs, int splitIndex, int numOutputClasses, bool);
+ virtual double calcSplitEntropy(vector< pair<int, int> > featureOutputPairs, int splitIndex, int numOutputClasses, bool);
+
virtual int getSplitPopulation(RFTreeNode* node, vector< vector<int> >& leftChildSamples, vector< vector<int> >& rightChildSamples);
virtual bool checkIfAlreadyClassified(RFTreeNode* treeNode, int& outputClass);
- vector< vector<int> > baseDataSet;
+ vector< vector<int> >& baseDataSet;
int numSamples;
int numFeatures;
int numOutputClasses;
vector<int> outputClasses;
+
vector< vector<int> > bootstrappedTrainingSamples;
vector<int> bootstrappedTrainingSampleIndices;
vector< vector<int> > bootstrappedTestSamples;
vector<int> bootstrappedTestSampleIndices;
+ vector<vector<int> > testSampleFeatureVectors;
+
RFTreeNode* rootNode;
+ int nodeIdCount;
+ map<int, int> nodeMisclassificationCounts;
vector<int> globalDiscardedFeatureIndices;
int optimumFeatureSubsetSize;
string treeSplitCriterion;
//for each word
for (int i = 0; i < numKmers; i++) {
+ //m->mothurOut("[DEBUG]: kmer = " + toString(i) + "\n");
+
if (m->control_pressed) { break; }
#ifdef USE_MPI
}
}
+ if (m->debug) { m->mothurOut("[DEBUG]: about to generateWordPairDiffArr\n"); }
generateWordPairDiffArr();
+ if (m->debug) { m->mothurOut("[DEBUG]: done generateWordPairDiffArr\n"); }
//save probabilities
if (rdb->save) { rdb->wordGenusProb = wordGenusProb; rdb->WordPairDiffArr = WordPairDiffArr; }
// Copyright (c) 2012 University of Michigan. All rights reserved.
//
-#include "calcSparcc.h"
+#include "calcsparcc.h"
#include "linearalgebra.h"
/**************************************************************************************************/
taxonomy.clear();
m->readTax(file, taxonomy);
- map<string, string> tempTaxonomy;
+
+ //commented out to save time with large templates. 6/12/13
+ //map<string, string> tempTaxonomy;
for (map<string, string>::iterator itTax = taxonomy.begin(); itTax != taxonomy.end(); itTax++) {
- if (m->inUsersGroups(itTax->first, names)) {
- phyloTree->addSeqToTree(itTax->first, itTax->second);
- tempTaxonomy[itTax->first] = itTax->second;
- }else {
- m->mothurOut("[WARNING]: " + itTax->first + " is in your taxonomy file and not in your reference file, ignoring.\n");
- }
+ //if (m->inUsersGroups(itTax->first, names)) {
+ phyloTree->addSeqToTree(itTax->first, itTax->second);
+ if (m->control_pressed) { break; }
+ //tempTaxonomy[itTax->first] = itTax->second;
+ // }else {
+ // m->mothurOut("[WARNING]: " + itTax->first + " is in your taxonomy file and not in your reference file, ignoring.\n");
+ //}
}
- taxonomy = tempTaxonomy;
+ //taxonomy = tempTaxonomy;
#endif
phyloTree->assignHeirarchyIDs(0);
vector<string> Classify::parseTax(string tax) {
try {
vector<string> taxons;
-
- tax = tax.substr(0, tax.length()-1); //get rid of last ';'
-
- //parse taxonomy
- string individual;
- while (tax.find_first_of(';') != -1) {
- individual = tax.substr(0,tax.find_first_of(';'));
- tax = tax.substr(tax.find_first_of(';')+1, tax.length());
- taxons.push_back(individual);
-
- }
- //get last one
- taxons.push_back(tax);
-
- return taxons;
+ m->splitAtChar(tax, taxons, ';');
+ return taxons;
}
catch(exception& e) {
m->errorOut(e, "Classify", "parseTax");
CommandParameter potupersplit("otupersplit", "Multiple", "log2-squareroot", "log2", "", "", "","",false,false); parameters.push_back(potupersplit);
CommandParameter psplitcriteria("splitcriteria", "Multiple", "gainratio-infogain", "gainratio", "", "", "","",false,false); parameters.push_back(psplitcriteria);
CommandParameter pnumtrees("numtrees", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pnumtrees);
+
+ // parameters related to pruning
+ CommandParameter pdopruning("prune", "Boolean", "", "T", "", "", "", "", false, false); parameters.push_back(pdopruning);
+ CommandParameter ppruneaggrns("pruneaggressiveness", "Number", "", "0.9", "", "", "", "", false, false); parameters.push_back(ppruneaggrns);
+ CommandParameter pdiscardhetrees("discarderrortrees", "Boolean", "", "T", "", "", "", "", false, false); parameters.push_back(pdiscardhetrees);
+ CommandParameter phetdiscardthreshold("errorthreshold", "Number", "", "0.4", "", "", "", "", false, false); parameters.push_back(phetdiscardthreshold);
+ CommandParameter psdthreshold("stdthreshold", "Number", "", "0.0", "", "", "", "", false, false); parameters.push_back(psdthreshold);
+ // pruning params end
CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
exit(1);
}
}
+
//**********************************************************************************************************************
ClassifySharedCommand::ClassifySharedCommand(string option) {
try {
for (it = parameters.begin(); it != parameters.end(); it++) {
if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
}
-
vector<string> tempOutNames;
outputTypes["summary"] = tempOutNames;
}
}
-
//check for parameters
//get shared file, it is required
sharedfile = validParameter.validFile(parameters, "shared", true);
outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
}
-
// NEW CODE for OTU per split selection criteria
- otupersplit = validParameter.validFile(parameters, "otupersplit", false);
- if (otupersplit == "not found") { otupersplit = "log2"; }
- if ((otupersplit == "squareroot") || (otupersplit == "log2")) {
- optimumFeatureSubsetSelectionCriteria = otupersplit;
- }else { m->mothurOut("Not a valid OTU per split selection method. Valid OTU per split selection methods are 'log2' and 'squareroot'."); m->mothurOutEndLine(); abort = true; }
-
- // splitcriteria
- splitcriteria = validParameter.validFile(parameters, "splitcriteria", false);
- if (splitcriteria == "not found") { splitcriteria = "gainratio"; }
- if ((splitcriteria == "gainratio") || (splitcriteria == "infogain")) {
- treeSplitCriterion = splitcriteria;
- }else { m->mothurOut("Not a valid tree splitting criterio. Valid tree splitting criteria are 'gainratio' and 'infogain'."); m->mothurOutEndLine(); abort = true; }
-
-
- string temp = validParameter.validFile(parameters, "numtrees", false); if (temp == "not found"){ temp = "100"; }
- m->mothurConvert(temp, numDecisionTrees);
-
+ string temp = validParameter.validFile(parameters, "splitcriteria", false);
+ if (temp == "not found") { temp = "gainratio"; }
+ if ((temp == "gainratio") || (temp == "infogain")) {
+ treeSplitCriterion = temp;
+ } else { m->mothurOut("Not a valid tree splitting criterio. Valid tree splitting criteria are 'gainratio' and 'infogain'.");
+ m->mothurOutEndLine();
+ abort = true;
+ }
+
+ temp = validParameter.validFile(parameters, "numtrees", false); if (temp == "not found"){ temp = "100"; }
+ m->mothurConvert(temp, numDecisionTrees);
+
+ // parameters for pruning
+ temp = validParameter.validFile(parameters, "prune", false);
+ if (temp == "not found") { temp = "f"; }
+ doPruning = m->isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "pruneaggressiveness", false);
+ if (temp == "not found") { temp = "0.9"; }
+ m->mothurConvert(temp, pruneAggressiveness);
+
+ temp = validParameter.validFile(parameters, "discarderrortrees", false);
+ if (temp == "not found") { temp = "f"; }
+ discardHighErrorTrees = m->isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "errorthreshold", false);
+ if (temp == "not found") { temp = "0.4"; }
+ m->mothurConvert(temp, highErrorTreeDiscardThreshold);
+
+ temp = validParameter.validFile(parameters, "otupersplit", false);
+ if (temp == "not found") { temp = "log2"; }
+ if ((temp == "squareroot") || (temp == "log2")) {
+ optimumFeatureSubsetSelectionCriteria = temp;
+ } else { m->mothurOut("Not a valid OTU per split selection method. Valid OTU per split selection methods are 'log2' and 'squareroot'.");
+ m->mothurOutEndLine();
+ abort = true;
+ }
+
+ temp = validParameter.validFile(parameters, "stdthreshold", false);
+ if (temp == "not found") { temp = "0.0"; }
+ m->mothurConvert(temp, featureStandardDeviationThreshold);
+
+ // end of pruning params
+
//Groups must be checked later to make sure they are valid. SharedUtilities has functions of check the validity, just make to so m->setGroups() after the checks. If you are using these with a shared file no need to check the SharedRAbundVector class will call SharedUtilites for you, kinda nice, huh?
string groups = validParameter.validFile(parameters, "groups", false);
if (groups == "not found") { groups = ""; }
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
lookup = input.getSharedRAbundVectors(lastLabel);
m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-
processSharedAndDesignData(lookup);
processedLabels.insert(lookup[0]->getLabel());
dataSet[i][j] = treatmentToIntMap[treatmentName];
}
- RandomForest randomForest(dataSet, numDecisionTrees, treeSplitCriterion);
+ RandomForest randomForest(dataSet, numDecisionTrees, treeSplitCriterion, doPruning, pruneAggressiveness, discardHighErrorTrees, highErrorTreeDiscardThreshold, optimumFeatureSubsetSelectionCriteria, featureStandardDeviationThreshold);
+
randomForest.populateDecisionTrees();
randomForest.calcForrestErrorRate();
vector<string> setParameters();
string getCommandName() { return "classify.shared"; }
- string getCommandCategory() { return "OTU-Based Approaches"; }
-
+ string getCommandCategory() { return "OTU-Based Approaches"; }
string getHelpString();
- string getOutputPattern(string);
+ string getOutputPattern(string);
string getCitation() { return "http://www.mothur.org/wiki/Classify.shared\n"; }
- string getDescription() { return "description"; }
+ string getDescription() { return "implements the random forest machine learning algorithm to identify OTUs that can be used to differentiate between various groups of samples"; }
int execute();
void help() { m->mothurOut(getHelpString()); }
string outputDir;
vector<string> outputNames, Groups;
- string sharedfile, designfile, otupersplit, splitcriteria;
+ string sharedfile, designfile;
set<string> labels;
bool allLines;
#ifndef CLUSTER_H
#define CLUSTER_H
-
+//test change
#include "mothur.h"
#include "sparsedistancematrix.h"
countfile = validParameter.validFile(parameters, "count", true);
if (countfile == "not open") { abort = true; countfile = ""; }
else if (countfile == "not found") { countfile = ""; }
- else { ct.readTable(countfile, false); m->setCountTableFile(countfile); }
+ else { ct.readTable(countfile, true); m->setCountTableFile(countfile); }
if ((countfile != "") && (namefile != "")) { m->mothurOut("When executing a cluster.fragments command you must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
#include "getmetacommunitycommand.h"
#include "sparcccommand.h"
#include "makelookupcommand.h"
+#include "renameseqscommand.h"
+#include "makelefsecommand.h"
+#include "lefsecommand.h"
+#include "kruskalwalliscommand.h"
/*******************************************************/
commands["get.metacommunity"] = "get.metacommunity";
commands["sparcc"] = "sparcc";
commands["make.lookup"] = "make.lookup";
+ commands["rename.seqs"] = "rename.seqs";
+ commands["make.lefse"] = "make.lefse";
+ commands["lefse"] = "lefse";
+ commands["kruskal.wallis"] = "kruskal.wallis";
}
else if(commandName == "get.metacommunity") { command = new GetMetaCommunityCommand(optionString); }
else if(commandName == "sparcc") { command = new SparccCommand(optionString); }
else if(commandName == "make.lookup") { command = new MakeLookupCommand(optionString); }
+ else if(commandName == "rename.seqs") { command = new RenameSeqsCommand(optionString); }
+ else if(commandName == "make.lefse") { command = new MakeLefseCommand(optionString); }
+ else if(commandName == "lefse") { command = new LefseCommand(optionString); }
+ else if(commandName == "kruskal.wallis") { command = new KruskalWallisCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
else if(commandName == "get.metacommunity") { pipecommand = new GetMetaCommunityCommand(optionString); }
else if(commandName == "sparcc") { pipecommand = new SparccCommand(optionString); }
else if(commandName == "make.lookup") { pipecommand = new MakeLookupCommand(optionString); }
+ else if(commandName == "rename.seqs") { pipecommand = new RenameSeqsCommand(optionString); }
+ else if(commandName == "make.lefse") { pipecommand = new MakeLefseCommand(optionString); }
+ else if(commandName == "lefse") { pipecommand = new LefseCommand(optionString); }
+ else if(commandName == "kruskal.wallis") { pipecommand = new KruskalWallisCommand(optionString); }
else { pipecommand = new NoCommand(optionString); }
return pipecommand;
else if(commandName == "get.metacommunity") { shellcommand = new GetMetaCommunityCommand(); }
else if(commandName == "sparcc") { shellcommand = new SparccCommand(); }
else if(commandName == "make.lookup") { shellcommand = new MakeLookupCommand(); }
+ else if(commandName == "rename.seqs") { shellcommand = new RenameSeqsCommand(); }
+ else if(commandName == "make.lefse") { shellcommand = new MakeLefseCommand(); }
+ else if(commandName == "lefse") { shellcommand = new LefseCommand(); }
+ else if(commandName == "kruskal.wallis") { shellcommand = new KruskalWallisCommand(); }
else { shellcommand = new NoCommand(); }
return shellcommand;
if (countfile == "") {
sort(binNames.begin(), binNames.end());
bin = "";
- for (int i = 0; i < binNames.size()-1; i++) {
- bin += binNames[i] + ',';
+ for (int j = 0; j < binNames.size()-1; j++) {
+ bin += binNames[j] + ',';
}
bin += binNames[binNames.size()-1];
map<string, string>::iterator it = repNames.find(bin);
return lookup;
}
catch(exception& e) {
- m->errorOut(e, "CreateDatabaseCommand", "getList");
+ m->errorOut(e, "CreateDatabaseCommand", "getShared");
exit(1);
}
}
#include "decisiontree.hpp"
-DecisionTree::DecisionTree(vector< vector<int> > baseDataSet,
- vector<int> globalDiscardedFeatureIndices,
- OptimumFeatureSubsetSelector optimumFeatureSubsetSelector,
- string treeSplitCriterion) : AbstractDecisionTree(baseDataSet,
- globalDiscardedFeatureIndices,
- optimumFeatureSubsetSelector,
- treeSplitCriterion), variableImportanceList(numFeatures, 0){
+DecisionTree::DecisionTree(vector< vector<int> >& baseDataSet,
+ vector<int> globalDiscardedFeatureIndices,
+ OptimumFeatureSubsetSelector optimumFeatureSubsetSelector,
+ string treeSplitCriterion,
+ float featureStandardDeviationThreshold)
+ : AbstractDecisionTree(baseDataSet,
+ globalDiscardedFeatureIndices,
+ optimumFeatureSubsetSelector,
+ treeSplitCriterion),
+ variableImportanceList(numFeatures, 0),
+ featureStandardDeviationThreshold(featureStandardDeviationThreshold) {
+
try {
m = MothurOut::getInstance();
createBootStrappedSamples();
/***********************************************************************/
-int DecisionTree::calcTreeVariableImportanceAndError() {
+int DecisionTree::calcTreeVariableImportanceAndError(int& numCorrect, double& treeErrorRate) {
try {
+ vector< vector<int> > randomlySampledTestData(bootstrappedTestSamples.size(), vector<int>(bootstrappedTestSamples[0].size(), 0));
- int numCorrect;
- double treeErrorRate;
- calcTreeErrorRate(numCorrect, treeErrorRate);
+ // TODO: is is possible to further speed up the following O(N^2) by using std::copy?
+ for (int i = 0; i < bootstrappedTestSamples.size(); i++) {
+ for (int j = 0; j < bootstrappedTestSamples[i].size(); j++) {
+ randomlySampledTestData[i][j] = bootstrappedTestSamples[i][j];
+ }
+ }
- if (m->control_pressed) {return 0; }
-
for (int i = 0; i < numFeatures; i++) {
- if (m->control_pressed) {return 0; }
- // NOTE: only shuffle the features, never shuffle the output vector
- // so i = 0 and i will be alwaays <= (numFeatures - 1) as the index at numFeatures will denote
- // the feature vector
- vector< vector<int> > randomlySampledTestData = randomlyShuffleAttribute(bootstrappedTestSamples, i);
+ if (m->control_pressed) { return 0; }
- int numCorrectAfterShuffle = 0;
- for (int j = 0; j < randomlySampledTestData.size(); j++) {
- if (m->control_pressed) {return 0; }
- vector<int> shuffledSample = randomlySampledTestData[j];
- int actualSampleOutputClass = shuffledSample[numFeatures];
- int predictedSampleOutputClass = evaluateSample(shuffledSample);
- if (actualSampleOutputClass == predictedSampleOutputClass) { numCorrectAfterShuffle++; }
+ // if the index is in globalDiscardedFeatureIndices (i.e, null feature) we don't want to shuffle them
+ vector<int>::iterator it = find(globalDiscardedFeatureIndices.begin(), globalDiscardedFeatureIndices.end(), i);
+ if (it == globalDiscardedFeatureIndices.end()) { // NOT FOUND
+ // if the standard deviation is very low, we know it's not a good feature at all
+ // we can save some time here by discarding that feature
+
+ vector<int> featureVector = testSampleFeatureVectors[i];
+ if (m->getStandardDeviation(featureVector) > featureStandardDeviationThreshold) {
+ // NOTE: only shuffle the features, never shuffle the output vector
+ // so i = 0 and i will be alwaays <= (numFeatures - 1) as the index at numFeatures will denote
+ // the feature vector
+ randomlyShuffleAttribute(bootstrappedTestSamples, i, i - 1, randomlySampledTestData);
+
+ int numCorrectAfterShuffle = 0;
+ for (int j = 0; j < randomlySampledTestData.size(); j++) {
+ if (m->control_pressed) {return 0; }
+
+ vector<int> shuffledSample = randomlySampledTestData[j];
+ int actualSampleOutputClass = shuffledSample[numFeatures];
+ int predictedSampleOutputClass = evaluateSample(shuffledSample);
+ if (actualSampleOutputClass == predictedSampleOutputClass) { numCorrectAfterShuffle++; }
+ }
+ variableImportanceList[i] += (numCorrect - numCorrectAfterShuffle);
+ }
}
- variableImportanceList[i] += (numCorrect - numCorrectAfterShuffle);
}
// TODO: do we need to save the variableRanks in the DecisionTree, do we need it later?
- vector< vector<int> > variableRanks;
+ vector< pair<int, int> > variableRanks;
+
for (int i = 0; i < variableImportanceList.size(); i++) {
if (m->control_pressed) {return 0; }
if (variableImportanceList[i] > 0) {
// TODO: is there a way to optimize the follow line's code?
- vector<int> variableRank(2, 0);
- variableRank[0] = i; variableRank[1] = variableImportanceList[i];
+ pair<int, int> variableRank(0, 0);
+ variableRank.first = i;
+ variableRank.second = variableImportanceList[i];
variableRanks.push_back(variableRank);
}
}
try {
RFTreeNode *node = rootNode;
while (true) {
- if (m->control_pressed) {return 0; }
+ if (m->control_pressed) { return 0; }
+
if (node->checkIsLeaf()) { return node->getOutputClass(); }
+
int sampleSplitFeatureValue = testSample[node->getSplitFeatureIndex()];
if (sampleSplitFeatureValue < node->getSplitFeatureValue()) { node = node->getLeftChildNode(); }
else { node = node->getRightChildNode(); }
/***********************************************************************/
int DecisionTree::calcTreeErrorRate(int& numCorrect, double& treeErrorRate){
+ numCorrect = 0;
try {
- numCorrect = 0;
for (int i = 0; i < bootstrappedTestSamples.size(); i++) {
if (m->control_pressed) {return 0; }
}
/***********************************************************************/
-
// TODO: optimize the algo, instead of transposing two time, we can extarct the feature,
// shuffle it and then re-insert in the original place, thus iproving runnting time
//This function randomize abundances for a given OTU/feature.
-vector< vector<int> > DecisionTree::randomlyShuffleAttribute(vector< vector<int> > samples, int featureIndex) {
+
+void DecisionTree::randomlyShuffleAttribute(const vector< vector<int> >& samples,
+ const int featureIndex,
+ const int prevFeatureIndex,
+ vector< vector<int> >& shuffledSample) {
try {
// NOTE: we need (numFeatures + 1) featureVecotors, the last extra vector is actually outputVector
- vector< vector<int> > shuffledSample = samples;
- vector<int> featureVectors(samples.size(), 0);
+ // restore previously shuffled feature
+ if (prevFeatureIndex > -1) {
+ for (int j = 0; j < samples.size(); j++) {
+ if (m->control_pressed) { return; }
+ shuffledSample[j][prevFeatureIndex] = samples[j][prevFeatureIndex];
+ }
+ }
+
+ // now do the shuffling
+ vector<int> featureVectors(samples.size(), 0);
for (int j = 0; j < samples.size(); j++) {
- if (m->control_pressed) { return shuffledSample; }
+ if (m->control_pressed) { return; }
featureVectors[j] = samples[j][featureIndex];
}
-
random_shuffle(featureVectors.begin(), featureVectors.end());
-
for (int j = 0; j < samples.size(); j++) {
- if (m->control_pressed) {return shuffledSample; }
+ if (m->control_pressed) { return; }
shuffledSample[j][featureIndex] = featureVectors[j];
}
-
- return shuffledSample;
}
catch(exception& e) {
- m->errorOut(e, "DecisionTree", "randomlyShuffleAttribute");
+ m->errorOut(e, "DecisionTree", "randomlyShuffleAttribute");
exit(1);
- }
+ }
+
}
+
/***********************************************************************/
int DecisionTree::purgeTreeNodesDataRecursively(RFTreeNode* treeNode) {
void DecisionTree::buildDecisionTree(){
try {
- int generation = 0;
- rootNode = new RFTreeNode(bootstrappedTrainingSamples, globalDiscardedFeatureIndices, numFeatures, numSamples, numOutputClasses, generation);
-
- splitRecursively(rootNode);
+ int generation = 0;
+ rootNode = new RFTreeNode(bootstrappedTrainingSamples, globalDiscardedFeatureIndices, numFeatures, numSamples, numOutputClasses, generation, nodeIdCount, featureStandardDeviationThreshold);
+ nodeIdCount++;
+
+ splitRecursively(rootNode);
+
}
catch(exception& e) {
m->errorOut(e, "DecisionTree", "buildDecisionTree");
rootNode->setOutputClass(classifiedOutputClass);
return 0;
}
- if (m->control_pressed) {return 0;}
+ if (m->control_pressed) { return 0; }
vector<int> featureSubsetIndices = selectFeatureSubsetRandomly(globalDiscardedFeatureIndices, rootNode->getLocalDiscardedFeatureIndices());
+
+ // TODO: need to check if the value is actually copied correctly
rootNode->setFeatureSubsetIndices(featureSubsetIndices);
- if (m->control_pressed) {return 0;}
+ if (m->control_pressed) { return 0; }
findAndUpdateBestFeatureToSplitOn(rootNode);
- if (m->control_pressed) {return 0;}
+ // update rootNode outputClass, this is needed for pruning
+ // this is only for internal nodes
+ updateOutputClassOfNode(rootNode);
+
+ if (m->control_pressed) { return 0; }
vector< vector<int> > leftChildSamples;
vector< vector<int> > rightChildSamples;
getSplitPopulation(rootNode, leftChildSamples, rightChildSamples);
- if (m->control_pressed) {return 0;}
+ if (m->control_pressed) { return 0; }
// TODO: need to write code to clear this memory
- RFTreeNode* leftChildNode = new RFTreeNode(leftChildSamples, globalDiscardedFeatureIndices, numFeatures, (int)leftChildSamples.size(), numOutputClasses, rootNode->getGeneration() + 1);
- RFTreeNode* rightChildNode = new RFTreeNode(rightChildSamples, globalDiscardedFeatureIndices, numFeatures, (int)rightChildSamples.size(), numOutputClasses, rootNode->getGeneration() + 1);
+ RFTreeNode* leftChildNode = new RFTreeNode(leftChildSamples, globalDiscardedFeatureIndices, numFeatures, (int)leftChildSamples.size(), numOutputClasses, rootNode->getGeneration() + 1, nodeIdCount, featureStandardDeviationThreshold);
+ nodeIdCount++;
+ RFTreeNode* rightChildNode = new RFTreeNode(rightChildSamples, globalDiscardedFeatureIndices, numFeatures, (int)rightChildSamples.size(), numOutputClasses, rootNode->getGeneration() + 1, nodeIdCount, featureStandardDeviationThreshold);
+ nodeIdCount++;
rootNode->setLeftChildNode(leftChildNode);
leftChildNode->setParentNode(rootNode);
// TODO: This recursive split can be parrallelized later
splitRecursively(leftChildNode);
- if (m->control_pressed) {return 0;}
+ if (m->control_pressed) { return 0; }
splitRecursively(rightChildNode);
return 0;
try {
vector< vector<int> > bootstrappedFeatureVectors = node->getBootstrappedFeatureVectors();
- if (m->control_pressed) {return 0;}
+ if (m->control_pressed) { return 0; }
vector<int> bootstrappedOutputVector = node->getBootstrappedOutputVector();
- if (m->control_pressed) {return 0;}
+ if (m->control_pressed) { return 0; }
vector<int> featureSubsetIndices = node->getFeatureSubsetIndices();
- if (m->control_pressed) {return 0;}
+ if (m->control_pressed) { return 0; }
vector<double> featureSubsetEntropies;
vector<int> featureSubsetSplitValues;
vector<double> featureSubsetGainRatios;
for (int i = 0; i < featureSubsetIndices.size(); i++) {
- if (m->control_pressed) {return 0;}
+ if (m->control_pressed) { return 0; }
int tryIndex = featureSubsetIndices[i];
double featureIntrinsicValue;
getMinEntropyOfFeature(bootstrappedFeatureVectors[tryIndex], bootstrappedOutputVector, featureMinEntropy, featureSplitValue, featureIntrinsicValue);
- if (m->control_pressed) {return 0;}
+ if (m->control_pressed) { return 0; }
featureSubsetEntropies.push_back(featureMinEntropy);
featureSubsetSplitValues.push_back(featureSplitValue);
vector<double>::iterator minEntropyIterator = min_element(featureSubsetEntropies.begin(), featureSubsetEntropies.end());
vector<double>::iterator maxGainRatioIterator = max_element(featureSubsetGainRatios.begin(), featureSubsetGainRatios.end());
double featureMinEntropy = *minEntropyIterator;
- //double featureMaxGainRatio = *maxGainRatioIterator;
+
+ // TODO: kept the following line as future reference, can be useful
+ // double featureMaxGainRatio = *maxGainRatioIterator;
double bestFeatureSplitEntropy = featureMinEntropy;
int bestFeatureToSplitOnIndex = -1;
- if (treeSplitCriterion == "gainRatio"){
+ if (treeSplitCriterion == "gainratio"){
bestFeatureToSplitOnIndex = (int)(maxGainRatioIterator - featureSubsetGainRatios.begin());
// if using 'gainRatio' measure, then featureMinEntropy must be re-updated, as the index
// for 'featureMaxGainRatio' would be different
bestFeatureSplitEntropy = featureSubsetEntropies[bestFeatureToSplitOnIndex];
+ } else if ( treeSplitCriterion == "infogain"){
+ bestFeatureToSplitOnIndex = (int)(minEntropyIterator - featureSubsetEntropies.begin());
+ } else {
+ // TODO: we need an abort mechanism here
}
- else { bestFeatureToSplitOnIndex = (int)(minEntropyIterator - featureSubsetEntropies.begin()); }
+
+ // TODO: is the following line needed? kept is as future reference
+ // splitInformationGain = node.ownEntropy - node.splitFeatureEntropy
int bestFeatureSplitValue = featureSubsetSplitValues[bestFeatureToSplitOnIndex];
node->setSplitFeatureIndex(featureSubsetIndices[bestFeatureToSplitOnIndex]);
node->setSplitFeatureValue(bestFeatureSplitValue);
node->setSplitFeatureEntropy(bestFeatureSplitEntropy);
+ // TODO: kept the following line as future reference
+ // node.splitInformationGain = splitInformationGain
return 0;
}
int DecisionTree::printTree(RFTreeNode* treeNode, string caption){
try {
string tabs = "";
- for (int i = 0; i < treeNode->getGeneration(); i++) { tabs += " "; }
+ for (int i = 0; i < treeNode->getGeneration(); i++) { tabs += "|--"; }
// for (int i = 0; i < treeNode->getGeneration() - 1; i++) { tabs += "| "; }
// if (treeNode->getGeneration() != 0) { tabs += "|--"; }
if (treeNode != NULL && treeNode->checkIsLeaf() == false){
- m->mothurOut(tabs + caption + " [ gen: " + toString(treeNode->getGeneration()) + " ] ( " + toString(treeNode->getSplitFeatureValue()) + " < X" + toString(treeNode->getSplitFeatureIndex()) +" )\n");
+ m->mothurOut(tabs + caption + " [ gen: " + toString(treeNode->getGeneration()) + " , id: " + toString(treeNode->nodeId) + " ] ( " + toString(treeNode->getSplitFeatureValue()) + " < X" + toString(treeNode->getSplitFeatureIndex()) + " ) ( predicted: " + toString(treeNode->outputClass) + " , misclassified: " + toString(treeNode->testSampleMisclassificationCount) + " )\n");
- printTree(treeNode->getLeftChildNode(), "leftChild");
- printTree(treeNode->getRightChildNode(), "rightChild");
+ printTree(treeNode->getLeftChildNode(), "left ");
+ printTree(treeNode->getRightChildNode(), "right");
}else {
- m->mothurOut(tabs + caption + " [ gen: " + toString(treeNode->getGeneration()) + " ] ( classified to: " + toString(treeNode->getOutputClass()) + ", samples: " + toString(treeNode->getNumSamples()) + " )\n");
+ m->mothurOut(tabs + caption + " [ gen: " + toString(treeNode->getGeneration()) + " , id: " + toString(treeNode->nodeId) + " ] ( classified to: " + toString(treeNode->getOutputClass()) + ", samples: " + toString(treeNode->getNumSamples()) + " , misclassified: " + toString(treeNode->testSampleMisclassificationCount) + " )\n");
}
return 0;
}
if (treeNode == NULL) { return; }
deleteTreeNodesRecursively(treeNode->leftChildNode);
deleteTreeNodesRecursively(treeNode->rightChildNode);
- delete treeNode;
+ delete treeNode; treeNode = NULL;
}
catch(exception& e) {
m->errorOut(e, "DecisionTree", "deleteTreeNodesRecursively");
}
/***********************************************************************/
+void DecisionTree::pruneTree(double pruneAggressiveness = 0.9) {
+
+ // find out the number of misclassification by each of the nodes
+ for (int i = 0; i < bootstrappedTestSamples.size(); i++) {
+ if (m->control_pressed) { return; }
+
+ vector<int> testSample = bootstrappedTestSamples[i];
+ updateMisclassificationCountRecursively(rootNode, testSample);
+ }
+
+ // do the actual pruning
+ pruneRecursively(rootNode, pruneAggressiveness);
+}
+/***********************************************************************/
+
+void DecisionTree::pruneRecursively(RFTreeNode* treeNode, double pruneAggressiveness){
+
+ if (treeNode != NULL && treeNode->checkIsLeaf() == false) {
+ if (m->control_pressed) { return; }
+
+ pruneRecursively(treeNode->leftChildNode, pruneAggressiveness);
+ pruneRecursively(treeNode->rightChildNode, pruneAggressiveness);
+
+ int subTreeMisclassificationCount = treeNode->leftChildNode->getTestSampleMisclassificationCount() + treeNode->rightChildNode->getTestSampleMisclassificationCount();
+ int ownMisclassificationCount = treeNode->getTestSampleMisclassificationCount();
+
+ if (subTreeMisclassificationCount * pruneAggressiveness > ownMisclassificationCount) {
+ // TODO: need to check the effect of these two delete calls
+ delete treeNode->leftChildNode;
+ treeNode->leftChildNode = NULL;
+
+ delete treeNode->rightChildNode;
+ treeNode->rightChildNode = NULL;
+
+ treeNode->isLeaf = true;
+ }
+
+ }
+}
+/***********************************************************************/
+
+void DecisionTree::updateMisclassificationCountRecursively(RFTreeNode* treeNode, vector<int> testSample) {
+
+ int actualSampleOutputClass = testSample[numFeatures];
+ int nodePredictedOutputClass = treeNode->outputClass;
+
+ if (actualSampleOutputClass != nodePredictedOutputClass) {
+ treeNode->testSampleMisclassificationCount++;
+ map<int, int>::iterator it = nodeMisclassificationCounts.find(treeNode->nodeId);
+ if (it == nodeMisclassificationCounts.end()) { // NOT FOUND
+ nodeMisclassificationCounts[treeNode->nodeId] = 0;
+ }
+ nodeMisclassificationCounts[treeNode->nodeId]++;
+ }
+
+ if (treeNode->checkIsLeaf() == false) { // NOT A LEAF
+ int sampleSplitFeatureValue = testSample[treeNode->splitFeatureIndex];
+ if (sampleSplitFeatureValue < treeNode->splitFeatureValue) {
+ updateMisclassificationCountRecursively(treeNode->leftChildNode, testSample);
+ } else {
+ updateMisclassificationCountRecursively(treeNode->rightChildNode, testSample);
+ }
+ }
+}
+
+/***********************************************************************/
+
+void DecisionTree::updateOutputClassOfNode(RFTreeNode* treeNode) {
+ vector<int> counts(numOutputClasses, 0);
+ for (int i = 0; i < treeNode->bootstrappedOutputVector.size(); i++) {
+ int bootstrappedOutput = treeNode->bootstrappedOutputVector[i];
+ counts[bootstrappedOutput]++;
+ }
+
+ vector<int>::iterator majorityVotedOutputClassCountIterator = max_element(counts.begin(), counts.end());
+ int majorityVotedOutputClassCount = *majorityVotedOutputClassCountIterator;
+ vector<int>::iterator it = find(counts.begin(), counts.end(), majorityVotedOutputClassCount);
+ int majorityVotedOutputClass = (int)(it - counts.begin());
+ treeNode->setOutputClass(majorityVotedOutputClass);
+
+}
+/***********************************************************************/
+
+
// Copyright (c) 2012 Schloss Lab. All rights reserved.
//
-#ifndef rrf_fs_prototype_decisiontree_hpp
-#define rrf_fs_prototype_decisiontree_hpp
+#ifndef RF_DECISIONTREE_HPP
+#define RF_DECISIONTREE_HPP
#include "macros.h"
#include "rftreenode.hpp"
/***********************************************************************/
struct VariableRankDescendingSorter {
- bool operator() (vector<int> first, vector<int> second){ return first[1] > second[1]; }
+ bool operator() (const pair<int, int>& firstPair, const pair<int, int>& secondPair){
+ return firstPair.second > secondPair.second;
+ }
};
struct VariableRankDescendingSorterDouble {
- bool operator() (vector<double> first, vector<double> second){ return first[1] > second[1]; }
+ bool operator() (const pair<int, double>& firstPair, const pair<int, double>& secondPair){
+ return firstPair.second > secondPair.second;
+ }
};
/***********************************************************************/
public:
- DecisionTree(vector< vector<int> > baseDataSet,
+ DecisionTree(vector< vector<int> >& baseDataSet,
vector<int> globalDiscardedFeatureIndices,
OptimumFeatureSubsetSelector optimumFeatureSubsetSelector,
- string treeSplitCriterion);
+ string treeSplitCriterion,
+ float featureStandardDeviationThreshold);
+
virtual ~DecisionTree(){ deleteTreeNodesRecursively(rootNode); }
- int calcTreeVariableImportanceAndError();
+ int calcTreeVariableImportanceAndError(int& numCorrect, double& treeErrorRate);
int evaluateSample(vector<int> testSample);
int calcTreeErrorRate(int& numCorrect, double& treeErrorRate);
- vector< vector<int> > randomlyShuffleAttribute(vector< vector<int> > samples, int featureIndex);
+
+ void randomlyShuffleAttribute(const vector< vector<int> >& samples,
+ const int featureIndex,
+ const int prevFeatureIndex,
+ vector< vector<int> >& shuffledSample);
+
void purgeDataSetsFromTree() { purgeTreeNodesDataRecursively(rootNode); }
int purgeTreeNodesDataRecursively(RFTreeNode* treeNode);
+ void pruneTree(double pruneAggressiveness);
+ void pruneRecursively(RFTreeNode* treeNode, double pruneAggressiveness);
+ void updateMisclassificationCountRecursively(RFTreeNode* treeNode, vector<int> testSample);
+ void updateOutputClassOfNode(RFTreeNode* treeNode);
+
private:
vector<int> variableImportanceList;
map<int, int> outOfBagEstimates;
+
+ float featureStandardDeviationThreshold;
};
#endif
}
CountTable ct;
if (countfile != "") {
- ct.readTable(countfile, false);
+ ct.readTable(countfile, true);
if (countfile == outCountFile){
//prepare filenames and open files
map<string, string> mvariables;
--- /dev/null
+//
+// designmap.cpp
+// Mothur
+//
+// Created by SarahsWork on 6/17/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "designmap.h"
+
+/************************************************************/
+DesignMap::DesignMap(string file) {
+ try {
+ m = MothurOut::getInstance();
+ defaultClass = "not found";
+ read(file);
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "DesignMap");
+ exit(1);
+ }
+}
+/************************************************************/
+int DesignMap::read(string file) {
+ try {
+ ifstream in;
+ m->openInputFile(file, in);
+
+ string headers = m->getline(in); m->gobble(in);
+ vector<string> columnHeaders = m->splitWhiteSpace(headers);
+
+ namesOfCategories.clear();
+ indexCategoryMap.clear();
+ indexNameMap.clear();
+ designMap.clear();
+ map<int, string> originalGroupIndexes;
+ for (int i = 1; i < columnHeaders.size(); i++) { namesOfCategories.push_back(columnHeaders[i]); originalGroupIndexes[i-1] = columnHeaders[i]; }
+ if (columnHeaders.size() > 1) { defaultClass = columnHeaders[1]; }
+
+ //sort groups to keep consistent with how we store the groups in groupmap
+ sort(namesOfCategories.begin(), namesOfCategories.end());
+ for (int i = 0; i < namesOfCategories.size(); i++) { indexCategoryMap[namesOfCategories[i]] = i; }
+ int numCategories = namesOfCategories.size();
+
+ bool error = false;
+ string group;
+ totalCategories.resize(numCategories);
+ int count = 0;
+ while (!in.eof()) {
+
+ if (m->control_pressed) { break; }
+
+ in >> group; m->gobble(in);
+ if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + "\n"); }
+
+ //if group info, then read it
+ vector<string> categoryValues; categoryValues.resize(numCategories, "not found");
+ for (int i = 0; i < numCategories; i++) {
+ int thisIndex = indexCategoryMap[originalGroupIndexes[i]]; //find index of this category because we sort the values.
+ string temp = "not found";
+ in >> temp; categoryValues[thisIndex] = temp; m->gobble(in);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: value = " + temp + "\n"); }
+
+ //do we have this value for this category already
+ map<string, int>::iterator it = totalCategories[thisIndex].find(temp);
+ if (it == totalCategories[thisIndex].end()) { totalCategories[thisIndex][temp] = 1; }
+ else { totalCategories[thisIndex][temp]++; }
+ }
+
+
+ map<string, int>::iterator it = indexNameMap.find(group);
+ if (it == indexNameMap.end()) {
+ indexNameMap[group] = count;
+ designMap.push_back(categoryValues);
+ count++;
+ }else {
+ error = true;
+ m->mothurOut("[ERROR]: Your design file contains more than 1 group named " + group + ", group names must be unique. Please correct."); m->mothurOutEndLine();
+ }
+ }
+ in.close();
+
+ //sanity check
+ for (int i = 0; i < totalCategories.size(); i++) {
+ map<string, int>::iterator it = totalCategories[i].find(namesOfCategories[i]);
+ if (it != totalCategories[i].end()) { //we may have an old style design file since category name matches a value
+ m->mothurOut("\n[WARNING]: Your design file has a category and value for that category named " + namesOfCategories[i] + ". Perhaps you are using an old style design file without headers? If so, please correct."); m->mothurOutEndLine();
+ }
+ }
+
+ if (error) { m->control_pressed = true; }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "read");
+ exit(1);
+ }
+}
+/************************************************************/
+////groupName, returns first categories value.
+string DesignMap::get(string groupName) {
+ try {
+ string value = "not found";
+
+ map<string, int>::iterator it2 = indexNameMap.find(groupName);
+ if (it2 == indexNameMap.end()) {
+ m->mothurOut("[ERROR]: group " + groupName + " is not in your design file. Please correct.\n"); m->control_pressed = true;
+ }else {
+ return designMap[it2->second][0];
+ }
+
+ return value;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "get");
+ exit(1);
+ }
+}
+/************************************************************/
+////categoryName, returns category values.
+vector<string> DesignMap::getValues(string catName) {
+ try {
+ vector<string> values;
+
+ map<string, int>::iterator it2 = indexCategoryMap.find(catName);
+ if (it2 == indexCategoryMap.end()) {
+ m->mothurOut("[ERROR]: category " + catName + " is not in your design file. Please correct.\n"); m->control_pressed = true;
+ }else {
+ for (map<string, int>::iterator it = totalCategories[it2->second].begin(); it != totalCategories[it2->second].end(); it++) {
+ values.push_back(it->first);
+ }
+ }
+
+ return values;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "getValues");
+ exit(1);
+ }
+}
+
+/************************************************************/
+////groupName, category returns value. example F000132, sex -> male
+string DesignMap::get(string groupName, string categoryName) {
+ try {
+ string value = "not found";
+ map<string, int>::iterator it = indexCategoryMap.find(categoryName);
+ if (it == indexCategoryMap.end()) {
+ m->mothurOut("[ERROR]: category " + categoryName + " is not in your design file. Please correct.\n"); m->control_pressed = true;
+ }else {
+ map<string, int>::iterator it2 = indexNameMap.find(groupName);
+ if (it2 == indexNameMap.end()) {
+ m->mothurOut("[ERROR]: group " + groupName + " is not in your design file. Please correct.\n"); m->control_pressed = true;
+ }else {
+ return designMap[it2->second][it->second];
+ }
+ }
+ return value;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "get");
+ exit(1);
+ }
+}
+/************************************************************/
+//add group, assumes order is correct
+int DesignMap::push_back(string group, vector<string> values) {
+ try {
+ map<string, int>::iterator it = indexNameMap.find(group);
+ if (it == indexNameMap.end()) {
+ if (values.size() != getNumCategories()) { m->mothurOut("[ERROR]: Your design file has a " + toString(getNumCategories()) + " categories and " + group + " has " + toString(values.size()) + ", please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
+
+ for (int i = 0; i < values.size(); i++) {
+ //do we have this value for this category already
+ map<string, int>::iterator it = totalCategories[i].find(values[i]);
+ if (it == totalCategories[i].end()) { totalCategories[i][values[i]] = 1; }
+ else { totalCategories[i][values[i]]++; }
+ }
+ int count = indexNameMap.size();
+ indexNameMap[group] = count;
+ designMap.push_back(values);
+ }else {
+ m->mothurOut("[ERROR]: Your design file contains more than 1 group named " + group + ", group names must be unique. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "push_back");
+ exit(1);
+ }
+}
+/************************************************************/
+//set values for group, does not need to set all values. assumes group is in table already
+int DesignMap::set(string group, map<string, string> values) {
+ try {
+ map<string, int>::iterator it = indexNameMap.find(group);
+ if (it != indexNameMap.end()) {
+ for (map<string, string>::iterator it2 = values.begin(); it2 != values.end(); it2++) {
+
+ map<string, int>::iterator it3 = indexCategoryMap.find(it2->first); //do we have this category
+ if (it3 == indexCategoryMap.end()) {
+ m->mothurOut("[ERROR]: Your design file does not contain a category called " + it2->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
+ }else {
+ string oldCategory = designMap[it->second][it3->second];
+ //adjust totals for old category
+ int oldCount = totalCategories[it3->second][oldCategory];
+ if (oldCount == 1) { totalCategories[it3->second].erase(oldCategory); }
+ else { totalCategories[it3->second][oldCategory]--; }
+
+ designMap[it->second][it3->second] = it2->second; //reset value
+
+ //adjust totals for new category
+ map<string, int>::iterator it4 = totalCategories[it3->second].find(it2->second);
+ if (it4 == totalCategories[it3->second].end()) { totalCategories[it3->second][it2->second] = 1; }
+ else { totalCategories[it3->second][it2->second]++; }
+ }
+ }
+ }else {
+ m->mothurOut("[ERROR]: Your design file does not contain a group named " + group + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "set");
+ exit(1);
+ }
+}
+/************************************************************/
+//get number of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example:
+// map<treatment - > early, late>, <sex -> male> would return 1. Only one group is male and from early or late.
+int DesignMap::getNumUnique(map<string, vector<string> > selected) {
+ try {
+ int num = 0;
+
+ //get indexes of categories
+ vector<int> indexes;
+ for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) {
+ map<string, int>::iterator it2 = indexCategoryMap.find(it->first);
+ if (it2 == indexCategoryMap.end()) {
+ m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0;
+ }else { indexes.push_back(it2->second); }
+ }
+
+ for (int i = 0; i < designMap.size(); i++) {
+ bool hasAll = true; //innocent til proven guilty
+ int count = 0;
+ for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) { //loop through each
+ //check category to see if this group meets the requirements
+ if (!m->inUsersGroups(designMap[i][indexes[count]], it->second)) { hasAll = false; it = selected.end(); }
+ count++;
+ }
+ if (hasAll) { num++; }
+ }
+
+ return num;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "getNumUnique");
+ exit(1);
+ }
+}
+/************************************************************/
+//get number of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example:
+// map<treatment - > early, late>, <sex -> male> would return 3. All three group have are either male or from early or late.
+int DesignMap::getNumShared(map<string, vector<string> > selected) {
+ try {
+ int num = 0;
+
+ //get indexes of categories
+ vector<int> indexes;
+ for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) {
+ map<string, int>::iterator it2 = indexCategoryMap.find(it->first);
+ if (it2 == indexCategoryMap.end()) {
+ m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0;
+ }else { indexes.push_back(it2->second); }
+ }
+
+ for (int i = 0; i < designMap.size(); i++) {
+ bool hasAny = false; //guilty til proven innocent
+ int count = 0;
+ for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) { //loop through each
+ //check category to see if this group meets the requirements
+ if (m->inUsersGroups(designMap[i][indexes[count]], it->second)) { hasAny = true; it = selected.end(); }
+ count++;
+ }
+ if (hasAny) { num++; }
+ }
+
+
+ return num;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "getNumShared");
+ exit(1);
+ }
+}
+
+/************************************************************/
+//get names of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example:
+// map<treatment - > early, late>, <sex -> male> would return F000132. F000132 is the only group which is male and from early or late.
+vector<string> DesignMap::getNamesUnique(map<string, vector<string> > selected) {
+ try {
+ vector<string> names;
+
+ //get indexes of categories
+ vector<int> indexes;
+ for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) {
+ map<string, int>::iterator it2 = indexCategoryMap.find(it->first);
+ if (it2 == indexCategoryMap.end()) {
+ m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+ }else { indexes.push_back(it2->second); }
+ }
+
+ //map int to name
+ map<int, string> reverse;
+ for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
+ reverse[it->second] = it->first;
+ }
+
+ for (int i = 0; i < designMap.size(); i++) {
+ bool hasAll = true; //innocent til proven guilty
+ int count = 0;
+ for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) { //loop through each
+ //check category to see if this group meets the requirements
+ if (!m->inUsersGroups(designMap[i][indexes[count]], it->second)) { hasAll = false; it = selected.end(); }
+ count++;
+ }
+ if (hasAll) {
+ map<int, string>::iterator it = reverse.find(i);
+ if (it == reverse.end()) {
+ m->mothurOut("[ERROR]: should never get here, oops. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+ }else { names.push_back(it->second); }
+ }
+ }
+
+
+ return names;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "getNamesUnique");
+ exit(1);
+ }
+}
+/************************************************************/
+//get names of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example:
+// map<treatment - > early, late>, <sex -> male> would return F000132. F000132 is the only group which is male and from early or late.
+vector<string> DesignMap::getNamesShared(map<string, vector<string> > selected) {
+ try {
+ vector<string> names;
+
+ //get indexes of categories
+ vector<int> indexes;
+ for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) {
+ map<string, int>::iterator it2 = indexCategoryMap.find(it->first);
+ if (it2 == indexCategoryMap.end()) {
+ m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+ }else { indexes.push_back(it2->second); }
+ }
+
+ //map int to name
+ map<int, string> reverse;
+ for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
+ reverse[it->second] = it->first;
+ }
+
+ for (int i = 0; i < designMap.size(); i++) {
+ bool hasAny = false;
+ int count = 0;
+ for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) { //loop through each
+ //check category to see if this group meets the requirements
+ if (m->inUsersGroups(designMap[i][indexes[count]], it->second)) { hasAny = true; it = selected.end(); }
+ count++;
+ }
+ if (hasAny) {
+ map<int, string>::iterator it = reverse.find(i);
+ if (it == reverse.end()) {
+ m->mothurOut("[ERROR]: should never get here, oops. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+ }else { names.push_back(it->second); }
+ }
+ }
+
+
+ return names;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "getNamesShared");
+ exit(1);
+ }
+}
+
+/************************************************************/
+//get names of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example:
+// map<treatment - > early, late>, <sex -> male> would return F000132, F000142, F000138. All three group have are either male or from early or late.
+
+vector<string> DesignMap::getNames(string category, string value) {
+ try {
+ vector<string> names;
+
+ map<string, int>::iterator it = indexCategoryMap.find(category);
+ if (it == indexCategoryMap.end()) {
+ m->mothurOut("[ERROR]: category " + category + " is not in your design file. Please correct.\n"); m->control_pressed = true;
+ }else {
+ int column = it->second;
+
+ //map int to name
+ map<int, string> reverse;
+ for (map<string, int>::iterator it2 = indexNameMap.begin(); it2 != indexNameMap.end(); it2++) {
+ reverse[it2->second] = it2->first;
+ }
+
+ for (int i = 0; i < designMap.size(); i++) {
+ if (designMap[i][column] == value) {
+ map<int, string>::iterator it2 = reverse.find(i);
+ if (it2 == reverse.end()) {
+ m->mothurOut("[ERROR]: should never get here, oops. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+ }else { names.push_back(it2->second); }
+ }
+ }
+ }
+ return names;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "getNames");
+ exit(1);
+ }
+}
+
+/************************************************************/
+int DesignMap::print(ofstream& out) {
+ try {
+
+ out << "group\t";
+ for (int i = 0; i < namesOfCategories.size(); i++) { out << namesOfCategories[i] << '\t'; }
+ out << endl;
+
+ map<int, string> reverse; //use this to preserve order
+ for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; }
+
+ for (int i = 0; i < designMap.size(); i++) {
+ map<int, string>::iterator itR = reverse.find(i);
+
+ if (itR != reverse.end()) { //will equal end if seqs were removed because remove just removes from indexNameMap
+ out << itR->second << '\t';
+
+ for (int j = 0; j < namesOfCategories.size(); j++) {
+ out << designMap[i][j] << '\t';
+ }
+ out << endl;
+ }
+ }
+ out.close();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "print");
+ exit(1);
+ }
+}
+/************************************************************/
+//print specific categories
+int DesignMap::print(ofstream& out, vector<string> cats) {
+ try {
+
+ out << "group\t";
+ for (int i = 0; i < namesOfCategories.size(); i++) { if (m->inUsersGroups(namesOfCategories[i], cats)) { out << namesOfCategories[i] << '\t'; } }
+ out << endl;
+
+ map<int, string> reverse; //use this to preserve order
+ for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; }
+
+ for (int i = 0; i < designMap.size(); i++) {
+ map<int, string>::iterator itR = reverse.find(i);
+
+ if (itR != reverse.end()) { //will equal end if seqs were removed because remove just removes from indexNameMap
+ out << itR->second << '\t';
+
+ for (int j = 0; j < namesOfCategories.size(); j++) {
+ if (m->inUsersGroups(namesOfCategories[i], cats)) {
+ out << designMap[i][j] << '\t';
+ }
+ }
+ out << endl;
+ }
+ }
+ out.close();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "print");
+ exit(1);
+ }
+}
+
+/************************************************************/
+
--- /dev/null
+//
+// designmap.h
+// Mothur
+//
+// Created by SarahsWork on 6/17/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef __Mothur__designmap__
+#define __Mothur__designmap__
+
+#include "mothurout.h"
+
+/* This class is a representation of the design file.
+
+ group treatment sex age
+ F000142 Early female young
+ F000132 Late male old
+ F000138 Mid male old
+
+
+ */
+
+class DesignMap {
+public:
+ DesignMap() { m = MothurOut::getInstance(); defaultClass = "not found"; }
+ DesignMap(string);
+ ~DesignMap() {}
+
+ int read(string);
+ string get(string, string); //groupName, category returns value. example F000132, sex -> male
+ string get(string); //groupName, returns first categories value. example F000132, -> late
+ vector<string> getValues(string); //categoryName, returns values. example treatment, -> early,late,mid
+ int set(string, map<string, string>); //groupName, map<category, value>
+ int push_back(string, vector<string>); //groupName, vector<value> - assumes you put values in order of getNamesOfCategories
+ vector<string> getNamesOfCategories() {
+ sort(namesOfCategories.begin(), namesOfCategories.end());
+ return namesOfCategories;
+ }
+ int getNumCategories() { return namesOfCategories.size(); }
+ int getNum() { return designMap.size(); }
+ int getNumUnique(map<string, vector<string> >); //get number of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example:
+ // map<treatment - > early, late>, <sex -> male> would return 1. Only one group is male and from early or late.
+ int getNumShared(map<string, vector<string> >); //get number of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example:
+ // map<treatment - > early, late>, <sex -> male> would return 3. All three group have are either male or from early or late.
+
+ vector<string> getNames();
+ vector<string> getNames(string, string); //get names group from category and value.
+ vector<string> getNamesUnique(map<string, vector<string> >); //get names of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example:
+ // map<treatment - > early, late>, <sex -> male> would return F000132. F000132 is the only group which is male and from early or late.
+ vector<string> getNamesShared(map<string, vector<string> >); //get names of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example:
+ // map<treatment - > early, late>, <sex -> male> would return F000132, F000142, F000138. All three group have are either male or from early or late.
+
+ int print(ofstream&);
+ int print(ofstream&, vector<string>); //print certain categories
+
+ string getDefaultClass() { return defaultClass; }
+
+private:
+ string defaultClass;
+ vector<string> namesOfCategories;
+ MothurOut* m;
+ vector< vector<string> > designMap;
+ vector< map<string, int> > totalCategories; //for each category, total groups assigned to it. vector[0] early -> 1, vector[1] male -> 2
+ map<string, int> indexNameMap;
+ map<string, int> indexCategoryMap;
+};
+
+
+#endif /* defined(__Mothur__designmap__) */
/***********************************************************************/
Forest::Forest(const std::vector < std::vector<int> > dataSet,
- const int numDecisionTrees,
- const string treeSplitCriterion = "informationGain")
-: dataSet(dataSet),
-numDecisionTrees(numDecisionTrees),
-numSamples((int)dataSet.size()),
-numFeatures((int)(dataSet[0].size() - 1)),
-globalVariableImportanceList(numFeatures, 0),
-treeSplitCriterion(treeSplitCriterion) {
+ const int numDecisionTrees,
+ const string treeSplitCriterion = "gainratio",
+ const bool doPruning = false,
+ const float pruneAggressiveness = 0.9,
+ const bool discardHighErrorTrees = true,
+ const float highErrorTreeDiscardThreshold = 0.4,
+ const string optimumFeatureSubsetSelectionCriteria = "log2",
+ const float featureStandardDeviationThreshold = 0.0)
+ : dataSet(dataSet),
+ numDecisionTrees(numDecisionTrees),
+ numSamples((int)dataSet.size()),
+ numFeatures((int)(dataSet[0].size() - 1)),
+ globalVariableImportanceList(numFeatures, 0),
+ treeSplitCriterion(treeSplitCriterion),
+ doPruning(doPruning),
+ pruneAggressiveness(pruneAggressiveness),
+ discardHighErrorTrees(discardHighErrorTrees),
+ highErrorTreeDiscardThreshold(highErrorTreeDiscardThreshold),
+ optimumFeatureSubsetSelectionCriteria(optimumFeatureSubsetSelectionCriteria),
+ featureStandardDeviationThreshold(featureStandardDeviationThreshold)
+ {
+
m = MothurOut::getInstance();
globalDiscardedFeatureIndices = getGlobalDiscardedFeatureIndices();
// TODO: double check if the implemenatation of 'globalOutOfBagEstimates' is correct
for (int i = 0; i < featureVectors.size(); i++) {
if (m->control_pressed) { return globalDiscardedFeatureIndices; }
double standardDeviation = m->getStandardDeviation(featureVectors[i]);
- if (standardDeviation <= 0){ globalDiscardedFeatureIndices.push_back(i); }
+ if (standardDeviation <= featureStandardDeviationThreshold){ globalDiscardedFeatureIndices.push_back(i); }
}
if (m->debug) {
public:
// intialization with vectors
Forest(const std::vector < std::vector<int> > dataSet,
- const int numDecisionTrees,
- const string);
+ const int numDecisionTrees,
+ const string treeSplitCriterion,
+ const bool doPruning,
+ const float pruneAggressiveness,
+ const bool discardHighErrorTrees,
+ const float highErrorTreeDiscardThreshold,
+ const string optimumFeatureSubsetSelectionCriteria,
+ const float featureStandardDeviationThreshold);
virtual ~Forest(){ }
virtual int populateDecisionTrees() = 0;
virtual int calcForrestErrorRate() = 0;
vector<int> globalDiscardedFeatureIndices;
vector<double> globalVariableImportanceList;
string treeSplitCriterion;
+
+ bool doPruning;
+ float pruneAggressiveness;
+ bool discardHighErrorTrees;
+ float highErrorTreeDiscardThreshold;
+ string optimumFeatureSubsetSelectionCriteria;
+ float featureStandardDeviationThreshold;
+
// This is a map of each feature to outcome count of each classes
// e.g. 1 => [2 7] means feature 1 has 2 outcome of 0 and 7 outcome of 1
map<int, vector<int> > globalOutOfBagEstimates;
if ((sharedfile != "") || (listfile != "")) {
label = validParameter.validFile(parameters, "label", false);
- if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
+ if (label == "not found") { label = ""; m->mothurOut("[WARNING]: You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); }
}
if (countfile == "") {
string getOutputPattern(string);
string getHelpString();
- string getCitation() { return "http://www.mothur.org/wiki/Get.metacommunity"; }
- string getDescription() { return "brief description"; }
+ string getCitation() { return "Holmes I, Harris K, Quince C (2012) Dirichlet Multinomial Mixtures: Generative Models for Microbial Metagenomics. PLoS ONE 7(2): e30126. doi:10.1371/journal.pone.0030126 http://www.mothur.org/wiki/Get.metacommunity"; }
+ string getDescription() { return "Assigns samples to bins using a Dirichlet multinomial mixture model"; }
int execute();
void help() { m->mothurOut(getHelpString()); }
helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality or alignreport file.\n";
helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups. You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
- helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
+ helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=true. \n";
helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
if (!dups) {//adjust name if needed
map<string, string>::iterator it = uniqueMap.find(name);
- if (it != uniqueMap.end()) { name = it->second; }
+ if (it != uniqueMap.end()) { currSeq.setName(it->second); }
}
+ name = currSeq.getName();
+
if (name != "") {
//if this name is in the accnos file
if (names.count(name) != 0) {
if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
- in >> firstCol;
+ in >> firstCol; m->gobble(in);
in >> secondCol;
string hold = "";
selectedCount += parsedNames.size();
if (m->debug) { sanity["name"].insert(firstCol); }
}else {
+
selectedCount += validSecond.size();
//if the name in the first column is in the set then print it and any other names in second column also in set
//make first name in set you come to first column and then add the remaining names to second column
}else {
+
//you want part of this row
if (validSecond.size() != 0) {
exit(1);
}
}
-
+/************************************************************/
+int GroupMap::renameSeq(string oldName, string newName) {
+ try {
+
+ map<string, string>::iterator itName;
+
+ itName = groupmap.find(oldName);
+
+ if (itName == groupmap.end()) {
+ m->mothurOut("[ERROR]: cannot find " + toString(oldName) + " in group file");
+ m->control_pressed = true;
+ return 0;
+ }else {
+ string group = itName->second;
+ groupmap.erase(itName);
+ groupmap[newName] = group;
+ }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GroupMap", "renameSeq");
+ exit(1);
+ }
+}
+/************************************************************/
+int GroupMap::print(ofstream& out) {
+ try {
+
+ for (map<string, string>::iterator itName = groupmap.begin(); itName != groupmap.end(); itName++) {
+ out << itName->first << '\t' << itName->second << endl;
+ }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GroupMap", "print");
+ exit(1);
+ }
+}
+/************************************************************/
+int GroupMap::print(ofstream& out, vector<string> userGroups) {
+ try {
+
+ for (map<string, string>::iterator itName = groupmap.begin(); itName != groupmap.end(); itName++) {
+ if (m->inUsersGroups(itName->second, userGroups)) {
+ out << itName->first << '\t' << itName->second << endl;
+ }
+ }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GroupMap", "print");
+ exit(1);
+ }
+}
/************************************************************/
vector<string> GroupMap::getNamesSeqs(){
try {
vector<string> getNamesSeqs(vector<string>); //get names of seqs belonging to a group or set of groups
int getNumSeqs(string); //return the number of seqs in a given group
int getCopy(GroupMap*);
+ int renameSeq(string, string);
+ int print(ofstream&);
+ int print(ofstream&, vector<string>); //print certain groups
map<string, int> groupIndex; //groupname, vectorIndex in namesOfGroups. - used by collectdisplays and libshuff commands.
*/
#include "kruskalwalliscommand.h"
+#include "linearalgebra.h"
//**********************************************************************************************************************
-vector<string> KruskalWallisCommand::setParameters(){
+vector<string> KruskalWallisCommand::setParameters(){
try {
+ CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
+ CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
+ CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass);
+ CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+ CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses);
+ //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
- CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(pgroups);
- CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
}
}
//**********************************************************************************************************************
-string KruskalWallisCommand::getHelpString(){
+string KruskalWallisCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The kruskalwallis command parameter options are \n";
- helpString += "Kruskal–Wallis one-way analysis of variance is a non-parametric method for testing whether samples originate from the same distribution.";
- return helpString;
+ helpString += "The kruskal.wallis command allows you to ....\n";
+ helpString += "The kruskal.wallis command parameters are: shared, design, class, label and classes.\n";
+ helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n";
+ helpString += "The classes parameter is used to indicate the classes you would like to use. Clases should be inputted in the following format: classes=label<value1|value2|value3>-label<value1|value2>. For example to include groups from treatment with value early or late and age= young or old. class=treatment<Early|Late>-age<young|old>.\n";
+ helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n";
+ helpString += "The kruskal.wallis command should be in the following format: kruskal.wallis(shared=final.an.shared, design=final.design, class=treatment).\n";
+ return helpString;
}
catch(exception& e) {
m->errorOut(e, "KruskalWallisCommand", "getHelpString");
try {
string pattern = "";
- if (type == "summary") { pattern = "[filename],cooccurence.summary"; }
+ if (type == "kruskall-wallis") { pattern = "[filename],[distance],kruskall_wallis"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
return pattern;
}
}
//**********************************************************************************************************************
-KruskalWallisCommand::KruskalWallisCommand(){
+KruskalWallisCommand::KruskalWallisCommand(){
try {
- abort = true; calledHelp = true;
+ abort = true; calledHelp = true;
setParameters();
vector<string> tempOutNames;
- outputTypes["summary"] = tempOutNames;
-
+ outputTypes["kruskall-wallis"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
}
}
//**********************************************************************************************************************
-KruskalWallisCommand::KruskalWallisCommand(string option) {
+KruskalWallisCommand::KruskalWallisCommand(string option) {
try {
- abort = false; calledHelp = false;
-
+ abort = false; calledHelp = false;
+ allLines = 1;
+
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
else if(option == "citation") { citation(); abort = true; calledHelp = true;}
else {
+ //valid paramters for this command
vector<string> myArray = setParameters();
OptionParser parser(option);
map<string,string> parameters = parser.getParameters();
- map<string,string>::iterator it;
ValidParameters validParameter;
-
+ map<string,string>::iterator it;
//check to make sure all parameters are valid for command
- for (it = parameters.begin(); it != parameters.end(); it++) {
+ for (it = parameters.begin(); it != parameters.end(); it++) {
if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
}
+
+ vector<string> tempOutNames;
+ outputTypes["kruskall-wallis"] = tempOutNames;
- //get shared file
- sharedfile = validParameter.validFile(parameters, "shared", true);
- if (sharedfile == "not open") { sharedfile = ""; abort = true; }
- else if (sharedfile == "not found") {
- //if there is a current shared file, use it
- sharedfile = m->getSharedFile();
- if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
- else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
- }else { m->setSharedFile(sharedfile); }
-
- //if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
-
- groups = validParameter.validFile(parameters, "groups", false);
- if (groups == "not found") { groups = ""; }
- else {
- m->splitAtDash(groups, Groups);
- }
- m->setGroups(Groups);
-
- //if the user changes the input directory command factory will send this info to us in the output parameter
- string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
if (inputDir == "not found"){ inputDir = ""; }
else {
- string path;
- it = parameters.find("shared");
+
+ string path;
+ it = parameters.find("design");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["desing"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("shared");
//user has given a template file
- if(it != parameters.end()){
+ if(it != parameters.end()){
path = m->hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["shared"] = inputDir + it->second; }
}
+ }
+
+ //get shared file, it is required
+ sharedfile = validParameter.validFile(parameters, "shared", true);
+ if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+ else if (sharedfile == "not found") {
+ //if there is a current shared file, use it
+ sharedfile = m->getSharedFile();
+ if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
+ }else { m->setSharedFile(sharedfile); }
+
+ //get shared file, it is required
+ designfile = validParameter.validFile(parameters, "design", true);
+ if (designfile == "not open") { designfile = ""; abort = true; }
+ else if (designfile == "not found") {
+ //if there is a current shared file, use it
+ designfile = m->getDesignFile();
+ if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
+ }else { m->setDesignFile(designfile); }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
}
-
- vector<string> tempOutNames;
- outputTypes["summary"] = tempOutNames;
-
-
+
+ string label = validParameter.validFile(parameters, "label", false);
+ if (label == "not found") { label = ""; }
+ else {
+ if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
+ else { allLines = 1; }
+ }
+
+ mclass = validParameter.validFile(parameters, "class", false);
+ if (mclass == "not found") { mclass = ""; }
+
+ classes = validParameter.validFile(parameters, "classes", false);
+ if (classes == "not found") { classes = ""; }
+
}
-
+
}
catch(exception& e) {
m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
}
}
//**********************************************************************************************************************
+
int KruskalWallisCommand::execute(){
try {
+
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- InputData* input = new InputData(sharedfile, "sharedfile");
- vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
- string lastLabel = lookup[0]->getLabel();
-
-
- //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
- set<string> processedLabels;
- set<string> userLabels = labels;
-
- ofstream out;
- map<string,string> variables;
- variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
- string outputFileName = getOutputFileName("summary",variables);
- m->openOutputFile(outputFileName, out);
- outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
- out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
- out << "H\tpvalue\n";
-
- //math goes here
+ DesignMap designMap(designfile);
+ //if user set classes set groups=those classes
+ if (classes != "") {
+ map<string, vector<string> > thisClasses = m->parseClasses(classes);
+ vector<string> groups = designMap.getNamesUnique(thisClasses);
+ if (groups.size() != 0) { m->setGroups(groups); }
+ else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; }
+ }
- int N = m->getNumGroups();
- double H;
- double tmp = 0.0;
- vector<groupRank> vec;
- vector<string> groups = m->getGroups();
- string group;
- int count;
- double sum;
-
- //merge all groups into a vector
+ //if user did not select class use first column
+ if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); }
+ InputData input(sharedfile, "sharedfile");
+ vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+ string lastLabel = lookup[0]->getLabel();
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> processedLabels;
+ set<string> userLabels = labels;
- //rank function here
- assignRank(vec);
- //populate counts and ranSums vectors
- for (int i=0;i<N;i++) {
- count = 0;
- sum = 0;
- group = groups[i];
- for(int j;j<vec.size();j++) {
- if (vec[j].group == group) {
- count++;
- sum = sum + vec[j].rank;
- }
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+
+ if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
+
+ if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ process(lookup, designMap);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
}
- counts[i] = count;
- rankSums[i] = sum;
+
+ if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = lookup[0]->getLabel();
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ process(lookup, designMap);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+
+ //restore real lastlabel to save below
+ lookup[0]->setLabel(saveLabel);
+ }
+
+ lastLabel = lookup[0]->getLabel();
+ //prevent memory leak
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
+
+ if (m->control_pressed) { return 0; }
+
+ //get next line to process
+ lookup = input.getSharedRAbundVectors();
}
- //test statistic
- for (int i=0;i<N;i++) { tmp = tmp + (pow(rankSums[i],2) / counts[i]); }
-
- H = (12 / (N*(N+1))) * tmp - (3*(N+1));
-
- //ss = tmp - pow(accumulate(rankSums.begin(), rankSums.end(), 0), 2);
+ if (m->control_pressed) { return 0; }
- //H = ss / ( (N * (N + 1))/12 );
-
- //correction for ties?
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
- //p-value calculation
+ //run last label if you need to
+ if (needToRun == true) {
+ for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+ process(lookup, designMap);
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ }
- return 0;
- }
+
+ //output files created by command
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+ return 0;
+
+ }
catch(exception& e) {
m->errorOut(e, "KruskalWallisCommand", "execute");
exit(1);
}
}
//**********************************************************************************************************************
-void KruskalWallisCommand::assignRank(vector<groupRank> &vec) {
- try {
- double rank = 1;
- double numRanks, avgRank, j;
- vector<groupRank>::iterator it, oldit;
-
- sort (vec.begin(), vec.end(), comparevalue);
-
- it = vec.begin();
-
- while ( it != vec.end() ) {
- j = rank;
- oldit = it;
- if (!equalvalue(*it, *(it+1))) {
- (*it).rank = rank;
- rank = rank+1;
- it++; }
- else {
- while(equalrank(*it, *(it+1))) {
- j = j + (j+1);
- rank++;
- it++;
- }
- numRanks = double (distance(oldit, it));
- avgRank = j / numRanks;
- while(oldit != it) {
- (*oldit).rank = avgRank;
- oldit++;
- }
- }
+int KruskalWallisCommand::process(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
+ try {
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+ variables["[distance]"] = lookup[0]->getLabel();
+ string outputFileName = getOutputFileName("kruskall-wallis",variables);
+
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName);
+ out << "OTULabel\tKW\tPvalue\n";
+
+ int numBins = lookup[0]->getNumBins();
+ //sanity check to make sure each treatment has a group in the shared file
+ set<string> treatments;
+ for (int j = 0; j < lookup.size(); j++) {
+ string group = lookup[j]->getGroup();
+ string treatment = designMap.get(group, mclass); //get value for this group in this category
+ treatments.insert(treatment);
}
+ if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; }
-
+ LinearAlgebra linear;
+ for (int i = 0; i < numBins; i++) {
+ if (m->control_pressed) { break; }
+
+ vector<spearmanRank> values;
+ for (int j = 0; j < lookup.size(); j++) {
+ string group = lookup[j]->getGroup();
+ string treatment = designMap.get(group, mclass); //get value for this group in this category
+ spearmanRank temp(treatment, lookup[j]->getAbundance(i));
+ values.push_back(temp);
+ }
+
+ double pValue = 0.0;
+ double H = linear.calcKruskalWallis(values, pValue);
+
+ //output H and signifigance
+ out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl;
+ }
+ out.close();
+
+ return 0;
}
- catch(exception& e) {
- m->errorOut(e, "KruskalWallisCommand", "getRank");
+ catch(exception& e) {
+ m->errorOut(e, "KruskalWallisCommand", "process");
exit(1);
}
-
-}
-//**********************************************************************************************************************
-void KruskalWallisCommand::assignValue(vector<groupRank> &vec) {
-
}
//**********************************************************************************************************************
-//**********************************************************************************************************************
-//**********************************************************************************************************************
+
#include "command.hpp"
#include "inputdata.h"
-#include "sharedrabundvector.h"
-
+#include "designmap.h"
class KruskalWallisCommand : public Command {
~KruskalWallisCommand(){}
vector<string> setParameters();
- string getCommandName() { return "kruskalwallis"; }
+ string getCommandName() { return "kruskal.wallis"; }
string getCommandCategory() { return "Hypothesis Testing"; }
string getOutputPattern(string);
string getHelpString();
- string getCitation() { return "http://www.mothur.org/wiki/kruskalwallis"; }
+ string getCitation() { return "http://www.mothur.org/wiki/Kruskal.wallis"; }
string getDescription() { return "Non-parametric method for testing whether samples originate from the same distribution."; }
struct groupRank {
private:
- string outputDir, sharedfile, groups;
- bool abort;
+ bool abort, allLines;
+ string outputDir, sharedfile, designfile, mclass, classes;
+ vector<string> outputNames;
set<string> labels;
- vector<string> outputNames, Groups;
- vector<int> counts;
- vector<double> rankSums;
- vector<double> rankMeans;
-
-
-
- static bool comparevalue(const groupRank &a, const groupRank &b) { return a.value < b.value; }
- static bool equalvalue(const groupRank &a, const groupRank &b) { return a.value == b.value; }
- static bool comparerank(const groupRank &a, const groupRank &b) { return a.rank < b.rank; }
- static bool equalrank(const groupRank &a, const groupRank &b) { return a.rank == b.rank; }
- static bool equalgroup(const groupRank &a, const groupRank &b) { return a.group == b.group; }
+ int process(vector<SharedRAbundVector*>&, DesignMap&);
};
#endif /* KRUSKALWALLISCOMMAND_H */
--- /dev/null
+//
+// lefsecommand.cpp
+// Mothur
+//
+// Created by SarahsWork on 6/12/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "lefsecommand.h"
+#include "linearalgebra.h"
+
+//**********************************************************************************************************************
+vector<string> LefseCommand::setParameters(){
+ try {
+ CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
+ CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
+ CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass);
+ CommandParameter psubclass("subclass", "String", "", "", "", "", "","",false,false); parameters.push_back(psubclass);
+ CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+ CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses);
+ CommandParameter palpha("anova_alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha);
+ CommandParameter pwalpha("wilcoxon_alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pwalpha);
+ //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "setParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string LefseCommand::getHelpString(){
+ try {
+ string helpString = "";
+ helpString += "The lefse command allows you to ....\n";
+ helpString += "The lefse command parameters are: shared, design, class, subclass, label, classes, wilcoxon_alpha, anova_alpha.\n";
+ helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n";
+ helpString += "The subclass parameter is used to indicate the .....If none is provided second category is used, or if only one category subclass is ignored. \n";
+ helpString += "The classes parameter is used to indicate the classes you would like to use. Clases should be inputted in the following format: classes=label<value1|value2|value3>-label<value1|value2>. For example to include groups from treatment with value early or late and age= young or old. class=treatment<Early|Late>-age<young|old>.\n";
+ helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n";
+ helpString += "The lefse command should be in the following format: lefse(shared=final.an.shared, design=final.design, class=treatment, subclass=age).\n";
+ return helpString;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "getHelpString");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string LefseCommand::getOutputPattern(string type) {
+ try {
+ string pattern = "";
+
+ if (type == "summary") { pattern = "[filename],[distance],lefse_summary"; }
+ else if (type == "kruskall-wallis") { pattern = "[filename],[distance],kruskall_wallis"; }
+ else if (type == "wilcoxon") { pattern = "[filename],[distance],wilcoxon"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "getOutputPattern");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+LefseCommand::LefseCommand(){
+ try {
+ abort = true; calledHelp = true;
+ setParameters();
+ vector<string> tempOutNames;
+ outputTypes["summary"] = tempOutNames;
+ outputTypes["kruskall-wallis"] = tempOutNames;
+ outputTypes["wilcoxon"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "LefseCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+LefseCommand::LefseCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+ allLines = 1;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+ else {
+ //valid paramters for this command
+ vector<string> myArray = setParameters();
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+ map<string,string>::iterator it;
+ //check to make sure all parameters are valid for command
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ vector<string> tempOutNames;
+ outputTypes["summary"] = tempOutNames;
+ outputTypes["kruskall-wallis"] = tempOutNames;
+ outputTypes["wilcoxon"] = tempOutNames;
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+
+ string path;
+ it = parameters.find("design");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["desing"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("shared");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["shared"] = inputDir + it->second; }
+ }
+ }
+
+ //get shared file, it is required
+ sharedfile = validParameter.validFile(parameters, "shared", true);
+ if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+ else if (sharedfile == "not found") {
+ //if there is a current shared file, use it
+ sharedfile = m->getSharedFile();
+ if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
+ }else { m->setSharedFile(sharedfile); }
+
+ //get shared file, it is required
+ designfile = validParameter.validFile(parameters, "design", true);
+ if (designfile == "not open") { designfile = ""; abort = true; }
+ else if (designfile == "not found") {
+ //if there is a current shared file, use it
+ designfile = m->getDesignFile();
+ if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
+ }else { m->setDesignFile(designfile); }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
+ }
+
+ string label = validParameter.validFile(parameters, "label", false);
+ if (label == "not found") { label = ""; }
+ else {
+ if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
+ else { allLines = 1; }
+ }
+
+ mclass = validParameter.validFile(parameters, "class", false);
+ if (mclass == "not found") { mclass = ""; }
+
+ subclass = validParameter.validFile(parameters, "subclass", false);
+ if (subclass == "not found") { subclass = ""; }
+
+ classes = validParameter.validFile(parameters, "classes", false);
+ if (classes == "not found") { classes = ""; }
+
+ string temp = validParameter.validFile(parameters, "anova_alpha", false);
+ if (temp == "not found") { temp = "0.05"; }
+ m->mothurConvert(temp, anovaAlpha);
+
+ temp = validParameter.validFile(parameters, "wilcoxon_alpha", false);
+ if (temp == "not found") { temp = "0.05"; }
+ m->mothurConvert(temp, wilcoxonAlpha);
+
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "LefseCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int LefseCommand::execute(){
+ try {
+
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ DesignMap designMap(designfile);
+ //if user set classes set groups=those classes
+ if (classes != "") {
+ map<string, vector<string> > thisClasses = m->parseClasses(classes);
+ vector<string> groups = designMap.getNamesUnique(thisClasses);
+ if (groups.size() != 0) { m->setGroups(groups); }
+ else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; }
+ }
+
+ //if user did not select class use first column
+ if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); }
+
+ InputData input(sharedfile, "sharedfile");
+ vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+ string lastLabel = lookup[0]->getLabel();
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+
+ if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
+
+ if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ process(lookup, designMap);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+ }
+
+ if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = lookup[0]->getLabel();
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ process(lookup, designMap);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+
+ //restore real lastlabel to save below
+ lookup[0]->setLabel(saveLabel);
+ }
+
+ lastLabel = lookup[0]->getLabel();
+ //prevent memory leak
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
+
+ if (m->control_pressed) { return 0; }
+
+ //get next line to process
+ lookup = input.getSharedRAbundVectors();
+ }
+
+ if (m->control_pressed) { return 0; }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+ process(lookup, designMap);
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ }
+
+
+ //output files created by command
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int LefseCommand::process(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
+ try {
+ //run kruskal wallis on each otu
+ vector<int> significantOtuLabels = runKruskalWallis(lookup, designMap);
+
+ //check for subclass
+ if (subclass != "") { significantOtuLabels = runWilcoxon(lookup, designMap, significantOtuLabels); }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "process");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+vector<int> LefseCommand::runKruskalWallis(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
+ try {
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+ variables["[distance]"] = lookup[0]->getLabel();
+ string outputFileName = getOutputFileName("kruskall-wallis",variables);
+
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName);
+ out << "OTULabel\tKW\tPvalue\n";
+
+ vector<int> significantOtuLabels;
+ int numBins = lookup[0]->getNumBins();
+ //sanity check to make sure each treatment has a group in the shared file
+ set<string> treatments;
+ for (int j = 0; j < lookup.size(); j++) {
+ string group = lookup[j]->getGroup();
+ string treatment = designMap.get(group, mclass); //get value for this group in this category
+ treatments.insert(treatment);
+ }
+ if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; }
+
+ LinearAlgebra linear;
+ for (int i = 0; i < numBins; i++) {
+ if (m->control_pressed) { break; }
+
+ vector<spearmanRank> values;
+ for (int j = 0; j < lookup.size(); j++) {
+ string group = lookup[j]->getGroup();
+ string treatment = designMap.get(group, mclass); //get value for this group in this category
+ spearmanRank temp(treatment, lookup[j]->getAbundance(i));
+ values.push_back(temp);
+ }
+
+ double pValue = 0.0;
+ double H = linear.calcKruskalWallis(values, pValue);
+
+ //output H and signifigance
+ out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl;
+
+ if (pValue < anovaAlpha) { significantOtuLabels.push_back(i); }
+ }
+ out.close();
+
+ return significantOtuLabels;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "runKruskalWallis");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+vector<int> LefseCommand::runWilcoxon(vector<SharedRAbundVector*>& lookup, DesignMap& designMap, vector<int> bins) {
+ try {
+ vector<int> significantOtuLabels;
+ //if it exists and meets the following requirements run Wilcoxon
+ /*
+ 1. Subclass members all belong to same main class
+ 2. Number of groups in each subclass is the same
+ 3. anything else??
+
+ */
+ vector<string> subclasses;
+ map<string, string> subclass2Class;
+ map<string, int> subclassCounts;
+ map<string, vector<int> > subClass2GroupIndex; //maps subclass name to vector of indexes in lookup from that subclass. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below.
+ bool error = false;
+ for (int j = 0; j < lookup.size(); j++) {
+ string group = lookup[j]->getGroup();
+ string treatment = designMap.get(group, mclass); //get value for this group in this category
+ string thisSub = designMap.get(group, subclass);
+ map<string, string>::iterator it = subclass2Class.find(thisSub);
+ if (it == subclass2Class.end()) {
+ subclass2Class[thisSub] = treatment;
+ subclassCounts[thisSub] = 1;
+ vector<int> temp; temp.push_back(j);
+ subClass2GroupIndex[thisSub] = temp;
+ }
+ else {
+ subclassCounts[thisSub]++;
+ subClass2GroupIndex[thisSub].push_back(j);
+ if (it->second != treatment) {
+ error = true;
+ m->mothurOut("[ERROR]: subclass " + thisSub + " has members in " + it->second + " and " + treatment + ". Subclass members must be from the same class. Ignoring wilcoxon.\n");
+ }
+ }
+ }
+
+ if (error) { return significantOtuLabels; }
+ else { //check counts to make sure subclasses are the same size
+ set<int> counts;
+ for (map<string, int>::iterator it = subclassCounts.begin(); it != subclassCounts.end(); it++) { counts.insert(it->second); }
+ if (counts.size() > 1) { m->mothurOut("[ERROR]: subclasses must be the same size. Ignoring wilcoxon.\n");
+ return significantOtuLabels; }
+ }
+
+ int numBins = lookup[0]->getNumBins();
+ vector<compGroup> comp;
+ //find comparisons and fill comp
+ map<string, int>::iterator itB;
+ for(map<string, int>::iterator it=subclassCounts.begin();it!=subclassCounts.end();it++){
+ itB = it;itB++;
+ for(itB;itB!=subclassCounts.end();itB++){
+ compGroup temp(it->first,itB->first);
+ comp.push_back(temp);
+ }
+ }
+
+ int numComp = comp.size();
+ if (numComp < 2) { m->mothurOut("[ERROR]: Need at least 2 subclasses, Ignoring Wilcoxon.\n");
+ return significantOtuLabels; }
+
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+ variables["[distance]"] = lookup[0]->getLabel();
+ string outputFileName = getOutputFileName("wilcoxon",variables);
+
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ outputNames.push_back(outputFileName); outputTypes["wilcoxon"].push_back(outputFileName);
+ out << "OTULabel\tComparision\tWilcoxon\tPvalue\n";
+
+ LinearAlgebra linear;
+ for (int i = 0; i < numBins; i++) {
+ if (m->control_pressed) { break; }
+
+ if (m->inUsersGroups(i, bins)) { //flagged in Kruskal Wallis
+
+ bool sig = false;
+ //for each subclass comparision
+ for (int j = 0; j < numComp; j++) {
+ //fill x and y with this comparisons data
+ vector<double> x; vector<double> y;
+
+ //fill x and y
+ vector<int> xIndexes = subClass2GroupIndex[comp[j].group1]; //indexes in lookup for this subclass
+ for (int k = 0; k < xIndexes.size(); k++) { x.push_back(lookup[xIndexes[k]]->getAbundance(i)); }
+
+ vector<int> yIndexes = subClass2GroupIndex[comp[j].group2]; //indexes in lookup for this subclass
+ for (int k = 0; k < yIndexes.size(); k++) { y.push_back(lookup[yIndexes[k]]->getAbundance(i)); }
+
+ double pValue = 0.0;
+ double H = linear.calcWilcoxon(x, y, pValue);
+
+ //output H and signifigance
+ out << m->currentBinLabels[i] << '\t' << comp[j].getCombo() << '\t' << H << '\t' << pValue << endl;
+
+ //set sig - not sure how yet
+ }
+ if (sig) { significantOtuLabels.push_back(i); }
+ }
+ }
+ out.close();
+
+ return significantOtuLabels;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "runWilcoxon");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+
--- /dev/null
+//
+// lefsecommand.h
+// Mothur
+//
+// Created by SarahsWork on 6/12/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef __Mothur__lefsecommand__
+#define __Mothur__lefsecommand__
+
+#include "command.hpp"
+
+/*
+ Columns = groups, rows are OTUs, class = design
+
+ From http://huttenhower.sph.harvard.edu/galaxy/root?tool_id=lefse_upload
+ Input data consist of a collection of m samples (columns) each made up of n numerical features (rows, typically normalized per-sample, red representing high values and green low). These samples are labeled with a class (taking two or more possible values) that represents the main biological hypothesis under investigation; they may also have one or more subclass labels reflecting within-class groupings.
+
+ Step 1: the Kruskall-Wallis test analyzes all features, testing whether the values in different classes are differentially distributed. Features violating the null hypothesis are further analyzed in Step 2.
+ Step 2: the pairwise Wilcoxon test checks whether all pairwise comparisons between subclasses within different classes significantly agree with the class level trend.
+ Step 3: the resulting subset of vectors is used to build a Linear Discriminant Analysis model from which the relative difference among classes is used to rank the features. The final output thus consists of a list of features that are discriminative with respect to the classes, consistent with the subclass grouping within classes, and ranked according to the effect size with which they differentiate classes.
+*/
+
+
+#include "command.hpp"
+#include "inputdata.h"
+#include "designmap.h"
+
+/**************************************************************************************************/
+
+class LefseCommand : public Command {
+public:
+ LefseCommand(string);
+ LefseCommand();
+ ~LefseCommand(){}
+
+ vector<string> setParameters();
+ string getCommandName() { return "lefse"; }
+ string getCommandCategory() { return "OTU-Based Approaches"; }
+
+ string getOutputPattern(string);
+ string getHelpString();
+ string getCitation() { return "http://www.mothur.org/wiki/Lefse"; }
+ string getDescription() { return "brief description"; }
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+private:
+ bool abort, allLines;
+ string outputDir, sharedfile, designfile, mclass, subclass, classes;
+ vector<string> outputNames;
+ set<string> labels;
+ float anovaAlpha, wilcoxonAlpha;
+
+ int process(vector<SharedRAbundVector*>&, DesignMap&);
+ vector<int> runKruskalWallis(vector<SharedRAbundVector*>&, DesignMap&);
+ vector<int> runWilcoxon(vector<SharedRAbundVector*>&, DesignMap&, vector<int>);
+
+};
+
+/**************************************************************************************************/
+
+
+
+
+#endif /* defined(__Mothur__lefsecommand__) */
exit(1);
}
}
-
+/*********************************************************************************************************************************/
+double LinearAlgebra::calcKruskalWallis(vector<spearmanRank>& values, double& pValue){
+ try {
+ double H;
+ set<string> treatments;
+
+ //rank values
+ sort(values.begin(), values.end(), compareSpearman);
+ vector<spearmanRank*> ties;
+ int rankTotal = 0;
+ vector<int> TIES;
+ for (int j = 0; j < values.size(); j++) {
+ treatments.insert(values[j].name);
+ rankTotal += (j+1);
+ ties.push_back(&(values[j]));
+
+ if (j != values.size()-1) { // you are not the last so you can look ahead
+ if (values[j].score != values[j+1].score) { // you are done with ties, rank them and continue
+ if (ties.size() > 1) { TIES.push_back(ties.size()); }
+ for (int k = 0; k < ties.size(); k++) {
+ double thisrank = rankTotal / (double) ties.size();
+ (*ties[k]).score = thisrank;
+ }
+ ties.clear();
+ rankTotal = 0;
+ }
+ }else { // you are the last one
+ if (ties.size() > 1) { TIES.push_back(ties.size()); }
+ for (int k = 0; k < ties.size(); k++) {
+ double thisrank = rankTotal / (double) ties.size();
+ (*ties[k]).score = thisrank;
+ }
+ }
+ }
+
+
+ // H = 12/(N*(N+1)) * (sum Ti^2/n) - 3(N+1)
+ map<string, double> sums;
+ map<string, double> counts;
+ for (set<string>::iterator it = treatments.begin(); it != treatments.end(); it++) { sums[*it] = 0.0; counts[*it] = 0; }
+
+ for (int j = 0; j < values.size(); j++) {
+ sums[values[j].name] += values[j].score;
+ counts[values[j].name]+= 1.0;
+ }
+
+ double middleTerm = 0.0;
+ for (set<string>::iterator it = treatments.begin(); it != treatments.end(); it++) {
+ middleTerm += ((sums[*it]*sums[*it])/counts[*it]);
+ }
+
+ double firstTerm = 12 / (double) (values.size()*(values.size()+1));
+ double lastTerm = 3 * (values.size()+1);
+
+ H = firstTerm * middleTerm - lastTerm;
+
+ //adjust for ties
+ if (TIES.size() != 0) {
+ double sum = 0.0;
+ for (int j = 0; j < TIES.size(); j++) { sum += ((TIES[j]*TIES[j]*TIES[j])-TIES[j]); }
+ double result = 1.0 - (sum / (double) ((values.size()*values.size()*values.size())-values.size()));
+ H /= result;
+ }
+
+ //Numerical Recipes pg221
+ pValue = 1.0 - (gammp(((treatments.size()-1)/(double)2.0), H/2.0));
+
+ return H;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LinearAlgebra", "calcKruskalWallis");
+ exit(1);
+ }
+}
/*********************************************************************************************************************************/
//assumes both matrices are square and the same size
double LinearAlgebra::calcKendall(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
exit(1);
}
}
+/*********************************************************************************************************************************/
+double LinearAlgebra::calcWilcoxon(vector<double>& x, vector<double>& y, double& sig){
+ try {
+ if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
+
+ double W = 0.0;
+ sig = 0.0;
+
+ vector<double> signPairs;
+ vector<spearmanRank> absV;
+ for (int i = 0; i < x.size(); i++) {
+ if (m->control_pressed) { return W; }
+ double temp = y[i]-x[i];
+ double signV = sign(temp);
+ if (signV != 0) { // exclude zeros
+ spearmanRank member(toString(i), abs(temp));
+ absV.push_back(member);
+ signPairs.push_back(signV);
+ }
+ }
+
+ //rank absV
+ //sort xscores
+ sort(absV.begin(), absV.end(), compareSpearman);
+
+ //convert scores to ranks of x
+ vector<spearmanRank*> ties;
+ int rankTotal = 0;
+ for (int j = 0; j < absV.size(); j++) {
+ if (m->control_pressed) { return W; }
+ rankTotal += (j+1);
+ ties.push_back(&(absV[j]));
+
+ if (j != absV.size()-1) { // you are not the last so you can look ahead
+ if (absV[j].score != absV[j+1].score) { // you are done with ties, rank them and continue
+ for (int k = 0; k < ties.size(); k++) {
+ float thisrank = rankTotal / (float) ties.size();
+ (*ties[k]).score = thisrank;
+ }
+ ties.clear();
+ rankTotal = 0;
+ }
+ }else { // you are the last one
+ for (int k = 0; k < ties.size(); k++) {
+ float thisrank = rankTotal / (float) ties.size();
+ (*ties[k]).score = thisrank;
+ }
+ }
+ }
+ //sum ranks times sign
+ for (int i = 0; i < absV.size(); i++) {
+ if (m->control_pressed) { return W; }
+ W += (absV[i].score*signPairs[i]);
+ }
+ W = abs(W);
+
+ //find zScore
+ cout << "still need to find sig!!" << endl;
+
+ return W;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LinearAlgebra", "calcWilcoxon");
+ exit(1);
+ }
+}
/*********************************************************************************************************************************/
double LinearAlgebra::calcSpearman(vector<double>& x, vector<double>& y, double& sig){
try {
double calcPearson(vector<vector<double> >&, vector<vector<double> >&);
double calcSpearman(vector<vector<double> >&, vector<vector<double> >&);
double calcKendall(vector<vector<double> >&, vector<vector<double> >&);
+ double calcKruskalWallis(vector<spearmanRank>&, double&);
+ double calcWilcoxon(vector<double>&, vector<double>&, double&);
double calcPearson(vector<double>&, vector<double>&, double&);
double calcSpearman(vector<double>&, vector<double>&, double&);
vector<double> solveEquations(vector<vector<double> >, vector<double>);
vector<float> solveEquations(vector<vector<float> >, vector<float>);
vector<vector<double> > getInverse(vector<vector<double> >);
-
+
private:
MothurOut* m;
double betacf(const double, const double, const double);
double betai(const double, const double, const double);
double gammln(const double);
- double gammp(const double, const double);
double gammq(const double, const double);
double gser(double&, const double, const double, double&);
double gcf(double&, const double, const double, double&);
double erfcc(double);
+ double gammp(const double, const double);
double ran0(int&); //for testing
double ran1(int&); //for testing
// Copyright (c) 2012 Schloss Lab. All rights reserved.
//
-#ifndef rrf_fs_prototype_macros_h
-#define rrf_fs_prototype_macros_h
+#ifndef RF_MACROS_H
+#define RF_MACROS_H
#include "mothurout.h"
}
m->gobble(in);
+ if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ".\n"); }
//check to make sure both are able to be opened
ifstream in2;
}
//roligo = reverseOligo(roligo);
+ if (m->debug) { m->mothurOut("[DEBUG]: reading - " + roligo + ".\n"); }
+
group = "";
// get rest of line in case there is a primer name
}
oligosPair newPrimer(foligo, roligo);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
//check for repeat barcodes
string tempPair = foligo+roligo;
CYGWIN_BUILD ?= no
USECOMPRESSION ?= no
MOTHUR_FILES="\"Enter_your_default_path_here\""
-RELEASE_DATE = "\"2/12/2013\""
-VERSION = "\"1.29.2\""
+RELEASE_DATE = "\"5/29/2013\""
+VERSION = "\"1.31.1\""
FORTAN_COMPILER = gfortran
FORTRAN_FLAGS =
--- /dev/null
+//
+// makelefse.cpp
+// Mothur
+//
+// Created by SarahsWork on 6/3/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "makelefsecommand.h"
+#include "designmap.h"
+
+//**********************************************************************************************************************
+vector<string> MakeLefseCommand::setParameters(){
+ try {
+ CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none","lefse",false,false,true); parameters.push_back(pshared);
+ CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none","lefse",false,false,true); parameters.push_back(prelabund);
+ CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false,false); parameters.push_back(pconstaxonomy);
+ CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,false, true); parameters.push_back(pdesign);
+ CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+ CommandParameter pscale("scale", "Multiple", "totalgroup-totalotu-averagegroup-averageotu", "totalgroup", "", "", "","",false,false); parameters.push_back(pscale);
+ CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "setParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string MakeLefseCommand::getHelpString(){
+ try {
+ string helpString = "";
+ helpString += "The make.lefse command allows you to create a lefse formatted input file from mothur's output files.\n";
+ helpString += "The make.lefse command parameters are: shared, relabund, constaxonomy, design, scale, groups and label. The shared or relabund are required.\n";
+ helpString += "The shared parameter is used to input your shared file, http://www.wiki.mothur.org/wiki/Shared_file.\n";
+ helpString += "The relabund parameter is used to input your relabund file, http://www.wiki.mothur.org/wiki/Relabund_file.\n";
+ helpString += "The design parameter is used to input your design file, http://www.wiki.mothur.org/wiki/Design_File.\n";
+ helpString += "The constaxonomy parameter is used to input your taxonomy file. http://www.wiki.mothur.org/wiki/Constaxonomy_file. The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance. ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. \n";
+ helpString += "The scale parameter allows you to select what scale you would like to use to convert your shared file abundances to relative abundances. Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n";
+ helpString += "The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n";
+ helpString += "The make.lefse command should be in the following format: make.lefse(list=yourListFile, taxonomy=outputFromClassify.seqsCommand, name=yourNameFile)\n";
+ helpString += "make.lefse(shared=final.an.list, taxonomy=final.an.taxonomy, name=final.names)\n";
+ return helpString;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "getHelpString");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string MakeLefseCommand::getOutputPattern(string type) {
+ try {
+ string pattern = "";
+
+ if (type == "lefse") { pattern = "[filename],[distance],lefse"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "getOutputPattern");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+MakeLefseCommand::MakeLefseCommand(){
+ try {
+ abort = true; calledHelp = true;
+ setParameters();
+ vector<string> tempOutNames;
+ outputTypes["lefse"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "MakeLefseCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+MakeLefseCommand::MakeLefseCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+ else {
+ //valid paramters for this command
+ vector<string> myArray = setParameters();
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+ map<string,string>::iterator it;
+ //check to make sure all parameters are valid for command
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ vector<string> tempOutNames;
+ outputTypes["lefse"] = tempOutNames;
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+ string path;
+ it = parameters.find("shared");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["shared"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("design");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["design"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("constaxonomy");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["constaxonomy"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("relabund");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["relabund"] = inputDir + it->second; }
+ }
+ }
+
+ //check for parameters
+ designfile = validParameter.validFile(parameters, "design", true);
+ if (designfile == "not open") { abort = true; }
+ else if (designfile == "not found") { designfile = ""; }
+ else { m->setDesignFile(designfile); }
+
+ sharedfile = validParameter.validFile(parameters, "shared", true);
+ if (sharedfile == "not open") { abort = true; }
+ else if (sharedfile == "not found") { sharedfile = ""; }
+ else { m->setSharedFile(sharedfile); }
+
+ relabundfile = validParameter.validFile(parameters, "relabund", true);
+ if (relabundfile == "not open") { abort = true; }
+ else if (relabundfile == "not found") { relabundfile = ""; }
+ else { m->setRelAbundFile(relabundfile); }
+
+ constaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true);
+ if (constaxonomyfile == "not open") { constaxonomyfile = ""; abort = true; }
+ else if (constaxonomyfile == "not found") { constaxonomyfile = ""; }
+
+ label = validParameter.validFile(parameters, "label", false);
+ if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
+
+ string groups = validParameter.validFile(parameters, "groups", false);
+ if (groups == "not found") { groups = ""; }
+ else {
+ m->splitAtDash(groups, Groups);
+ m->setGroups(Groups);
+ }
+
+
+ if ((relabundfile == "") && (sharedfile == "")) {
+ //is there are current file available for either of these?
+ //give priority to shared, then relabund
+ sharedfile = m->getSharedFile();
+ if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+ else {
+ relabundfile = m->getRelAbundFile();
+ if (relabundfile != "") { m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("No valid current files. You must provide a shared or relabund."); m->mothurOutEndLine();
+ abort = true;
+ }
+ }
+ }
+
+ if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("[ERROR]: You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true; }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ outputDir = ""; }
+
+ scale = validParameter.validFile(parameters, "scale", false); if (scale == "not found") { scale = "totalgroup"; }
+
+ if ((scale != "totalgroup") && (scale != "totalotu") && (scale != "averagegroup") && (scale != "averageotu")) {
+ m->mothurOut(scale + " is not a valid scaling option for the get.relabund command. Choices are totalgroup, totalotu, averagegroup, averageotu."); m->mothurOutEndLine(); abort = true;
+ }
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "MakeLefseCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int MakeLefseCommand::execute(){
+ try {
+
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ map<string, consTax2> consTax;
+ if (constaxonomyfile != "") { m->readConsTax(constaxonomyfile, consTax); }
+
+ if (m->control_pressed) { return 0; }
+
+ if (sharedfile != "") {
+ inputFile = sharedfile;
+ vector<SharedRAbundFloatVector*> lookup = getSharedRelabund();
+ runRelabund(consTax, lookup);
+ }else {
+ inputFile = relabundfile;
+ vector<SharedRAbundFloatVector*> lookup = getRelabund();
+ runRelabund(consTax, lookup);
+ }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ //output files created by command
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int MakeLefseCommand::runRelabund(map<string, consTax2>& consTax, vector<SharedRAbundFloatVector*>& lookup){
+ try {
+ if (outputDir == "") { outputDir = m->hasPath(inputFile); }
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFile));
+ variables["[distance]"] = lookup[0]->getLabel();
+ string outputFile = getOutputFileName("lefse",variables);
+ outputNames.push_back(outputFile); outputTypes["lefse"].push_back(outputFile);
+
+ ofstream out;
+ m->openOutputFile(outputFile, out);
+
+ DesignMap* designMap = NULL;
+ if (designfile != "") {
+ designMap = new DesignMap(designfile);
+ vector<string> categories = designMap->getNamesOfCategories();
+
+ for (int j = 0; j < categories.size(); j++) {
+ out << categories[j] << "\t";
+ for (int i = 0; i < lookup.size(); i++) {
+ if (m->control_pressed) { out.close(); delete designMap; return 0; }
+ string value = designMap->get(lookup[i]->getGroup(), categories[j]);
+ if (value == "not found") {
+ m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct.\n"); m->control_pressed = true;
+ }else { out << value << '\t'; }
+ }
+ out << endl;
+ }
+ }
+
+ out << "group\t";
+ for (int i = 0; i < lookup.size(); i++) { out << lookup[i]->getGroup() << '\t'; }
+ out << endl;
+
+ for (int i = 0; i < lookup[0]->getNumBins(); i++) { //process each otu
+ if (m->control_pressed) { break; }
+ string nameOfOtu = m->currentBinLabels[i];
+ if (constaxonomyfile != "") { //try to find the otuName in consTax to replace with consensus taxonomy
+ map<string, consTax2>::iterator it = consTax.find(nameOfOtu);
+ if (it != consTax.end()) {
+ nameOfOtu = it->second.taxonomy;
+ //add sanity check abundances here??
+ string fixedName = "";
+ //remove confidences and change ; to |
+ m->removeConfidences(nameOfOtu);
+ for (int j = 0; j < nameOfOtu.length()-1; j++) {
+ if (nameOfOtu[j] == ';') { fixedName += "_" + m->currentBinLabels[i] + '|'; }
+ else { fixedName += nameOfOtu[j]; }
+ }
+ nameOfOtu = fixedName;
+ }else {
+ m->mothurOut("[ERROR]: can't find " + nameOfOtu + " in constaxonomy file. Do the distances match, did you forget to use the label parameter?\n"); m->control_pressed = true;
+ }
+
+ }
+ //print name
+ out << nameOfOtu << '\t';
+
+ //print out relabunds for each otu
+ for (int j = 0; j < lookup.size(); j++) { out << lookup[j]->getAbundance(i) << '\t'; }
+ out << endl;
+ }
+ out.close();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<SharedRAbundFloatVector*> MakeLefseCommand::getSharedRelabund(){
+ try {
+ InputData input(sharedfile, "sharedfile");
+ vector<SharedRAbundVector*> templookup = input.getSharedRAbundVectors();
+ string lastLabel = templookup[0]->getLabel();
+ vector<SharedRAbundFloatVector*> lookup;
+
+ if (label == "") { label = lastLabel; }
+ else {
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> labels; labels.insert(label);
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((templookup[0] != NULL) && (userLabels.size() != 0)) {
+ if (m->control_pressed) { for (int i = 0; i < templookup.size(); i++) { delete templookup[i]; } return lookup; }
+
+ if(labels.count(templookup[0]->getLabel()) == 1){
+ processedLabels.insert(templookup[0]->getLabel());
+ userLabels.erase(templookup[0]->getLabel());
+ break;
+ }
+
+ if ((m->anyLabelsToProcess(templookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = templookup[0]->getLabel();
+
+ for (int i = 0; i < templookup.size(); i++) { delete templookup[i]; }
+ templookup = input.getSharedRAbundVectors(lastLabel);
+
+ processedLabels.insert(templookup[0]->getLabel());
+ userLabels.erase(templookup[0]->getLabel());
+
+ //restore real lastlabel to save below
+ templookup[0]->setLabel(saveLabel);
+ break;
+ }
+
+ lastLabel = templookup[0]->getLabel();
+
+ //get next line to process
+ //prevent memory leak
+ for (int i = 0; i < templookup.size(); i++) { delete templookup[i]; }
+ templookup = input.getSharedRAbundVectors();
+ }
+
+
+ if (m->control_pressed) { for (int i = 0; i < templookup.size(); i++) { delete templookup[i]; } return lookup; }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ for (int i = 0; i < templookup.size(); i++) { if (templookup[i] != NULL) { delete templookup[i]; } }
+ templookup = input.getSharedRAbundVectors(lastLabel);
+ }
+ }
+
+ for (int i = 0; i < templookup.size(); i++) {
+ SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
+ temp->setLabel(templookup[i]->getLabel());
+ temp->setGroup(templookup[i]->getGroup());
+ lookup.push_back(temp);
+ }
+
+ //convert to relabund
+ for (int i = 0; i < templookup.size(); i++) {
+ for (int j = 0; j < templookup[i]->getNumBins(); j++) {
+
+ if (m->control_pressed) { for (int k = 0; k < templookup.size(); k++) { delete templookup[k]; } return lookup; }
+
+ int abund = templookup[i]->getAbundance(j);
+ float relabund = 0.0;
+
+ if (scale == "totalgroup") {
+ relabund = abund / (float) templookup[i]->getNumSeqs();
+ }else if (scale == "totalotu") {
+ //calc the total in this otu
+ int totalOtu = 0;
+ for (int l = 0; l < templookup.size(); l++) { totalOtu += templookup[l]->getAbundance(j); }
+ relabund = abund / (float) totalOtu;
+ }else if (scale == "averagegroup") {
+ relabund = abund / (float) (templookup[i]->getNumSeqs() / (float) templookup[i]->getNumBins());
+ }else if (scale == "averageotu") {
+ //calc the total in this otu
+ int totalOtu = 0;
+ for (int l = 0; l < templookup.size(); l++) { totalOtu += templookup[l]->getAbundance(j); }
+ float averageOtu = totalOtu / (float) templookup.size();
+
+ relabund = abund / (float) averageOtu;
+ }else{ m->mothurOut(scale + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; }
+
+ lookup[i]->push_back(relabund, lookup[i]->getGroup());
+ }
+ }
+
+ for (int k = 0; k < templookup.size(); k++) { delete templookup[k]; }
+
+ return lookup;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "getSharedRelabund");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<SharedRAbundFloatVector*> MakeLefseCommand::getRelabund(){
+ try {
+ InputData input(relabundfile, "relabund");
+ vector<SharedRAbundFloatVector*> lookupFloat = input.getSharedRAbundFloatVectors();
+ string lastLabel = lookupFloat[0]->getLabel();
+
+ if (label == "") { label = lastLabel; return lookupFloat; }
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> labels; labels.insert(label);
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
+
+ if (m->control_pressed) { return lookupFloat; }
+
+ if(labels.count(lookupFloat[0]->getLabel()) == 1){
+ processedLabels.insert(lookupFloat[0]->getLabel());
+ userLabels.erase(lookupFloat[0]->getLabel());
+ break;
+ }
+
+ if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = lookupFloat[0]->getLabel();
+
+ for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; }
+ lookupFloat = input.getSharedRAbundFloatVectors(lastLabel);
+
+ processedLabels.insert(lookupFloat[0]->getLabel());
+ userLabels.erase(lookupFloat[0]->getLabel());
+
+ //restore real lastlabel to save below
+ lookupFloat[0]->setLabel(saveLabel);
+ break;
+ }
+
+ lastLabel = lookupFloat[0]->getLabel();
+
+ //get next line to process
+ //prevent memory leak
+ for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; }
+ lookupFloat = input.getSharedRAbundFloatVectors();
+ }
+
+
+ if (m->control_pressed) { return lookupFloat; }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } }
+ lookupFloat = input.getSharedRAbundFloatVectors(lastLabel);
+ }
+
+ return lookupFloat;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "getRelabund");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
--- /dev/null
+//
+// makelefse.h
+// Mothur
+//
+// Created by SarahsWork on 6/3/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef __Mothur__makelefse__
+#define __Mothur__makelefse__
+
+#include "mothurout.h"
+#include "command.hpp"
+#include "inputdata.h"
+#include "sharedutilities.h"
+#include "phylosummary.h"
+
+/**************************************************************************************************/
+
+class MakeLefseCommand : public Command {
+public:
+ MakeLefseCommand(string);
+ MakeLefseCommand();
+ ~MakeLefseCommand(){}
+
+ vector<string> setParameters();
+ string getCommandName() { return "make.lefse"; }
+ string getCommandCategory() { return "General"; }
+
+ string getOutputPattern(string);
+ string getHelpString();
+ string getCitation() { return "http://huttenhower.sph.harvard.edu/galaxy/root?tool_id=lefse_upload http://www.mothur.org/wiki/Make.lefse"; }
+ string getDescription() { return "creates LEfSe input file"; }
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+private:
+ bool abort, allLines, otulabel, hasGroupInfo;
+ string outputDir;
+ vector<string> outputNames, Groups;
+ string sharedfile, designfile, constaxonomyfile, relabundfile, scale, label, inputFile;
+
+ int runRelabund(map<string, consTax2>&, vector<SharedRAbundFloatVector*>&);
+
+ vector<SharedRAbundFloatVector*> getRelabund();
+ vector<SharedRAbundFloatVector*> getSharedRelabund();
+};
+
+/**************************************************************************************************/
+
+
+
+
+#endif /* defined(__Mothur__makelefse__) */
double gapOpening = 10;
int maxHomoP = 101;
- vector<vector<double> > penaltyMatrix(maxHomoP);
+ vector<vector<double> > penaltyMatrix; penaltyMatrix.resize(maxHomoP);
for(int i=0;i<maxHomoP;i++){
penaltyMatrix[i].resize(maxHomoP, 5);
penaltyMatrix[i][i] = 0;
while(!refFASTA.eof()){
if (m->control_pressed) { refFASTA.close(); return 0; }
+
Sequence seq(refFASTA); m->gobble(refFASTA);
+ if (m->debug) { m->mothurOut("[DEBUG]: seq = " + seq.getName() + ".\n"); }
+
string fullSequence = keySequence + barcodeSequence + seq.getAligned(); // * concatenate the keySequence, barcodeSequence, and
// referenceSequences
refFlowgrams[seq.getName()] = convertSeqToFlow(fullSequence, flowOrder); // * translate concatenated sequences into flowgram
}
refFASTA.close();
- vector<vector<double> > lookupTable(1000);
+ vector<vector<double> > lookupTable; lookupTable.resize(1000);
for(int i=0;i<1000;i++){
lookupTable[i].resize(11, 0);
}
-
+ if (m->debug) { m->mothurOut("[DEBUG]: here .\n"); }
//Loop through each sequence in the flow file and the error summary file.
ifstream flowFile;
m->openInputFile(flowFileName, flowFile);
int numFlows;
flowFile >> numFlows;
+ if (m->debug) { m->mothurOut("[DEBUG]: numflows = " + toString(numFlows) + ".\n"); }
+
ifstream errorFile;
m->openInputFile(errorFileName, errorFile);
m->getline(errorFile); //grab headers
string chimera;
float intensity;
- vector<double> std(11, 0);
+ vector<double> std; std.resize(11, 0);
while(errorFile && flowFile){
flowFile >> flowQuery >> dummy;
if(flowQuery != errorQuery){ cout << flowQuery << " != " << errorQuery << endl; }
- vector<double> refFlow = refFlowgrams[referenceName]; // * compare sequence to its closest reference
-
- vector<double> flowgram(numFlows);
-
- for(int i=0;i<numFlows;i++){
- flowFile >> intensity;
- flowgram[i] = intensity;// (int)round(100 * intensity);
- }
- m->gobble(flowFile);
-
- alignFlowGrams(flowgram, refFlow, gapOpening, penaltyMatrix, flowOrder);
-
- if (m->control_pressed) { errorFile.close(); flowFile.close(); return 0; }
-
- for(int i=0;i<flowgram.size();i++){
- int count = (int)round(100*flowgram[i]);
- if(count > 1000){count = 999;}
- if(abs(flowgram[i]-refFlow[i])<=0.50){
- lookupTable[count][int(refFlow[i])]++; // * build table
- std[int(refFlow[i])] += (100*refFlow[i]-count)*(100*refFlow[i]-count);
+ map<string, vector<double> >::iterator it = refFlowgrams.find(referenceName); // * compare sequence to its closest reference
+ if (it == refFlowgrams.end()) {
+ m->mothurOut("[WARNING]: missing reference flow " + referenceName + ", ignoring flow " + flowQuery + ".\n");
+ m->getline(flowFile); m->gobble(flowFile);
+ }else {
+ vector<double> refFlow = it->second;
+ vector<double> flowgram; flowgram.resize(numFlows);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: flowQuery = " + flowQuery + ".\t" + "refName " + referenceName+ ".\n"); }
+
+ for(int i=0;i<numFlows;i++){
+ flowFile >> intensity;
+ flowgram[i] = intensity;// (int)round(100 * intensity);
+ }
+ m->gobble(flowFile);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: before align.\n"); }
+
+ alignFlowGrams(flowgram, refFlow, gapOpening, penaltyMatrix, flowOrder);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: after align.\n"); }
+
+ if (m->control_pressed) { errorFile.close(); flowFile.close(); return 0; }
+
+ for(int i=0;i<flowgram.size();i++){
+ int count = (int)round(100*flowgram[i]);
+ if(count > 1000){count = 999;}
+ if(abs(flowgram[i]-refFlow[i])<=0.50){
+ lookupTable[count][int(refFlow[i])]++; // * build table
+ std[int(refFlow[i])] += (100*refFlow[i]-count)*(100*refFlow[i]-count);
+ }
}
}
}
errorFile.close(); flowFile.close();
+
//get probabilities
- vector<int> counts(11, 0);
+ vector<int> counts; counts.resize(11, 0);
int totalCount = 0;
for(int i=0;i<1000;i++){
for(int j=0;j<11;j++){
//calculate the probability of each homopolymer length
- vector<double> negLogHomoProb(11, 0.00); //bring back
+ vector<double> negLogHomoProb; negLogHomoProb.resize(11, 0.00); //bring back
for(int i=0;i<N;i++){
negLogHomoProb[i] = -log(counts[i] / (double)totalCount);
}
//cout << numQueryFlows << '\t' << numRefFlows << endl;
- vector<vector<double> > scoreMatrix(numQueryFlows+1);
- vector<vector<char> > directMatrix(numQueryFlows+1);
+ vector<vector<double> > scoreMatrix; scoreMatrix.resize(numQueryFlows+1);
+ vector<vector<char> > directMatrix; directMatrix.resize(numQueryFlows+1);
for(int i=0;i<=numQueryFlows;i++){
if (m->control_pressed) { return 0; }
directMatrix[0][i] = 'l';
}
+
for(int i=1;i<=numQueryFlows;i++){
for(int j=1;j<=numRefFlows;j++){
if (m->control_pressed) { return 0; }
int minColumnIndex = numRefFlows;
double minColumnScore = scoreMatrix[numQueryFlows][numRefFlows];
- for(int i=0;i<numQueryFlows;i++){
+ for(int i=0;i<numRefFlows;i++){
if (m->control_pressed) { return 0; }
if(scoreMatrix[numQueryFlows][i] < minColumnScore){
minColumnScore = scoreMatrix[numQueryFlows][i];
vector<double> newFlowgram;
vector<double> newRefFlowgram;
-
+
while(i > 0 && j > 0){
if (m->control_pressed) { return 0; }
if(directMatrix[i][j] == 'd'){
string getOutputPattern(string);
string getHelpString();
- string getCitation() { return "http://www.mothur.org/wiki/Make.lookup"; }
- string getDescription() { return "create custom lookup files for use with shhh.flows"; }
+ string getCitation() { return "Quince, C., A. Lanzén, T. P. Curtis, R. J. Davenport, N. Hall, I. M. Head, L. F. Read, and W. T. Sloan. 2009. Accurate determination of microbial diversity from 454 pyrosequencing data. Nat Methods 6:639-41. http://www.mothur.org/wiki/Make.lookup"; }
+ string getDescription() { return "Creates a lookup file for use with shhh.flows using user-supplied mock community data and flow grams"; }
int execute();
void help() { m->mothurOut(getHelpString()); }
if (namefile != "") {
nameMap = new NameAssignment(namefile);
nameMap->readMap();
- }else{ nameMap= new NameAssignment(); }
+ }else if (countfile != "") {
+ ct = new CountTable();
+ ct->readTable(countfile, false);
+ nameMap= new NameAssignment();
+ vector<string> tempNames = ct->getNamesOfSeqs();
+ for (int i = 0; i < tempNames.size(); i++) { nameMap->push_back(tempNames[i]); }
+ }else{ nameMap= new NameAssignment(); }
string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
string tag = "";
RAbundVector* rabund = NULL;
if(countfile != "") {
- //map<string, int> nameMapCounts = m->readNames(namefile);
- ct = new CountTable();
- ct->readTable(countfile, false);
rabund = new RAbundVector();
createRabund(ct, list, rabund);
}else {
PDistCell() : index(0), dist(0) {};
PDistCell(ull c, float d) : index(c), dist(d) {}
};
+/***********************************************************************/
+struct consTax{
+ string name;
+ string taxonomy;
+ int abundance;
+ consTax() : name(""), taxonomy("unknown"), abundance(0) {};
+ consTax(string n, string t, int a) : name(n), taxonomy(t), abundance(a) {}
+};
+/***********************************************************************/
+struct consTax2{
+ string taxonomy;
+ int abundance;
+ consTax2() : taxonomy("unknown"), abundance(0) {};
+ consTax2(string t, int a) : taxonomy(t), abundance(a) {}
+};
/************************************************************/
struct clusterNode {
int numSeq;
seqPriorityNode(int n, string s, string nm) : numIdentical(n), seq(s), name(nm) {}
~seqPriorityNode() {}
};
+/************************************************************/
+struct compGroup {
+ string group1;
+ string group2;
+ compGroup() {}
+ compGroup(string s, string nm) : group1(s), group2(nm) {}
+ string getCombo() { return group1+"-"+group2; }
+ ~compGroup() {}
+};
/***************************************************************/
struct spearmanRank {
string name;
//sorts lowest to highest
inline bool compareSequenceDistance(seqDist left, seqDist right){
return (left.dist < right.dist);
-}
+}
+//********************************************************************************************************************
+//returns sign of double
+inline double sign(double temp){
+ //find sign
+ if (temp > 0) { return 1.0; }
+ else if (temp < 0) { return -1.0; }
+ return 0;
+}
/***********************************************************************/
// snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2
}
}
+//**********************************************************************************************************************
+
+map<string, vector<string> > MothurOut::parseClasses(string classes){
+ try {
+ map<string, vector<string> > parts;
+
+ //treatment<Early|Late>-age<young|old>
+ vector<string> pieces; splitAtDash(classes, pieces); // -> treatment<Early|Late>, age<young|old>
+
+ for (int i = 0; i < pieces.size(); i++) {
+ string category = ""; string value = "";
+ bool foundOpen = false;
+ for (int j = 0; j < pieces[i].length(); j++) {
+ if (control_pressed) { return parts; }
+
+ if (pieces[i][j] == '<') { foundOpen = true; }
+ else if (pieces[i][j] == '>') { j += pieces[i].length(); }
+ else {
+ if (!foundOpen) { category += pieces[i][j]; }
+ else { value += pieces[i][j]; }
+ }
+ }
+ vector<string> values; splitAtChar(value, values, '|');
+ parts[category] = values;
+ }
+
+ return parts;
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "parseClasses");
+ exit(1);
+ }
+}
/***********************************************************************/
string MothurOut::hasPath(string longName){
exit(1);
}
}
+//**********************************************************************************************************************
+vector<consTax> MothurOut::readConsTax(string inputfile){
+ try {
+
+ vector<consTax> taxes;
+
+ ifstream in;
+ openInputFile(inputfile, in);
+
+ //read headers
+ getline(in);
+
+ while (!in.eof()) {
+
+ if (control_pressed) { break; }
+
+ string otu = ""; string tax = "unknown";
+ int size = 0;
+
+ in >> otu >> size >> tax; gobble(in);
+ consTax temp(otu, tax, size);
+ taxes.push_back(temp);
+ }
+ in.close();
+
+ return taxes;
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "readConsTax");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int MothurOut::readConsTax(string inputfile, map<string, consTax2>& taxes){
+ try {
+ ifstream in;
+ openInputFile(inputfile, in);
+
+ //read headers
+ getline(in);
+
+ while (!in.eof()) {
+
+ if (control_pressed) { break; }
+
+ string otu = ""; string tax = "unknown";
+ int size = 0;
+
+ in >> otu >> size >> tax; gobble(in);
+ consTax2 temp(tax, size);
+ taxes[otu] = temp;
+ }
+ in.close();
+
+ return 0;
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "readConsTax");
+ exit(1);
+ }
+}
/**************************************************************************************************/
vector<unsigned long long> MothurOut::setFilePosEachLine(string filename, int& num) {
try {
in.read(buffer, 4096);
vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
- for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
+ for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]);
+ names.insert(pieces[i]);
+ }
}
in.close();
map<string, int> readNames(string);
map<string, int> readNames(string, unsigned long int&);
int readTax(string, map<string, string>&);
+ vector<consTax> readConsTax(string);
+ int readConsTax(string, map<string, consTax2>&);
int readNames(string, map<string, string>&, map<string, int>&);
int readNames(string, map<string, string>&);
int readNames(string, map<string, string>&, bool);
string makeList(vector<string>&);
bool isSubset(vector<string>, vector<string>); //bigSet, subset
int checkName(string&);
+ map<string, vector<string> > parseClasses(string);
//math operation
int factorial(int num);
//save names of columns you are reading
while (!iss.eof()) {
iss >> columnLabel; m->gobble(iss);
+ if (m->debug) { m->mothurOut("[DEBUG]: metadata column Label = " + columnLabel + "\n"); }
metadataLabels.push_back(columnLabel);
}
int count = metadataLabels.size();
string group = "";
in >> group; m->gobble(in);
+ if (m->debug) { m->mothurOut("[DEBUG]: metadata group = " + group + "\n"); }
SharedRAbundFloatVector* tempLookup = new SharedRAbundFloatVector();
tempLookup->setGroup(group);
for (int i = 0; i < count; i++) {
float temp = 0.0;
- in >> temp;
+ in >> temp;
+ if (m->debug) { m->mothurOut("[DEBUG]: metadata value = " + toString(temp) + "\n"); }
tempLookup->push_back(temp, group);
}
exit(1);
}
}
+/**************************************************************************************************/
+void PhyloSummary::print(ofstream& out, bool relabund){
+ try {
+
+ if (ignore) { assignRank(0); }
+
+ int totalChildrenInTree = 0;
+ map<string, int>::iterator itGroup;
+
+ map<string,int>::iterator it;
+ for(it=tree[0].children.begin();it!=tree[0].children.end();it++){
+ if (tree[it->second].total != 0) {
+ totalChildrenInTree++;
+ tree[0].total += tree[it->second].total;
+
+ if (groupmap != NULL) {
+ vector<string> mGroups = groupmap->getNamesOfGroups();
+ for (int i = 0; i < mGroups.size(); i++) { tree[0].groupCount[mGroups[i]] += tree[it->second].groupCount[mGroups[i]]; }
+ }else if ( ct != NULL) {
+ vector<string> mGroups = ct->getNamesOfGroups();
+ if (ct->hasGroupInfo()) { for (int i = 0; i < mGroups.size(); i++) { tree[0].groupCount[mGroups[i]] += tree[it->second].groupCount[mGroups[i]]; } }
+ }
+ }
+ }
+
+ //print root
+ out << tree[0].name << "\t" << "1.0000" << "\t"; //root relative abundance is 1, everyone classifies to root
+
+ /*
+ if (groupmap != NULL) {
+ for (int i = 0; i < mGroups.size(); i++) { out << tree[0].groupCount[mGroups[i]] << '\t'; }
+ }else if ( ct != NULL) {
+ if (ct->hasGroupInfo()) { for (int i = 0; i < mGroups.size(); i++) { out << tree[0].groupCount[mGroups[i]] << '\t'; } }
+ }*/
+
+ if (groupmap != NULL) {
+ vector<string> mGroups = groupmap->getNamesOfGroups();
+ for (int i = 0; i < mGroups.size(); i++) { out << "1.0000" << '\t'; }
+ }else if ( ct != NULL) {
+ vector<string> mGroups = ct->getNamesOfGroups();
+ if (ct->hasGroupInfo()) { for (int i = 0; i < mGroups.size(); i++) { out << "1.0000" << '\t'; } }
+ }
+
+ out << endl;
+
+ //print rest
+ print(0, out, relabund);
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PhyloSummary", "print");
+ exit(1);
+ }
+}
/**************************************************************************************************/
void PhyloSummary::print(int i, ofstream& out){
for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
if (tree[it->second].total != 0) {
-
+
int totalChildrenInTree = 0;
-
+
map<string,int>::iterator it2;
- for(it2=tree[it->second].children.begin();it2!=tree[it->second].children.end();it2++){
+ for(it2=tree[it->second].children.begin();it2!=tree[it->second].children.end();it2++){
if (tree[it2->second].total != 0) { totalChildrenInTree++; }
}
-
+
out << tree[it->second].level << "\t" << tree[it->second].rank << "\t" << tree[it->second].name << "\t" << totalChildrenInTree << "\t" << tree[it->second].total << "\t";
map<string, int>::iterator itGroup;
// out << itGroup->second << '\t';
//}
vector<string> mGroups = groupmap->getNamesOfGroups();
- for (int i = 0; i < mGroups.size(); i++) { out << tree[it->second].groupCount[mGroups[i]] << '\t'; }
+ for (int i = 0; i < mGroups.size(); i++) { out << tree[it->second].groupCount[mGroups[i]] << '\t'; }
}else if (ct != NULL) {
if (ct->hasGroupInfo()) {
vector<string> mGroups = ct->getNamesOfGroups();
- for (int i = 0; i < mGroups.size(); i++) { out << tree[it->second].groupCount[mGroups[i]] << '\t'; }
+ for (int i = 0; i < mGroups.size(); i++) { out << tree[it->second].groupCount[mGroups[i]] << '\t'; }
}
}
out << endl;
exit(1);
}
}
+
+/**************************************************************************************************/
+
+void PhyloSummary::print(int i, ofstream& out, bool relabund){
+ try {
+ map<string,int>::iterator it;
+ for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
+
+ if (tree[it->second].total != 0) {
+
+ int totalChildrenInTree = 0;
+
+ map<string,int>::iterator it2;
+ for(it2=tree[it->second].children.begin();it2!=tree[it->second].children.end();it2++){
+ if (tree[it2->second].total != 0) { totalChildrenInTree++; }
+ }
+
+ string nodeName = "";
+ int thisNode = it->second;
+ while (tree[thisNode].rank != "0") { //while you are not at top
+ if (m->control_pressed) { break; }
+ nodeName = tree[thisNode].name + "|" + nodeName;
+ thisNode = tree[thisNode].parent;
+ }
+ if (nodeName != "") { nodeName = nodeName.substr(0, nodeName.length()-1); }
+
+ out << nodeName << "\t" << (tree[it->second].total / (float)tree[i].total) << "\t";
+
+ map<string, int>::iterator itGroup;
+ if (groupmap != NULL) {
+ vector<string> mGroups = groupmap->getNamesOfGroups();
+ for (int j = 0; j < mGroups.size(); j++) {
+ if (tree[i].groupCount[mGroups[j]] == 0) {
+ out << 0 << '\t';
+ }else { out << (tree[it->second].groupCount[mGroups[j]] / (float)tree[i].groupCount[mGroups[j]]) << '\t'; }
+ }
+ }else if (ct != NULL) {
+ if (ct->hasGroupInfo()) {
+ vector<string> mGroups = ct->getNamesOfGroups();
+ for (int j = 0; j < mGroups.size(); j++) {
+ if (tree[i].groupCount[mGroups[j]] == 0) {
+ out << 0 << '\t';
+ }else { out << (tree[it->second].groupCount[mGroups[j]] / (float)tree[i].groupCount[mGroups[j]]) << '\t'; }
+ }
+ }
+ }
+ out << endl;
+
+ }
+
+ print(it->second, out, relabund);
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PhyloSummary", "print");
+ exit(1);
+ }
+}
/**************************************************************************************************/
void PhyloSummary::readTreeStruct(ifstream& in){
try {
int addSeqToTree(string, string);
int addSeqToTree(string, map<string, bool>);
void print(ofstream&);
+ void print(ofstream&, bool);
int getMaxLevel() { return maxLevel; }
private:
string getNextTaxon(string&);
vector<rawTaxNode> tree;
void print(int, ofstream&);
+ void print(int, ofstream&, bool);
void assignRank(int);
void readTreeStruct(ifstream&);
GroupMap* groupmap;
/***********************************************************************/
-RandomForest::RandomForest(const vector <vector<int> > dataSet,const int numDecisionTrees,
- const string treeSplitCriterion = "informationGain") : Forest(dataSet, numDecisionTrees, treeSplitCriterion) {
+RandomForest::RandomForest(const vector <vector<int> > dataSet,
+ const int numDecisionTrees,
+ const string treeSplitCriterion = "gainratio",
+ const bool doPruning = false,
+ const float pruneAggressiveness = 0.9,
+ const bool discardHighErrorTrees = true,
+ const float highErrorTreeDiscardThreshold = 0.4,
+ const string optimumFeatureSubsetSelectionCriteria = "log2",
+ const float featureStandardDeviationThreshold = 0.0)
+ : Forest(dataSet, numDecisionTrees, treeSplitCriterion, doPruning, pruneAggressiveness, discardHighErrorTrees, highErrorTreeDiscardThreshold, optimumFeatureSubsetSelectionCriteria, featureStandardDeviationThreshold) {
m = MothurOut::getInstance();
}
}
/***********************************************************************/
-// DONE
int RandomForest::calcForrestVariableImportance(string filename) {
try {
- // TODO: need to add try/catch operators to fix this
- // follow the link: http://en.wikipedia.org/wiki/Dynamic_cast
+ // follow the link: http://en.wikipedia.org/wiki/Dynamic_cast
//if you are going to dynamically cast, aren't you undoing the advantage of abstraction. Why abstract at all?
//could cause maintenance issues later if other types of Abstract decison trees are created that cannot be cast as a decision tree.
- for (int i = 0; i < decisionTrees.size(); i++) {
- if (m->control_pressed) { return 0; }
-
- DecisionTree* decisionTree = dynamic_cast<DecisionTree*>(decisionTrees[i]);
+ for (int i = 0; i < decisionTrees.size(); i++) {
+ if (m->control_pressed) { return 0; }
+
+ DecisionTree* decisionTree = dynamic_cast<DecisionTree*>(decisionTrees[i]);
+
+ for (int j = 0; j < numFeatures; j++) {
+ globalVariableImportanceList[j] += (double)decisionTree->variableImportanceList[j];
+ }
+ }
- for (int j = 0; j < numFeatures; j++) {
- globalVariableImportanceList[j] += (double)decisionTree->variableImportanceList[j];
+ for (int i = 0; i < numFeatures; i++) {
+ globalVariableImportanceList[i] /= (double)numDecisionTrees;
}
- }
-
- for (int i = 0; i < numFeatures; i++) {
- cout << "[" << i << ',' << globalVariableImportanceList[i] << "], ";
- globalVariableImportanceList[i] /= (double)numDecisionTrees;
- }
-
- vector< vector<double> > globalVariableRanks;
- for (int i = 0; i < globalVariableImportanceList.size(); i++) {
- if (globalVariableImportanceList[i] > 0) {
- vector<double> globalVariableRank(2, 0);
- globalVariableRank[0] = i; globalVariableRank[1] = globalVariableImportanceList[i];
- globalVariableRanks.push_back(globalVariableRank);
+
+ vector< pair<int, double> > globalVariableRanks;
+ for (int i = 0; i < globalVariableImportanceList.size(); i++) {
+ //cout << "[" << i << ',' << globalVariableImportanceList[i] << "], ";
+ if (globalVariableImportanceList[i] > 0) {
+ pair<int, double> globalVariableRank(0, 0.0);
+ globalVariableRank.first = i;
+ globalVariableRank.second = globalVariableImportanceList[i];
+ globalVariableRanks.push_back(globalVariableRank);
+ }
}
- }
-
- VariableRankDescendingSorterDouble variableRankDescendingSorter;
- sort(globalVariableRanks.begin(), globalVariableRanks.end(), variableRankDescendingSorter);
+
+// for (int i = 0; i < globalVariableRanks.size(); i++) {
+// cout << m->currentBinLabels[(int)globalVariableRanks[i][0]] << '\t' << globalVariableImportanceList[globalVariableRanks[i][0]] << endl;
+// }
+
+
+ VariableRankDescendingSorterDouble variableRankDescendingSorter;
+ sort(globalVariableRanks.begin(), globalVariableRanks.end(), variableRankDescendingSorter);
+
ofstream out;
m->openOutputFile(filename, out);
out <<"OTU\tRank\n";
for (int i = 0; i < globalVariableRanks.size(); i++) {
- out << m->currentBinLabels[(int)globalVariableRanks[i][0]] << '\t' << globalVariableImportanceList[globalVariableRanks[i][0]] << endl;
+ out << m->currentBinLabels[(int)globalVariableRanks[i].first] << '\t' << globalVariableImportanceList[globalVariableRanks[i].first] << endl;
}
out.close();
return 0;
}
}
/***********************************************************************/
-// DONE
int RandomForest::populateDecisionTrees() {
try {
+ vector<double> errorRateImprovements;
+
for (int i = 0; i < numDecisionTrees; i++) {
+
if (m->control_pressed) { return 0; }
- if (((i+1) % 10) == 0) { m->mothurOut("Creating " + toString(i+1) + " (th) Decision tree\n"); }
+ if (((i+1) % 100) == 0) { m->mothurOut("Creating " + toString(i+1) + " (th) Decision tree\n"); }
+
// TODO: need to first fix if we are going to use pointer based system or anything else
- DecisionTree* decisionTree = new DecisionTree(dataSet, globalDiscardedFeatureIndices, OptimumFeatureSubsetSelector("log2"), treeSplitCriterion);
- decisionTree->calcTreeVariableImportanceAndError();
- if (m->control_pressed) { return 0; }
- updateGlobalOutOfBagEstimates(decisionTree);
- if (m->control_pressed) { return 0; }
- decisionTree->purgeDataSetsFromTree();
- if (m->control_pressed) { return 0; }
- decisionTrees.push_back(decisionTree);
+ DecisionTree* decisionTree = new DecisionTree(dataSet, globalDiscardedFeatureIndices, OptimumFeatureSubsetSelector(optimumFeatureSubsetSelectionCriteria), treeSplitCriterion, featureStandardDeviationThreshold);
+
+ if (m->debug && doPruning) {
+ m->mothurOut("Before pruning\n");
+ decisionTree->printTree(decisionTree->rootNode, "ROOT");
+ }
+
+ int numCorrect;
+ double treeErrorRate;
+
+ decisionTree->calcTreeErrorRate(numCorrect, treeErrorRate);
+ double prePrunedErrorRate = treeErrorRate;
+
+ if (m->debug) {
+ m->mothurOut("treeErrorRate: " + toString(treeErrorRate) + " numCorrect: " + toString(numCorrect) + "\n");
+ }
+
+ if (doPruning) {
+ decisionTree->pruneTree(pruneAggressiveness);
+ if (m->debug) {
+ m->mothurOut("After pruning\n");
+ decisionTree->printTree(decisionTree->rootNode, "ROOT");
+ }
+ decisionTree->calcTreeErrorRate(numCorrect, treeErrorRate);
+ }
+ double postPrunedErrorRate = treeErrorRate;
+
+
+ decisionTree->calcTreeVariableImportanceAndError(numCorrect, treeErrorRate);
+ double errorRateImprovement = (prePrunedErrorRate - postPrunedErrorRate) / prePrunedErrorRate;
+
+ if (m->debug) {
+ m->mothurOut("treeErrorRate: " + toString(treeErrorRate) + " numCorrect: " + toString(numCorrect) + "\n");
+ if (doPruning) {
+ m->mothurOut("errorRateImprovement: " + toString(errorRateImprovement) + "\n");
+ }
+ }
+
+
+ if (discardHighErrorTrees) {
+ if (treeErrorRate < highErrorTreeDiscardThreshold) {
+ updateGlobalOutOfBagEstimates(decisionTree);
+ decisionTree->purgeDataSetsFromTree();
+ decisionTrees.push_back(decisionTree);
+ if (doPruning) {
+ errorRateImprovements.push_back(errorRateImprovement);
+ }
+ } else {
+ delete decisionTree;
+ }
+ } else {
+ updateGlobalOutOfBagEstimates(decisionTree);
+ decisionTree->purgeDataSetsFromTree();
+ decisionTrees.push_back(decisionTree);
+ if (doPruning) {
+ errorRateImprovements.push_back(errorRateImprovement);
+ }
+ }
+ }
+
+ double avgErrorRateImprovement = -1.0;
+ if (errorRateImprovements.size() > 0) {
+ avgErrorRateImprovement = accumulate(errorRateImprovements.begin(), errorRateImprovements.end(), 0.0);
+// cout << "Total " << avgErrorRateImprovement << " size " << errorRateImprovements.size() << endl;
+ avgErrorRateImprovement /= errorRateImprovements.size();
}
- if (m->debug) {
- // m->mothurOut("globalOutOfBagEstimates = " + toStringVectorMap(globalOutOfBagEstimates)+ "\n");
+ if (m->debug && doPruning) {
+ m->mothurOut("avgErrorRateImprovement:" + toString(avgErrorRateImprovement) + "\n");
}
+ // m->mothurOut("globalOutOfBagEstimates = " + toStringVectorMap(globalOutOfBagEstimates)+ "\n");
+
return 0;
}
// Copyright (c) 2012 Schloss Lab. All rights reserved.
//
-#ifndef rrf_fs_prototype_randomforest_hpp
-#define rrf_fs_prototype_randomforest_hpp
+#ifndef RF_RANDOMFOREST_HPP
+#define RF_RANDOMFOREST_HPP
#include "macros.h"
#include "forest.h"
public:
- // DONE
- RandomForest(const vector <vector<int> > dataSet,const int numDecisionTrees, const string);
+ RandomForest(const vector <vector<int> > dataSet,
+ const int numDecisionTrees,
+ const string treeSplitCriterion,
+ const bool doPruning,
+ const float pruneAggressiveness,
+ const bool discardHighErrorTrees,
+ const float highErrorTreeDiscardThreshold,
+ const string optimumFeatureSubsetSelectionCriteria,
+ const float featureStandardDeviationThreshold);
//NOTE:: if you are going to dynamically cast, aren't you undoing the advantage of abstraction. Why abstract at all?
//could cause maintenance issues later if other types of Abstract decison trees are created that cannot be cast as a decision tree.
-// virtual ~RandomForest() {
-// for (vector<AbstractDecisionTree*>::iterator it = decisionTrees.begin(); it != decisionTrees.end(); it++) {
-// // we know that this is decision tree, so we can do a dynamic_case<DecisionTree*> here
-// DecisionTree* decisionTree = dynamic_cast<DecisionTree*>(*it);
-// // calling the destructor by deleting
-// delete decisionTree;
-// }
-// }
+ virtual ~RandomForest() {
+ for (vector<AbstractDecisionTree*>::iterator it = decisionTrees.begin(); it != decisionTrees.end(); it++) {
+ // we know that this is decision tree, so we can do a dynamic_case<DecisionTree*> here
+ DecisionTree* decisionTree = dynamic_cast<DecisionTree*>(*it);
+ // calling the destructor by deleting
+ delete decisionTree;
+ }
+ }
int calcForrestErrorRate();
int calcForrestVariableImportance(string);
#include "regularizedrandomforest.h"
-RegularizedRandomForest::RegularizedRandomForest(const vector <vector<int> > dataSet,const int numDecisionTrees,
- const string treeSplitCriterion = "informationGain") : Forest(dataSet, numDecisionTrees, treeSplitCriterion) {
+RegularizedRandomForest::RegularizedRandomForest(const vector <vector<int> > dataSet,
+ const int numDecisionTrees,
+ const string treeSplitCriterion = "gainratio")
+ // TODO: update ctor according to basic RandomForest Class
+ : Forest(dataSet,
+ numDecisionTrees,
+ treeSplitCriterion,
+ false, 0.9, true, 0.4, "log2", 0.0) {
m = MothurOut::getInstance();
}
--- /dev/null
+//
+// renameseqscommand.cpp
+// Mothur
+//
+// Created by SarahsWork on 5/28/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "renameseqscommand.h"
+#include "sequence.hpp"
+#include "groupmap.h"
+
+//**********************************************************************************************************************
+vector<string> RenameSeqsCommand::setParameters(){
+ try {
+ CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RenameSeqsCommand", "setParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string RenameSeqsCommand::getHelpString(){
+ try {
+ string helpString = "";
+ helpString += "The rename.seqs command reads a fastafile and groupfile with an optional namefile, and creates files with the sequence names concatenated with the group. For example if a line in the group file is 'seq1 group1', the new sequence name will be seq1_group1.\n";
+ helpString += "The rename.seqs command parameters are fasta, name and group. Fasta and group are required, unless a current file is available for both.\n";
+ helpString += "The rename.seqs command should be in the following format: \n";
+ helpString += "rename.seqs(fasta=yourFastaFile, group=yourGroupFile) \n";
+ helpString += "Example rename.seqs(fasta=abrecovery.unique.fasta, group=abrecovery.group).\n";
+ helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
+ return helpString;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RenameSeqsCommand", "getHelpString");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string RenameSeqsCommand::getOutputPattern(string type) {
+ try {
+ string pattern = "";
+
+ if (type == "fasta") { pattern = "[filename],renamed,[extension]"; }
+ else if (type == "name") { pattern = "[filename],renamed,[extension]"; }
+ else if (type == "group") { pattern = "[filename],renamed,[extension]"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RenameSeqsCommand", "getOutputPattern");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+RenameSeqsCommand::RenameSeqsCommand(){
+ try {
+ abort = true; calledHelp = true;
+ setParameters();
+ vector<string> tempOutNames;
+ outputTypes["fasta"] = tempOutNames;
+ outputTypes["name"] = tempOutNames;
+ outputTypes["group"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RenameSeqsCommand", "RenameSeqsCommand");
+ exit(1);
+ }
+}
+/**************************************************************************************/
+RenameSeqsCommand::RenameSeqsCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+ else {
+ vector<string> myArray = setParameters();
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+ map<string, string>::iterator it;
+
+ //check to make sure all parameters are valid for command
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ vector<string> tempOutNames;
+ outputTypes["fasta"] = tempOutNames;
+ outputTypes["name"] = tempOutNames;
+ outputTypes["group"] = tempOutNames;
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+ string path;
+ it = parameters.find("fasta");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["fasta"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("name");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["name"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("group");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["group"] = inputDir + it->second; }
+ }
+
+ }
+
+
+ //check for required parameters
+ fastaFile = validParameter.validFile(parameters, "fasta", true);
+ if (fastaFile == "not open") { abort = true; }
+ else if (fastaFile == "not found") {
+ fastaFile = m->getFastaFile();
+ if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
+ }else { m->setFastaFile(fastaFile); }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ outputDir = "";
+ outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
+ }
+
+ groupfile = validParameter.validFile(parameters, "group", true);
+ if (groupfile == "not open") { abort = true; }
+ else if (groupfile == "not found") {
+ groupfile = m->getGroupFile();
+ if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You have no current groupfile and the group parameter is required."); m->mothurOutEndLine(); abort = true; }
+ }else { m->setGroupFile(groupfile); }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
+
+ nameFile = validParameter.validFile(parameters, "name", true);
+ if (nameFile == "not open") { abort = true; }
+ else if (nameFile == "not found"){ nameFile =""; }
+ else { m->setNameFile(nameFile); }
+
+ if (nameFile == "") {
+ vector<string> files; files.push_back(fastaFile);
+ parser.getNameFile(files);
+ }
+
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RenameSeqsCommand", "RenameSeqsCommand");
+ exit(1);
+ }
+}
+/**************************************************************************************/
+int RenameSeqsCommand::execute() {
+ try {
+
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ GroupMap groupMap(groupfile);
+ groupMap.readMap();
+
+ //prepare filenames and open files
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(fastaFile); }
+ string outFastaFile = thisOutputDir + m->getRootName(m->getSimpleName(fastaFile));
+ map<string, string> variables;
+ variables["[filename]"] = outFastaFile;
+ variables["[extension]"] = m->getExtension(fastaFile);
+ outFastaFile = getOutputFileName("fasta", variables);
+ outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
+
+ ofstream outFasta;
+ m->openOutputFile(outFastaFile, outFasta);
+
+ ifstream in;
+ m->openInputFile(fastaFile, in);
+
+ while (!in.eof()) {
+ if (m->control_pressed) { break; }
+
+ Sequence seq(in); m->gobble(in);
+ string group = groupMap.getGroup(seq.getName());
+ if (group == "not found") { m->mothurOut("[ERROR]: " + seq.getName() + " is not in your group file, please correct.\n"); m->control_pressed = true; }
+ else {
+ string newName = seq.getName() + "_" + group;
+ seq.setName(newName);
+ seq.printSequence(outFasta);
+ }
+
+ }
+ in.close();
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ bool notDone = true;
+ if (nameFile != "") {
+ thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(nameFile); }
+ string outNameFile = thisOutputDir + m->getRootName(m->getSimpleName(nameFile));
+ variables["[filename]"] = outNameFile;
+ variables["[extension]"] = m->getExtension(nameFile);
+ outNameFile = getOutputFileName("group", variables);
+ outputNames.push_back(outNameFile); outputTypes["name"].push_back(outNameFile);
+
+ ofstream outName;
+ m->openOutputFile(outNameFile, outName);
+
+ map<string, vector<string> > nameMap;
+ m->readNames(nameFile, nameMap);
+
+ //process name file changing names
+ for (map<string, vector<string> >::iterator it = nameMap.begin(); it != nameMap.end(); it++) {
+ for (int i = 0; i < (it->second).size()-1; i++) {
+ if (m->control_pressed) { break; }
+ string group = groupMap.getGroup((it->second)[i]);
+ if (group == "not found") { m->mothurOut("[ERROR]: " + (it->second)[i] + " is not in your group file, please correct.\n"); m->control_pressed = true; }
+ else {
+ string newName = (it->second)[i] + "_" + group;
+ groupMap.renameSeq((it->second)[i], newName); //change in group file
+ (it->second)[i] = newName; //change in namefile
+ }
+ if (i == 0) { outName << (it->second)[i] << '\t' << (it->second)[i] << ','; }
+ else { outName << (it->second)[i] << ','; }
+ }
+
+ //print last one
+ if ((it->second).size() == 1) {
+ string group = groupMap.getGroup((it->second)[0]);
+ if (group == "not found") { m->mothurOut("[ERROR]: " + (it->second)[0] + " is not in your group file, please correct.\n"); m->control_pressed = true; }
+ else {
+ string newName = (it->second)[0] + "_" + group;
+ groupMap.renameSeq((it->second)[0], newName); //change in group file
+ (it->second)[0] = newName; //change in namefile
+
+ outName << (it->second)[0] << '\t' << (it->second)[0] << endl;
+ }
+ }
+ else {
+ string group = groupMap.getGroup((it->second)[(it->second).size()-1]);
+ if (group == "not found") { m->mothurOut("[ERROR]: " + (it->second)[(it->second).size()-1] + " is not in your group file, please correct.\n"); m->control_pressed = true; }
+ else {
+ string newName = (it->second)[(it->second).size()-1] + "_" + group;
+ groupMap.renameSeq((it->second)[(it->second).size()-1], newName); //change in group file
+ (it->second)[(it->second).size()-1] = newName; //change in namefile
+
+ outName << (it->second)[(it->second).size()-1] << endl;
+ }
+ }
+ }
+ notDone = false;
+ outName.close();
+ }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ if (notDone) {
+ vector<string> seqs = groupMap.getNamesSeqs();
+ for (int i = 0; i < seqs.size(); i++) {
+ if (m->control_pressed) { break; }
+ string group = groupMap.getGroup(seqs[i]);
+ string newName = seqs[i] + "_" + group;
+ groupMap.renameSeq(seqs[i], newName);
+ }
+ }
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
+ string outGroupFile = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
+ variables["[filename]"] = outGroupFile;
+ variables["[extension]"] = m->getExtension(groupfile);
+ outGroupFile = getOutputFileName("group", variables);
+ outputNames.push_back(outGroupFile); outputTypes["group"].push_back(outGroupFile);
+
+ ofstream outGroup;
+ m->openOutputFile(outGroupFile, outGroup);
+ groupMap.print(outGroup);
+ outGroup.close();
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+
+ //set fasta file as new current fastafile
+ string current = "";
+ itTypes = outputTypes.find("fasta");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
+ }
+
+ itTypes = outputTypes.find("name");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
+ }
+
+ itTypes = outputTypes.find("group");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RenameSeqsCommand", "execute");
+ exit(1);
+ }
+}
+/**************************************************************************************/
+
--- /dev/null
+//
+// renameseqscommand.h
+// Mothur
+//
+// Created by SarahsWork on 5/28/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_renameseqscommand_h
+#define Mothur_renameseqscommand_h
+
+#include "command.hpp"
+
+class RenameSeqsCommand : public Command {
+
+public:
+ RenameSeqsCommand(string);
+ RenameSeqsCommand();
+ ~RenameSeqsCommand() {}
+
+ vector<string> setParameters();
+ string getCommandName() { return "rename.seqs"; }
+ string getCommandCategory() { return "Sequence Processing"; }
+
+ string getHelpString();
+ string getOutputPattern(string);
+ string getCitation() { return "http://www.mothur.org/wiki/Rename.seqs"; }
+ string getDescription() { return "rename sequences"; }
+
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+
+private:
+
+ string fastaFile, nameFile, groupfile, outputDir;
+ vector<string> outputNames;
+ bool abort;
+
+ map<string, string> nameMap;
+};
+
+
+
+#endif
/***********************************************************************/
RFTreeNode::RFTreeNode(vector< vector<int> > bootstrappedTrainingSamples,
- vector<int> globalDiscardedFeatureIndices,
- int numFeatures,
- int numSamples,
- int numOutputClasses,
- int generation)
+ vector<int> globalDiscardedFeatureIndices,
+ int numFeatures,
+ int numSamples,
+ int numOutputClasses,
+ int generation,
+ int nodeId,
+ float featureStandardDeviationThreshold)
-: bootstrappedTrainingSamples(bootstrappedTrainingSamples),
-globalDiscardedFeatureIndices(globalDiscardedFeatureIndices),
-numFeatures(numFeatures),
-numSamples(numSamples),
-numOutputClasses(numOutputClasses),
-generation(generation),
-isLeaf(false),
-outputClass(-1),
-splitFeatureIndex(-1),
-splitFeatureValue(-1),
-splitFeatureEntropy(-1.0),
-ownEntropy(-1.0),
-bootstrappedFeatureVectors(numFeatures, vector<int>(numSamples, 0)),
-bootstrappedOutputVector(numSamples, 0),
-leftChildNode(NULL),
-rightChildNode(NULL),
-parentNode(NULL) {
+ : bootstrappedTrainingSamples(bootstrappedTrainingSamples),
+ globalDiscardedFeatureIndices(globalDiscardedFeatureIndices),
+ numFeatures(numFeatures),
+ numSamples(numSamples),
+ numOutputClasses(numOutputClasses),
+ generation(generation),
+ isLeaf(false),
+ outputClass(-1),
+ nodeId(nodeId),
+ testSampleMisclassificationCount(0),
+ splitFeatureIndex(-1),
+ splitFeatureValue(-1),
+ splitFeatureEntropy(-1.0),
+ ownEntropy(-1.0),
+ featureStandardDeviationThreshold(featureStandardDeviationThreshold),
+ bootstrappedFeatureVectors(numFeatures, vector<int>(numSamples, 0)),
+ bootstrappedOutputVector(numSamples, 0),
+ leftChildNode(NULL),
+ rightChildNode(NULL),
+ parentNode(NULL) {
+
m = MothurOut::getInstance();
for (int i = 0; i < numSamples; i++) { // just doing a simple transpose of the matrix
for (int j = 0; j < numFeatures; j++) { bootstrappedFeatureVectors[j][i] = bootstrappedTrainingSamples[i][j]; }
}
- for (int i = 0; i < numSamples; i++) { if (m->control_pressed) { break; } bootstrappedOutputVector[i] = bootstrappedTrainingSamples[i][numFeatures]; }
+ for (int i = 0; i < numSamples; i++) { if (m->control_pressed) { break; }
+ bootstrappedOutputVector[i] = bootstrappedTrainingSamples[i][numFeatures]; }
createLocalDiscardedFeatureList();
updateNodeEntropy();
/***********************************************************************/
int RFTreeNode::createLocalDiscardedFeatureList(){
try {
-
+
for (int i = 0; i < numFeatures; i++) {
+ // TODO: need to check if bootstrappedFeatureVectors == numFeatures, in python code we are using bootstrappedFeatureVectors instead of numFeatures
if (m->control_pressed) { return 0; }
vector<int>::iterator it = find(globalDiscardedFeatureIndices.begin(), globalDiscardedFeatureIndices.end(), i);
- if (it == globalDiscardedFeatureIndices.end()){ // NOT FOUND
+ if (it == globalDiscardedFeatureIndices.end()) { // NOT FOUND
double standardDeviation = m->getStandardDeviation(bootstrappedFeatureVectors[i]);
- if (standardDeviation <= 0){ localDiscardedFeatureIndices.push_back(i); }
+ if (standardDeviation <= featureStandardDeviationThreshold) { localDiscardedFeatureIndices.push_back(i); }
}
}
try {
vector<int> classCounts(numOutputClasses, 0);
- for (int i = 0; i < bootstrappedOutputVector.size(); i++) { classCounts[bootstrappedOutputVector[i]]++; }
+ for (int i = 0; i < bootstrappedOutputVector.size(); i++) {
+ classCounts[bootstrappedOutputVector[i]]++;
+ }
int totalClassCounts = accumulate(classCounts.begin(), classCounts.end(), 0);
double nodeEntropy = 0.0;
for (int i = 0; i < classCounts.size(); i++) {
// Copyright (c) 2012 Schloss Lab. All rights reserved.
//
-#ifndef rrf_fs_prototype_treenode_hpp
-#define rrf_fs_prototype_treenode_hpp
+#ifndef RF_RFTREENODE_HPP
+#define RF_RFTREENODE_HPP
#include "mothurout.h"
#include "macros.h"
public:
- RFTreeNode(vector< vector<int> > bootstrappedTrainingSamples, vector<int> globalDiscardedFeatureIndices, int numFeatures, int numSamples, int numOutputClasses, int generation);
+ RFTreeNode(vector< vector<int> > bootstrappedTrainingSamples,
+ vector<int> globalDiscardedFeatureIndices,
+ int numFeatures,
+ int numSamples,
+ int numOutputClasses,
+ int generation,
+ int nodeId,
+ float featureStandardDeviationThreshold = 0.0);
virtual ~RFTreeNode(){}
const vector<int>& getBootstrappedOutputVector() { return bootstrappedOutputVector; }
const vector<int>& getFeatureSubsetIndices() { return featureSubsetIndices; }
const double getOwnEntropy() { return ownEntropy; }
+ const int getTestSampleMisclassificationCount() { return testSampleMisclassificationCount; }
// setters
void setIsLeaf(bool isLeaf) { this->isLeaf = isLeaf; }
double splitFeatureEntropy;
double ownEntropy;
+ int nodeId;
+ float featureStandardDeviationThreshold;
+ int testSampleMisclassificationCount;
+
RFTreeNode* leftChildNode;
RFTreeNode* rightChildNode;
RFTreeNode* parentNode;
m->mothurRemove(outSummary+".temp");
}
- if (numFastaSeqs != count) { m->mothurOut("[ERROR]: found " + toString(numFastaSeqs) + " sequences in your fasta file, and " + toString(count) + " sequences in your contigs report file, quitting.\n"); m->control_pressed = true; }
+ if (numFastaSeqs != count) { m->mothurOut("[ERROR]: found " + toString(numFastaSeqs) + " sequences in your fasta file, and " + toString(count) + " sequences in your align report file, quitting.\n"); m->control_pressed = true; }
return count;
vector<int> longHomoPolymer;
vector<int> numNs;
- vector<unsigned long long> positions;
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
positions = m->divideFile(fastafile, processors);
for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); }
int pid;
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
- if (pid == 0) {
- driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, numNs, fastafile, lines[0]);
+ if (pid == 0) {
+ linePair tempLine(0, positions[positions.size()-1]);
+ driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, numNs, fastafile, tempLine);
#else
int numSeqs = 0;
//#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
helpString += "The set.dir command can also be used to specify the directory where your input files are located, the directory must exist.\n";
helpString += "The set.dir command can also be used to override or set the default location mothur will look for files if it is unable to find them, the directory must exist.\n";
helpString += "The set.dir command can also be used to run mothur in debug mode.\n";
- helpString += "The set.dir command can also be used to set the modifynames parameter. Default=t, meaning if your sequence names contain ':' change them to '_' to aviod issues while making trees. modifynames=F will leave sequence names as they are.\n";
+ helpString += "The set.dir command can also be used to set the modifynames parameter. Default=t, meaning if your sequence names contain ':' change them to '_' to avoid issues while making trees. modifynames=F will leave sequence names as they are.\n";
helpString += "The set.dir command parameters are input, output, tempdefault and debug and one is required.\n";
helpString += "To run mothur in debug mode set debug=true. Default debug=false.\n";
helpString += "To return the output to the same directory as the input files you may enter: output=clear.\n";
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- if (debugOnly) { return 0; }
-
- commandFactory = CommandFactory::getInstance();
-
- m->mothurOut("Mothur's directories:"); m->mothurOutEndLine();
-
- //redirect output
- if ((output == "clear") || (output == "")) { output = ""; commandFactory->setOutputDirectory(output); }
- else if (output == "default") {
- string exepath = m->argv;
- output = exepath.substr(0, (exepath.find_last_of('m')));
-
- m->mothurOut("outputDir=" + output); m->mothurOutEndLine();
- commandFactory->setOutputDirectory(output);
- }else {
- if (m->dirCheck(output)) {
- m->mothurOut("outputDir=" + output); m->mothurOutEndLine();
- commandFactory->setOutputDirectory(output);
+ if (debugOnly) { }
+ else {
+ commandFactory = CommandFactory::getInstance();
+
+ m->mothurOut("Mothur's directories:"); m->mothurOutEndLine();
+
+ //redirect output
+ if ((output == "clear") || (output == "")) { output = ""; commandFactory->setOutputDirectory(output); }
+ else if (output == "default") {
+ string exepath = m->argv;
+ output = exepath.substr(0, (exepath.find_last_of('m')));
+
+ m->mothurOut("outputDir=" + output); m->mothurOutEndLine();
+ commandFactory->setOutputDirectory(output);
+ }else {
+ if (m->dirCheck(output)) {
+ m->mothurOut("outputDir=" + output); m->mothurOutEndLine();
+ commandFactory->setOutputDirectory(output);
+ }
}
- }
-
- //redirect input
- if ((input == "clear") || (input == "")) { input = ""; commandFactory->setInputDirectory(input); }
- else if (input == "default") {
- string exepath = m->argv;
- input = exepath.substr(0, (exepath.find_last_of('m')));
-
- m->mothurOut("inputDir=" + input); m->mothurOutEndLine();
- commandFactory->setInputDirectory(input);
- }else {
- if (m->dirCheck(input)) {
- m->mothurOut("inputDir=" + input); m->mothurOutEndLine();
- commandFactory->setInputDirectory(input);
+
+ //redirect input
+ if ((input == "clear") || (input == "")) { input = ""; commandFactory->setInputDirectory(input); }
+ else if (input == "default") {
+ string exepath = m->argv;
+ input = exepath.substr(0, (exepath.find_last_of('m')));
+
+ m->mothurOut("inputDir=" + input); m->mothurOutEndLine();
+ commandFactory->setInputDirectory(input);
+ }else {
+ if (m->dirCheck(input)) {
+ m->mothurOut("inputDir=" + input); m->mothurOutEndLine();
+ commandFactory->setInputDirectory(input);
+ }
}
- }
-
- //set default
- if (tempdefault == "clear") {
- #ifdef MOTHUR_FILES
- string temp = MOTHUR_FILES;
- m->mothurOut("tempDefault=" + temp); m->mothurOutEndLine();
+
+ //set default
+ if (tempdefault == "clear") {
+#ifdef MOTHUR_FILES
+ string temp = MOTHUR_FILES;
+ m->mothurOut("tempDefault=" + temp); m->mothurOutEndLine();
m->setDefaultPath(temp);
- #else
- string temp = "";
- m->mothurOut("No default directory defined at compile time."); m->mothurOutEndLine();
+#else
+ string temp = "";
+ m->mothurOut("No default directory defined at compile time."); m->mothurOutEndLine();
m->setDefaultPath(temp);
- #endif
- }else if (tempdefault == "") { //do nothing
- }else if (tempdefault == "default") {
- string exepath = m->argv;
- tempdefault = exepath.substr(0, (exepath.find_last_of('m')));
-
- m->mothurOut("tempDefault=" + tempdefault); m->mothurOutEndLine();
- m->setDefaultPath(tempdefault);
- }else {
- if (m->dirCheck(tempdefault)) {
+#endif
+ }else if (tempdefault == "") { //do nothing
+ }else if (tempdefault == "default") {
+ string exepath = m->argv;
+ tempdefault = exepath.substr(0, (exepath.find_last_of('m')));
+
m->mothurOut("tempDefault=" + tempdefault); m->mothurOutEndLine();
- m->setDefaultPath(tempdefault);
+ m->setDefaultPath(tempdefault);
+ }else {
+ if (m->dirCheck(tempdefault)) {
+ m->mothurOut("tempDefault=" + tempdefault); m->mothurOutEndLine();
+ m->setDefaultPath(tempdefault);
+ }
}
}
-
return 0;
}
catch(exception& e) {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- commandFactory = CommandFactory::getInstance();
-
- commandFactory->setLogfileName(name, append);
+ commandFactory = CommandFactory::getInstance();
+
+ string directory = m->hasPath(name);
+ if (directory == "") {
+ commandFactory->setLogfileName(name, append);
+ }else if (m->dirCheck(directory)) {
+ commandFactory->setLogfileName(name, append);
+ }
return 0;
}
if (temp.length() > 1) { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true;
}
else {
- if (toupper(temp[0]) == 'A') { flowOrder = "TACG"; }
+ if (toupper(temp[0]) == 'A') { flowOrder = "A"; }
else if(toupper(temp[0]) == 'B'){
- flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC"; }
+ flowOrder = "B"; }
else if(toupper(temp[0]) == 'I'){
- flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC"; }
+ flowOrder = "I"; }
else {
m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true;
}
string fileroot = outputDir + m->getRootName(m->getSimpleName(filename));
map<string, string> variables;
variables["[filename]"] = fileroot;
- string fasta = fileroot + getOutputFileName("fasta",variables);
- string name = fileroot + getOutputFileName("name",variables);
- string group = fileroot + getOutputFileName("group",variables);
+ string fasta = getOutputFileName("fasta",variables);
+ string name = getOutputFileName("name",variables);
+ string group = getOutputFileName("group",variables);
if (m->control_pressed) { return 0; }
//commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden
string getHelpString();
string getCitation() { return "Friedman J, Alm EJ (2012) Inferring Correlation Networks from Genomic Survey Data. PLoS Comput Biol 8(9): e1002687. doi:10.1371/journal.pcbi.1002687 http://www.mothur.org/wiki/Sparcc"; }
- string getDescription() { return "brief description"; }
+ string getDescription() { return "Calculates correlations between OTUs using a method that is insensitive to the use of relative abundance data"; }
int execute();
void help() { m->mothurOut(getHelpString()); }
break;
}
- if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
+ if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
group = it->first;
forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
- reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+ reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
success = 0;
break;
}
}
- /*cout << minDiff << '\t' << minCount << '\t' << endl;
- for (int i = 0; i < minFGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;
- for (int i = 0; i < minRGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;*/
if(minDiff > bdiffs) { success = minDiff; } //no good matches
else {
bool foundMatch = false;
break;
}
- if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
+ if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
group = it->first;
forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
- reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+ reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
forwardQual.trimQScores(foligo.length(), -1);
- reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
+ reverseQual.trimQScores(roligo.length(), -1);
success = 0;
break;
}
}
- /*cout << minDiff << '\t' << minCount << '\t' << endl;
- for (int i = 0; i < minFGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;
- for (int i = 0; i < minRGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;*/
if(minDiff > bdiffs) { success = minDiff; } //no good matches
else {
bool foundMatch = false;
break;
}
+ if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; }
+
if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
group = it->first;
string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
}
- /*cout << minDiff << '\t' << minCount << '\t' << endl;
- for (int i = 0; i < minFGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;
- for (int i = 0; i < minRGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;*/
if(minDiff > bdiffs) { success = minDiff; } //no good matches
else {
bool foundMatch = false;
group = matches[0];
fStart = minFPos[0];
rStart = rawSeq.length() - minRPos[0];
+ if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence
}
//we have an acceptable match for the forward and reverse, but do they match?
break;
}
+ if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; }
+
if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
group = it->first;
if (!keepForward) {
success = pdiffs + 10;
break;
}
- //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
+ //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl;
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
oligo = alignment->getSeqAAln();
}
- /*cout << minDiff << '\t' << minCount << '\t' << endl;
- for (int i = 0; i < minFGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;
- for (int i = 0; i < minRGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;*/
if(minDiff > pdiffs) { success = minDiff; } //no good matches
else {
bool foundMatch = false;
group = matches[0];
fStart = minFPos[0];
rStart = rawSeq.length() - minRPos[0];
+ if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence
}
//we have an acceptable match for the forward and reverse, but do they match?
break;
}
- if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
+ if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
group = it->first;
forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
- reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+ reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
forwardQual.trimQScores(foligo.length(), -1);
- reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
+ reverseQual.trimQScores(roligo.length(), -1);
success = 0;
break;
}
}
- /*cout << minDiff << '\t' << minCount << '\t' << endl;
- for (int i = 0; i < minFGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;
- for (int i = 0; i < minRGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;*/
if(minDiff > pdiffs) { success = minDiff; } //no good matches
else {
bool foundMatch = false;
if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
group = it->first;
forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
- reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+ reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
success = 0;
break;
}
}
- /*cout << minDiff << '\t' << minCount << '\t' << endl;
- for (int i = 0; i < minFGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;
- for (int i = 0; i < minRGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;*/
if(minDiff > pdiffs) { success = minDiff; } //no good matches
else {
bool foundMatch = false;
CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
- CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
+ CommandParameter pminlength("minlength", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pminlength);
CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
m->mothurConvert(temp, maxHomoP);
- temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
+ temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "1"; }
m->mothurConvert(temp, minLength);
temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
int currentSeqsDiffs = 0;
Sequence currSeq(inFASTA); m->gobble(inFASTA);
- //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
+ //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl;
Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
QualityScores currQual; QualityScores savedQual;
}
else{ currentSeqsDiffs += success; }
}
-
+ //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl;
if(numSpacers != 0){
success = trimOligos->stripSpacer(currSeq, currQual);
if(success > sdiffs) { trashCode += 's'; }
int thisBarcodeIndex = 0;
int thisPrimerIndex = 0;
-
+ //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
if(numBarcodes != 0){
thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
if(thisSuccess > bdiffs) { thisTrashCode += "b"; }
else{ thisCurrentSeqsDiffs += thisSuccess; }
}
-
+ //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
if(numFPrimers != 0){
thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, keepforward);
if(thisSuccess > pdiffs) { thisTrashCode += "f"; }