A721765713BB9F7D0014DAAE /* referencedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721765613BB9F7D0014DAAE /* referencedb.cpp */; };
A724D2B7153C8628000A826F /* makebiomcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A724D2B6153C8628000A826F /* makebiomcommand.cpp */; };
A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; };
+ A73901081588C40900ED2ED6 /* loadlogfilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73901071588C40900ED2ED6 /* loadlogfilecommand.cpp */; };
A73DDBBA13C4A0D1006AAE38 /* clearmemorycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */; };
A73DDC3813C4BF64006AAE38 /* mothurmetastats.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */; };
A74A9A9F148E881E00AB5E3E /* spline.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74A9A9E148E881E00AB5E3E /* spline.cpp */; };
A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; };
+ A74D59A4159A1E2000043046 /* counttable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D59A3159A1E2000043046 /* counttable.cpp */; };
A754149714840CF7005850D1 /* summaryqualcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A754149614840CF7005850D1 /* summaryqualcommand.cpp */; };
A75790591301749D00A30DAB /* homovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75790581301749D00A30DAB /* homovacommand.cpp */; };
A76CDD821510F143004C8458 /* prcseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A76CDD811510F143004C8458 /* prcseqscommand.cpp */; };
A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */; };
A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */; };
A7D755DA1535F679009BF21A /* treereader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D755D91535F679009BF21A /* treereader.cpp */; };
+ A7E0243D15B4520A00A5F046 /* sparsedistancematrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */; };
A7E9B88112D37EC400DA6239 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B64F12D37EC300DA6239 /* ace.cpp */; };
A7E9B88212D37EC400DA6239 /* aligncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65112D37EC300DA6239 /* aligncommand.cpp */; };
A7E9B88312D37EC400DA6239 /* alignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65312D37EC300DA6239 /* alignment.cpp */; };
A7E9B92912D37EC400DA6239 /* readblast.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7B012D37EC400DA6239 /* readblast.cpp */; };
A7E9B92A12D37EC400DA6239 /* readcluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7B212D37EC400DA6239 /* readcluster.cpp */; };
A7E9B92B12D37EC400DA6239 /* readcolumn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7B412D37EC400DA6239 /* readcolumn.cpp */; };
- A7E9B92C12D37EC400DA6239 /* readdistcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7B612D37EC400DA6239 /* readdistcommand.cpp */; };
- A7E9B92E12D37EC400DA6239 /* readotucommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7BB12D37EC400DA6239 /* readotucommand.cpp */; };
A7E9B92F12D37EC400DA6239 /* readphylip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7BD12D37EC400DA6239 /* readphylip.cpp */; };
A7E9B93012D37EC400DA6239 /* readtree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7BF12D37EC400DA6239 /* readtree.cpp */; };
- A7E9B93112D37EC400DA6239 /* readtreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7C112D37EC400DA6239 /* readtreecommand.cpp */; };
A7E9B93212D37EC400DA6239 /* removegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7C312D37EC400DA6239 /* removegroupscommand.cpp */; };
A7E9B93312D37EC400DA6239 /* removelineagecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7C512D37EC400DA6239 /* removelineagecommand.cpp */; };
A7E9B93412D37EC400DA6239 /* removeotuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7C712D37EC400DA6239 /* removeotuscommand.cpp */; };
A724D2B6153C8628000A826F /* makebiomcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makebiomcommand.cpp; sourceTree = "<group>"; };
A727864212E9E28C00F86ABA /* removerarecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removerarecommand.h; sourceTree = "<group>"; };
A727864312E9E28C00F86ABA /* removerarecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removerarecommand.cpp; sourceTree = "<group>"; };
+ A73901051588C3EF00ED2ED6 /* loadlogfilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = loadlogfilecommand.h; sourceTree = "<group>"; };
+ A73901071588C40900ED2ED6 /* loadlogfilecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = loadlogfilecommand.cpp; sourceTree = "<group>"; };
A73DDBB813C4A0D1006AAE38 /* clearmemorycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clearmemorycommand.h; sourceTree = "<group>"; };
A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clearmemorycommand.cpp; sourceTree = "<group>"; };
A73DDC3613C4BF64006AAE38 /* mothurmetastats.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mothurmetastats.h; sourceTree = "<group>"; };
A74A9A9E148E881E00AB5E3E /* spline.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = spline.cpp; sourceTree = "<group>"; };
A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerauchimecommand.h; sourceTree = "<group>"; };
A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerauchimecommand.cpp; sourceTree = "<group>"; };
+ A74D59A3159A1E2000043046 /* counttable.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = counttable.cpp; sourceTree = "<group>"; };
+ A74D59A6159A1E3600043046 /* counttable.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = counttable.h; sourceTree = "<group>"; };
A754149514840CF7005850D1 /* summaryqualcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = summaryqualcommand.h; sourceTree = "<group>"; };
A754149614840CF7005850D1 /* summaryqualcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = summaryqualcommand.cpp; sourceTree = "<group>"; };
A75790571301749D00A30DAB /* homovacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = homovacommand.h; sourceTree = "<group>"; };
A7D755D71535F665009BF21A /* treereader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treereader.h; sourceTree = "<group>"; };
A7D755D91535F679009BF21A /* treereader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treereader.cpp; sourceTree = "<group>"; };
A7DAAFA3133A254E003956EB /* commandparameter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = commandparameter.h; sourceTree = "<group>"; };
+ A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sparsedistancematrix.cpp; sourceTree = "<group>"; };
+ A7E0243F15B4522000A5F046 /* sparsedistancematrix.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sparsedistancematrix.h; sourceTree = "<group>"; };
A7E9B64F12D37EC300DA6239 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = "<group>"; };
A7E9B65012D37EC300DA6239 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = "<group>"; };
A7E9B65112D37EC300DA6239 /* aligncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligncommand.cpp; sourceTree = "<group>"; };
A7E9B7B312D37EC400DA6239 /* readcluster.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readcluster.h; sourceTree = "<group>"; };
A7E9B7B412D37EC400DA6239 /* readcolumn.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readcolumn.cpp; sourceTree = "<group>"; };
A7E9B7B512D37EC400DA6239 /* readcolumn.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readcolumn.h; sourceTree = "<group>"; };
- A7E9B7B612D37EC400DA6239 /* readdistcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readdistcommand.cpp; sourceTree = "<group>"; };
- A7E9B7B712D37EC400DA6239 /* readdistcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readdistcommand.h; sourceTree = "<group>"; };
A7E9B7B812D37EC400DA6239 /* readmatrix.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = readmatrix.hpp; sourceTree = "<group>"; };
- A7E9B7BB12D37EC400DA6239 /* readotucommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readotucommand.cpp; sourceTree = "<group>"; };
- A7E9B7BC12D37EC400DA6239 /* readotucommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readotucommand.h; sourceTree = "<group>"; };
A7E9B7BD12D37EC400DA6239 /* readphylip.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readphylip.cpp; sourceTree = "<group>"; };
A7E9B7BE12D37EC400DA6239 /* readphylip.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readphylip.h; sourceTree = "<group>"; };
A7E9B7BF12D37EC400DA6239 /* readtree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readtree.cpp; sourceTree = "<group>"; };
A7E9B7C012D37EC400DA6239 /* readtree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readtree.h; sourceTree = "<group>"; };
- A7E9B7C112D37EC400DA6239 /* readtreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readtreecommand.cpp; sourceTree = "<group>"; };
- A7E9B7C212D37EC400DA6239 /* readtreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readtreecommand.h; sourceTree = "<group>"; };
A7E9B7C312D37EC400DA6239 /* removegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removegroupscommand.cpp; sourceTree = "<group>"; };
A7E9B7C412D37EC400DA6239 /* removegroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removegroupscommand.h; sourceTree = "<group>"; };
A7E9B7C512D37EC400DA6239 /* removelineagecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removelineagecommand.cpp; sourceTree = "<group>"; };
A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */,
A7E9B73E12D37EC400DA6239 /* listseqscommand.h */,
A7E9B73D12D37EC400DA6239 /* listseqscommand.cpp */,
+ A73901051588C3EF00ED2ED6 /* loadlogfilecommand.h */,
+ A73901071588C40900ED2ED6 /* loadlogfilecommand.cpp */,
A7FA10001302E096003860FE /* mantelcommand.h */,
A7FA10011302E096003860FE /* mantelcommand.cpp */,
A724D2B4153C8600000A826F /* makebiomcommand.h */,
A7E9B7AB12D37EC400DA6239 /* rarefactcommand.cpp */,
A7E9B7AF12D37EC400DA6239 /* rarefactsharedcommand.h */,
A7E9B7AE12D37EC400DA6239 /* rarefactsharedcommand.cpp */,
- A7E9B7B712D37EC400DA6239 /* readdistcommand.h */,
- A7E9B7B612D37EC400DA6239 /* readdistcommand.cpp */,
- A7E9B7BC12D37EC400DA6239 /* readotucommand.h */,
- A7E9B7BB12D37EC400DA6239 /* readotucommand.cpp */,
- A7E9B7C212D37EC400DA6239 /* readtreecommand.h */,
- A7E9B7C112D37EC400DA6239 /* readtreecommand.cpp */,
A7E9B7C412D37EC400DA6239 /* removegroupscommand.h */,
A7E9B7C312D37EC400DA6239 /* removegroupscommand.cpp */,
A7E9B7C612D37EC400DA6239 /* removelineagecommand.h */,
A7E9B66312D37EC400DA6239 /* blastalign.hpp */,
A7E9B66412D37EC400DA6239 /* blastdb.cpp */,
A7E9B66512D37EC400DA6239 /* blastdb.hpp */,
+ A74D59A6159A1E3600043046 /* counttable.h */,
+ A74D59A3159A1E2000043046 /* counttable.cpp */,
+ A7E9B6CD12D37EC400DA6239 /* distancedb.cpp */,
A7E9B6BD12D37EC400DA6239 /* database.cpp */,
A7E9B6BE12D37EC400DA6239 /* database.hpp */,
A7E9B6BF12D37EC400DA6239 /* datavector.hpp */,
- A7E9B6CD12D37EC400DA6239 /* distancedb.cpp */,
A7E9B6CE12D37EC400DA6239 /* distancedb.hpp */,
A7E9B6DE12D37EC400DA6239 /* fastamap.cpp */,
A7E9B6DF12D37EC400DA6239 /* fastamap.h */,
A7E9B81412D37EC400DA6239 /* sharedsabundvector.h */,
A7E9B83912D37EC400DA6239 /* sparsematrix.cpp */,
A7E9B83A12D37EC400DA6239 /* sparsematrix.hpp */,
+ A7E0243F15B4522000A5F046 /* sparsedistancematrix.h */,
+ A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */,
A7E9B85112D37EC400DA6239 /* suffixdb.cpp */,
A7E9B85212D37EC400DA6239 /* suffixdb.hpp */,
A7E9B85312D37EC400DA6239 /* suffixnodes.cpp */,
A7E9B92912D37EC400DA6239 /* readblast.cpp in Sources */,
A7E9B92A12D37EC400DA6239 /* readcluster.cpp in Sources */,
A7E9B92B12D37EC400DA6239 /* readcolumn.cpp in Sources */,
- A7E9B92C12D37EC400DA6239 /* readdistcommand.cpp in Sources */,
- A7E9B92E12D37EC400DA6239 /* readotucommand.cpp in Sources */,
A7E9B92F12D37EC400DA6239 /* readphylip.cpp in Sources */,
A7E9B93012D37EC400DA6239 /* readtree.cpp in Sources */,
- A7E9B93112D37EC400DA6239 /* readtreecommand.cpp in Sources */,
A7E9B93212D37EC400DA6239 /* removegroupscommand.cpp in Sources */,
A7E9B93312D37EC400DA6239 /* removelineagecommand.cpp in Sources */,
A7E9B93412D37EC400DA6239 /* removeotuscommand.cpp in Sources */,
A7A0671F1562AC3E0095C8C5 /* makecontigscommand.cpp in Sources */,
A70056E6156A93D000924A2D /* getotulabelscommand.cpp in Sources */,
A70056EB156AB6E500924A2D /* removeotulabelscommand.cpp in Sources */,
+ A73901081588C40900ED2ED6 /* loadlogfilecommand.cpp in Sources */,
+ A74D59A4159A1E2000043046 /* counttable.cpp in Sources */,
+ A7E0243D15B4520A00A5F046 /* sparsedistancematrix.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
GCC_OPTIMIZATION_LEVEL = 3;
GCC_PREPROCESSOR_DEFINITIONS = (
"MOTHUR_FILES=\"\\\"../release\\\"\"",
- "VERSION=\"\\\"1.25.0\\\"\"",
- "RELEASE_DATE=\"\\\"5/01/2012\\\"\"",
+ "VERSION=\"\\\"1.26.0\\\"\"",
+ "RELEASE_DATE=\"\\\"7/9/2012\\\"\"",
);
"GCC_VERSION[arch=*]" = "";
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_MODEL_TUNING = "";
GCC_OPTIMIZATION_LEVEL = 3;
GCC_PREPROCESSOR_DEFINITIONS = (
- "VERSION=\"\\\"1.25.0\\\"\"",
- "RELEASE_DATE=\"\\\"4/30/2012\\\"\"",
+ "VERSION=\"\\\"1.26.0\\\"\"",
+ "RELEASE_DATE=\"\\\"7/9/2012\\\"\"",
);
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
exit(1);
}
}
+//**********************************************************************************************************************
+// Returns the file-name tag (suffix) that align.seqs uses for the given output type.
+//   type      - output type key; must already be present in outputTypes.
+//   inputName - not used by this implementation.
+// Returns "align" for "fasta", "align.report" for "alignreport" and "flip.accnos" for
+// "accnos". Returns "" after logging an error when the type is not one this command
+// creates; a type that is registered in outputTypes but has no tag defined here also
+// sets m->control_pressed.
+string AlignCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		string tag = "";
+		map<string, vector<string> >::iterator it;
+
+		//is this a type this command creates
+		it = outputTypes.find(type);
+		if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+		else {
+			if (type == "fasta")            {   tag = "align";          }
+			else if (type == "alignreport") {   tag = "align.report";   }
+			else if (type == "accnos")      {   tag = "flip.accnos";    }
+			else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
+		}
+		return tag;
+	}
+	catch(exception& e) {
+		//report the real function name so the error log points at this method
+		m->errorOut(e, "AlignCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
+
//**********************************************************************************************************************
AlignCommand::AlignCommand(){
try {
m->mothurOut("Aligning sequences from " + candidateFileNames[s] + " ..." ); m->mothurOutEndLine();
if (outputDir == "") { outputDir += m->hasPath(candidateFileNames[s]); }
- string alignFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + "align";
- string reportFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + "align.report";
- string accnosFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + "flip.accnos";
+ string alignFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + getOutputFileNameTag("fasta");
+ string reportFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + getOutputFileNameTag("alignreport");
+ string accnosFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + getOutputFileNameTag("accnos");
bool hasAccnos = true;
int numFastaSeqs = 0;
if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
in.close(); m->mothurRemove(tempFile);
- appendAlignFiles((alignFileName + toString(processIDS[i]) + ".temp"), alignFileName);
+ m->appendFiles((alignFileName + toString(processIDS[i]) + ".temp"), alignFileName);
m->mothurRemove((alignFileName + toString(processIDS[i]) + ".temp"));
appendReportFiles((reportFileName + toString(processIDS[i]) + ".temp"), reportFileName);
rename(nonBlankAccnosFiles[0].c_str(), accnosFName.c_str());
for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
- appendAlignFiles(nonBlankAccnosFiles[h], accnosFName);
+ m->appendFiles(nonBlankAccnosFiles[h], accnosFName);
m->mothurRemove(nonBlankAccnosFiles[h]);
}
}else { //recreate the accnosfile if needed
else { m->mothurRemove(accnosFName); } //remove so other files can be renamed to it
for (int i = 1; i < processors; i++) {
- appendAlignFiles((alignFileName + toString(i) + ".temp"), alignFileName);
+ m->appendFiles((alignFileName + toString(i) + ".temp"), alignFileName);
m->mothurRemove((alignFileName + toString(i) + ".temp"));
appendReportFiles((reportFileName + toString(i) + ".temp"), reportFileName);
rename(nonBlankAccnosFiles[0].c_str(), accnosFName.c_str());
for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
- appendAlignFiles(nonBlankAccnosFiles[h], accnosFName);
+ m->appendFiles(nonBlankAccnosFiles[h], accnosFName);
m->mothurRemove(nonBlankAccnosFiles[h]);
}
}else { //recreate the accnosfile if needed
exit(1);
}
}
-/**************************************************************************************************/
-
-void AlignCommand::appendAlignFiles(string temp, string filename) {
- try{
-
- ofstream output;
- ifstream input;
- m->openOutputFileAppend(filename, output);
- m->openInputFile(temp, input);
-
- while(char c = input.get()){
- if(input.eof()) { break; }
- else { output << c; }
- }
-
- input.close();
- output.close();
- }
- catch(exception& e) {
- m->errorOut(e, "AlignCommand", "appendAlignFiles");
- exit(1);
- }
-}
//**********************************************************************************************************************
void AlignCommand::appendReportFiles(string temp, string filename) {
while (!input.eof()) { char c = input.get(); if (c == 10 || c == 13){ break; } } // get header line
- while(char c = input.get()){
- if(input.eof()) { break; }
- else { output << c; }
- }
+ char buffer[4096];
+ while (!input.eof()) {
+ input.read(buffer, 4096);
+ output.write(buffer, input.gcount());
+ }
input.close();
output.close();
vector<string> setParameters();
string getCommandName() { return "align.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "DeSantis TZ, Jr., Hugenholtz P, Keller K, Brodie EL, Larsen N, Piceno YM, Phan R, Andersen GL (2006). NAST: a multiple sequence alignment server for comparative analysis of 16S rRNA genes. Nucleic Acids Res 34: W394-9.\nSchloss PD (2009). A high-throughput DNA sequence aligner for microbial ecology studies. PLoS ONE 4: e8230.\nSchloss PD (2010). The effects of alignment quality, distance calculation method, sequence filtering, and region on the analysis of 16S rRNA gene-based studies. PLoS Comput Biol 6: e1000844.\nhttp://www.mothur.org/wiki/Align.seqs http://www.mothur.org/wiki/Align.seqs"; }
string getDescription() { return "align sequences"; }
int driver(linePair*, string, string, string, string);
int createProcesses(string, string, string, string);
- void appendAlignFiles(string, string);
void appendReportFiles(string, string);
#ifdef USE_MPI
}
}
//**********************************************************************************************************************
+// Returns the file-name tag (suffix) that amova uses for the given output type.
+//   type      - output type key; must already be present in outputTypes.
+//   inputName - not used by this implementation.
+// Returns "amova" for type "amova". Returns "" after logging an error when the type is
+// not one this command creates; a type that is registered in outputTypes but has no tag
+// defined here also sets m->control_pressed.
+string AmovaCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		string tag = "";
+		map<string, vector<string> >::iterator it;
+
+		//is this a type this command creates
+		it = outputTypes.find(type);
+		if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+		else {
+			if (type == "amova") {   tag = "amova";   }
+			//a registered type without a tag is flagged through control_pressed instead of silently yielding an empty tag
+			else { m->mothurOut("[ERROR]: No definition for type " + type + " output file.\n"); m->control_pressed = true;  }
+		}
+		return tag;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "AmovaCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
AmovaCommand::AmovaCommand(){
try {
abort = true; calledHelp = true;
//create a new filename
ofstream AMOVAFile;
- string AMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + "amova";
+ string AMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + getOutputFileNameTag("amova");
m->openOutputFile(AMOVAFileName, AMOVAFile);
outputNames.push_back(AMOVAFileName); outputTypes["amova"].push_back(AMOVAFileName);
vector<string> setParameters();
string getCommandName() { return "amova"; }
string getCommandCategory() { return "Hypothesis Testing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Anderson MJ (2001). A new method for non-parametric multivariate analysis of variance. Austral Ecol 26: 32-46.\nhttp://www.mothur.org/wiki/Amova"; }
string getDescription() { return "analysis of molecular variance"; }
exit(1);
}
}
-
+//**********************************************************************************************************************
+// Maps an output type of the anosim command to its file-name tag.
+//   type      - output type key looked up in outputTypes.
+//   inputName - not used by this implementation.
+// Yields "anosim" for type "anosim"; otherwise logs an error and yields "".
+// A type that is registered but has no tag here additionally sets m->control_pressed.
+string AnosimCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		//types this command never produces are reported and get an empty tag
+		if (outputTypes.find(type) == outputTypes.end()) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		if (type == "anosim") { return "anosim"; }
+
+		//registered type, but no tag is defined for it
+		m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+		m->control_pressed = true;
+		return "";
+	}
+	catch(exception& e) {
+		m->errorOut(e, "AnosimCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
//**********************************************************************************************************************
AnosimCommand::AnosimCommand(){
try {
//create a new filename
ofstream ANOSIMFile;
- string ANOSIMFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + "anosim";
+ string ANOSIMFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + getOutputFileNameTag("anosim");
m->openOutputFile(ANOSIMFileName, ANOSIMFile);
outputNames.push_back(ANOSIMFileName); outputTypes["anosim"].push_back(ANOSIMFileName);
m->mothurOut("\ncomparison\tR-value\tP-value\n");
vector<string> setParameters();
string getCommandName() { return "anosim"; }
string getCommandCategory() { return "Hypothesis Testing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Clarke, K. R. (1993). Non-parametric multivariate analysis of changes in community structure. _Australian Journal of Ecology_ 18, 117-143.\nhttp://www.mothur.org/wiki/Anosim"; }
string getDescription() { return "analysis of similarity"; }
#include "mothur.h"
#include "cluster.hpp"
#include "rabundvector.hpp"
-#include "sparsematrix.hpp"
+#include "sparsedistancematrix.h"
/* This class implements the average UPGMA, average neighbor clustering algorithm */
/***********************************************************************/
-AverageLinkage::AverageLinkage(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c, string s) :
- Cluster(rav, lv, dm, c, s)
+AverageLinkage::AverageLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s) :
+Cluster(rav, lv, dm, c, s)
{
saveRow = -1;
saveCol = -1;
/***********************************************************************/
//This function updates the distance based on the average linkage method.
-bool AverageLinkage::updateDistance(MatData& colCell, MatData& rowCell) {
+bool AverageLinkage::updateDistance(PDistCell& colCell, PDistCell& rowCell) {
try {
if ((saveRow != smallRow) || (saveCol != smallCol)) {
rowBin = rabund->get(smallRow);
saveRow = smallRow;
saveCol = smallCol;
}
-
- colCell->dist = (colBin * colCell->dist + rowBin * rowCell->dist) / totalBin;
-
+ //cout << "colcell.dist = " << colCell.dist << '\t' << smallRow << '\t' << smallCol << '\t' << rowCell.dist << endl;
+ colCell.dist = (colBin * colCell.dist + rowBin * rowCell.dist) / totalBin;
+
return(true);
}
catch(exception& e) {
}
}
//**********************************************************************************************************************
+// Maps an output type of the bin.seqs command to its file-name tag.
+//   type      - output type key looked up in outputTypes.
+//   inputName - not used by this implementation.
+// Yields "fasta" (no leading '.') for type "fasta"; otherwise logs an error and yields "".
+// A type that is registered but has no tag here additionally sets m->control_pressed.
+string BinSeqCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		string fileTag = "";
+
+		//is this a type this command creates
+		const bool createdHere = (outputTypes.find(type) != outputTypes.end());
+		if (!createdHere) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+		}
+		else if (type == "fasta") {
+			fileTag = "fasta";
+		}
+		else {
+			m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+			m->control_pressed = true;
+		}
+
+		return fileTag;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "BinSeqCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
BinSeqCommand::BinSeqCommand(){
try {
abort = true; calledHelp = true;
try {
string binnames, name, sequence;
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + list->getLabel() + ".fasta";
- m->openOutputFile(outputFileName, out);
+ //the tag is returned without a leading '.', so separate it from the label explicitly
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + list->getLabel() + "." + getOutputFileNameTag("fasta");
+ m->openOutputFile(outputFileName, out);
//save to output list of output file names
outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
vector<string> setParameters();
string getCommandName() { return "bin.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Bin.seqs"; }
string getDescription() { return "maps sequences to otus"; }
//double findMax(vector<double>); //This returns the maximum value in the vector.
int numNZ(vector<int>); //This returns the number of non-zero values in the vector.
double numNZ(vector<double>); //This returns the number of non-zero values in the vector.
- //double numPos(vector<double>); //This returns the number of positive values in the vector.
- //double findMaxDiff(vector<double>, vector<double>); //This returns the absolute value of the maximum difference between the two vectors.
- //double findDStat(vector<double>, vector<double>, double); //This returns the D-Statistic of the two vectors with the given total number of species.
- //vector<int> findQuartiles(vector<double>); //This returns a vector with the first element being the index of the lower quartile of the vector and the second element being the index of the upper quartile of the vector.
- //vector<double> add(vector<double>, double); //This adds the given number to every element in the given vector and returns the new vector.
- //vector<double> multiply(vector<double>, double); //This multiplies every element in the given vector by the given number and returns the new vector.
- //vector<double> power(vector<double>, double); //This raises every element in the given vector to the given number and returns the new vector.
- //vector<double> addVecs(vector<double>,vector<double>); //The given vectors must be the same size. This adds the ith element of the first given vector to the ith element of the second given vector and returns the new vector.
- //vector<double> multVecs(vector<double>,vector<double>); //The given vectors must be the same size. This multiplies the ith element of the first given vector to the ith element of the second given vector and returns the new vector.
- //vector<double> remDup(vector<double>); //This returns a vector that contains 1 of each unique element in the given vector. The order of the elements is not changed.
- //vector<double> genCVec(vector<double>); //This returns a cumilative vector of the given vector. The ith element of the returned vector is the sum of all the elements in the given vector up to i.
- //vector<double> genRelVec(vector<double>); //This finds the sum of all the elements in the given vector and then divides the ith element in the given vector by that sum and then puts the result into a new vector, which is returned after all of the elements in the given vector have been used.
- ///vector<double> genDiffVec(vector<double>, vector<double>);//This subtracts the ith element of the second given vector from the ith element of the first given vector and returns the new vector.
- //vector<double> genCSVec(vector<double>);//This calculates the number of species that have the same number of individuals as the ith element of the given vector and then returns a cumulative vector.
- //vector<double> genTotVec(vector<vector<double> >); //This adds up the ith element of all the columns and puts that value into a new vector. It those this for all the rows and then returns the new vector.
- //vector<double> quicksort(vector<double>); //This sorts the given vector from highest to lowest and returns the sorted vector.
- //vector<vector<double> > gen2DVec(vector<double>, int, int); //(vector, #rows/columns, 0 if the second parameter was rows, 1 if the second parameter was columns) Transforms a single vector that was formatted like a table into a 2D vector.
- //vector<string> getSData(char[]);//This takes a file name as a parameter and reads all of the data in the file into a <string> vector.
};
-/**************************************************************************************************/
-
-/*This Class is similar to the GeometricSeries.h class. It calculates
-the broken stick distribution of the table and prints the D-Statistic
-and the confidence limits for the Kolmogorov-Smirnov 1-Sample test
-with a 95% confidence level.
-
-class BrokenStick
-{
- public:
- void doBStick(vector<double>);
-};
-
-//**************************************************************************************************/
-/*This Class calculates the geometric series distribution for the data.
-It prints the D-Statistic and the critical values for the Kolmogorov-Smirnov
-1-sample test at the 95% confidence interval.*/
-
-/*class GeometricSeries
-{
- public:
- void doGeomTest(vector<double>);
-};*/
-
-/**************************************************************************************************
-//This Class calculates the jackknifed estimate of the data and
-//prints it and the confidence limits at a chosen confidence level.
-
-class Jackknifing
-{
- public:
- void doJK(vector<double>, double);
-};
-/**************************************************************************************************
-/*This Class stores calculates the Kolmogorov-Smirnov 2-Sample test between two samples.
-It prints the D-Statistic and the critical value for the test at
-the 90% and 95% confidence interval.
-
-class KS2SampleTest
-{
- public:
- void doKSTest(vector<double>, vector<double>);
-};
-
-/**************************************************************************************************
-//This Class calculates and prints the Q-Statistic for the data.
-class QStatistic
-{
- public:
- void doQStat(vector<double>);
-};
-/**************************************************************************************************
-class SSBPDiversityIndices
-{
- public:
- void doSSBP(vector<double>);
- double getShan(vector<double> vec);//The Shannon Index
- double getSimp(vector<double> vec);//The Simpson Index
- double getBP(vector<double> vec);//The Berger-Parker Index
-};
/**************************************************************************************************/
//This Class stores the table of the confidence limits of the Student-T distribution.
class TDTable
public:
double getConfLimit(int,int);
};
-
-/**************************************************************************************************
-//This Class stores the table of the confidence limits of the One-Sample Kolmogorov-Smirnov Test.
-class KOSTable
-{
- public:
- double getConfLimit(int);
-};
-
/**************************************************************************************************/
-/*This Class calculates the truncated lognormal for the data.
-It then prints the D-Statistic and the critical values for the
-Kolmogorov-Smirnov 1-Sample test.*
-
-class TrunLN
-{
- public:
- void doTrunLN(vector<double>, vector<double>);
-};
-/**************************************************************************************************/
-
#endif
}
}
//**********************************************************************************************************************
+// Returns the file name tag associated with an output type of this command.
+//   type      - output type key; it is looked up in outputTypes first.
+//   inputName - not used by this implementation.
+// A type missing from outputTypes is reported through m->mothurOut and yields "".
+// A type that is present but has no tag defined here is also reported, sets
+// m->control_pressed, and yields "".
+string CatchAllCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // is this a type this command creates?
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "csv")     { return "csv"; }
+        if (type == "summary") { return "catchall.summary"; }
+
+        // known output type, but no tag has been defined for it
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CatchAllCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
CatchAllCommand::CatchAllCommand(){
try {
abort = true; calledHelp = true;
set<string> processedLabels;
set<string> userLabels = labels;
- string summaryfilename = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "catchall.summary";
+ string summaryfilename = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + getOutputFileNameTag("summary");
summaryfilename = m->getFullPathName(summaryfilename);
if (m->debug) { m->mothurOut("[DEBUG]: Input File = " + inputFileNames[p] + ".\n[DEBUG]: inputdata address = " + toString(&input) + ".\n[DEBUG]: sabund address = " + toString(&sabund) + ".\n"); }
filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
if (savedOutputDir == "") { filename = m->getSimpleName(filename); }
- outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
- outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
- outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
- outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
+ outputNames.push_back(filename + "_Analysis." + getOutputFileNameTag("csv")); outputTypes["csv"].push_back(filename + "_Analysis." + getOutputFileNameTag("csv"));
+ outputNames.push_back(filename + "_BestModelsAnalysis." + getOutputFileNameTag("csv")); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis." + getOutputFileNameTag("csv"));
+ outputNames.push_back(filename + "_BestModelsFits." + getOutputFileNameTag("csv")); outputTypes["csv"].push_back(filename + "_BestModelsFits." + getOutputFileNameTag("csv"));
+ outputNames.push_back(filename + "_BubblePlot." + getOutputFileNameTag("csv")); outputTypes["csv"].push_back(filename + "_BubblePlot." + getOutputFileNameTag("csv"));
if (m->debug) { m->mothurOut("[DEBUG]: About to create summary file for: " + filename + ".\n[DEBUG]: sabund label = " + sabund->getLabel() + ".\n"); }
filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
if (savedOutputDir == "") { filename = m->getSimpleName(filename); }
- outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
- outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
- outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
- outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
+ outputNames.push_back(filename + "_Analysis." + getOutputFileNameTag("csv")); outputTypes["csv"].push_back(filename + "_Analysis." + getOutputFileNameTag("csv"));
+ outputNames.push_back(filename + "_BestModelsAnalysis." + getOutputFileNameTag("csv")); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis." + getOutputFileNameTag("csv"));
+ outputNames.push_back(filename + "_BestModelsFits." + getOutputFileNameTag("csv")); outputTypes["csv"].push_back(filename + "_BestModelsFits." + getOutputFileNameTag("csv"));
+ outputNames.push_back(filename + "_BubblePlot." + getOutputFileNameTag("csv")); outputTypes["csv"].push_back(filename + "_BubblePlot." + getOutputFileNameTag("csv"));
if (m->debug) { m->mothurOut("[DEBUG]: About to create summary file for: " + filename + ".\n[DEBUG]: sabund label = " + sabund->getLabel() + ".\n"); }
filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
if (savedOutputDir == "") { filename = m->getSimpleName(filename); }
- outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
- outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
- outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
- outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
- if (m->debug) { m->mothurOut("[DEBUG]: About to create summary file for: " + filename + ".\n[DEBUG]: sabund label = " + sabund->getLabel() + ".\n"); }
+ outputNames.push_back(filename + "_Analysis." + getOutputFileNameTag("csv")); outputTypes["csv"].push_back(filename + "_Analysis." + getOutputFileNameTag("csv"));
+ outputNames.push_back(filename + "_BestModelsAnalysis." + getOutputFileNameTag("csv")); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis." + getOutputFileNameTag("csv"));
+ outputNames.push_back(filename + "_BestModelsFits." + getOutputFileNameTag("csv")); outputTypes["csv"].push_back(filename + "_BestModelsFits." + getOutputFileNameTag("csv"));
+ outputNames.push_back(filename + "_BubblePlot." + getOutputFileNameTag("csv")); outputTypes["csv"].push_back(filename + "_BubblePlot." + getOutputFileNameTag("csv")); if (m->debug) { m->mothurOut("[DEBUG]: About to create summary file for: " + filename + ".\n[DEBUG]: sabund label = " + sabund->getLabel() + ".\n"); }
createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
try {
ofstream out;
- string combineFileName = savedOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "catchall.summary";
+ string combineFileName = savedOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + getOutputFileNameTag("summary");
//open combined file
m->openOutputFile(combineFileName, out);
vector<string> setParameters();
string getCommandName() { return "catchall"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Bunge, J. (2011). Estimating the number of species with CatchAll. Forthcoming in Proceedings of the Pacific Symposium on Biocomputing 2011.\nhttp://www.northeastern.edu/catchall/index.html http://www.mothur.org/wiki/Catchall"; }
string getDescription() { return "estimate number of species"; }
}
}
//**********************************************************************************************************************
+// Returns the file name tag associated with an output type of this command.
+//   type      - output type key; it is looked up in outputTypes first.
+//   inputName - not used by this implementation.
+// A type missing from outputTypes is reported through m->mothurOut and yields "".
+// A type that is present but has no tag defined here is also reported, sets
+// m->control_pressed, and yields "".
+string ChimeraBellerophonCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // is this a type this command creates?
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "chimera") { return "bellerophon.chimeras"; }
+        if (type == "accnos")  { return "bellerophon.accnos"; }
+
+        // known output type, but no tag has been defined for it
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraBellerophonCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
ChimeraBellerophonCommand::ChimeraBellerophonCommand(){
try {
abort = true; calledHelp = true;
chimera = new Bellerophon(fastaFileNames[i], filter, correction, window, increment, processors, outputDir);
if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[i]); }//if user entered a file with a path then preserve it
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "bellerophon.chimeras";
- string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "bellerophon.accnos";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + getOutputFileNameTag("chimera");
+ string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + getOutputFileNameTag("accnos");
chimera->getChimeras();
vector<string> setParameters();
string getCommandName() { return "chimera.bellerophon"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Huber T, Faulkner G, Hugenholtz P (2004). Bellerophon: a program to detect chimeric sequences in multiple sequence alignments. Bioinformatics 20: 2317-9. \nhttp://www.mothur.org/wiki/Chimera.bellerophon"; }
string getDescription() { return "detect chimeric sequences"; }
}
}
//**********************************************************************************************************************
+// Returns the file name tag associated with an output type of this command.
+//   type      - output type key; it is looked up in outputTypes first.
+//   inputName - not used by this implementation.
+// A type missing from outputTypes is reported through m->mothurOut and yields "".
+// A type that is present but has no tag defined here is also reported, sets
+// m->control_pressed, and yields "".
+string ChimeraCcodeCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // is this a type this command creates?
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "chimera") { return "ccode.chimeras"; }
+        if (type == "mapinfo") { return "mapinfo"; }
+        if (type == "accnos")  { return "ccode.accnos"; }
+
+        // known output type, but no tag has been defined for it
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraCcodeCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
ChimeraCcodeCommand::ChimeraCcodeCommand(){
try {
abort = true; calledHelp = true;
if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
string outputFileName, accnosFileName;
if (maskfile != "") {
- outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + maskfile + ".ccode.chimeras";
- accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + maskfile + ".ccode.accnos";
+ outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + maskfile + getOutputFileNameTag("chimera");
+ accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + maskfile + getOutputFileNameTag("accnos");
}else {
- outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "ccode.chimeras";
- accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "ccode.accnos";
+ outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
+ accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
+
}
- string mapInfo = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "mapinfo";
+ string mapInfo = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("mapinfo");
+
if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear(); return 0; }
vector<string> setParameters();
string getCommandName() { return "chimera.ccode"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Gonzalez JM, Zimmermann J, Saiz-Jimenez C (2005). Evaluating putative chimeric sequences from PCR-amplified products. Bioinformatics 21: 333-7. \nhttp://www.mothur.org/wiki/Chimera.ccode"; }
string getDescription() { return "detect chimeric sequences"; }
}
}
//**********************************************************************************************************************
+// Returns the file name tag associated with an output type of this command.
+//   type      - output type key; it is looked up in outputTypes first.
+//   inputName - not used by this implementation.
+// A type missing from outputTypes is reported through m->mothurOut and yields "".
+// A type that is present but has no tag defined here is also reported, sets
+// m->control_pressed, and yields "".
+string ChimeraCheckCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // is this a type this command creates?
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "chimera") { return "chimeracheck.chimeras"; }
+
+        // known output type, but no tag has been defined for it
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        // report under this class's own name (was "ChimeraCcodeCommand", a copy-paste slip)
+        m->errorOut(e, "ChimeraCheckCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
ChimeraCheckCommand::ChimeraCheckCommand(){
try {
abort = true; calledHelp = true;
if (m->control_pressed) { delete chimera; return 0; }
if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[i]); }//if user entered a file with a path then preserve it
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "chimeracheck.chimeras";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + getOutputFileNameTag("chimera");
outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
#ifdef USE_MPI
//report progress
if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
+ count++;
}
//report progress
if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
vector<string> setParameters();
string getCommandName() { return "chimera.check"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "CHIMERA_CHECK version 2.7 written by Niels Larsen (http://wdcm.nig.ac.jp/RDP/docs/chimera_doc.html) \nhttp://www.mothur.org/wiki/Chimera.check"; }
string getDescription() { return "detect chimeric sequences"; }
}
}
//**********************************************************************************************************************
+// Returns the file name tag associated with an output type of this command.
+//   type      - output type key; it is looked up in outputTypes first.
+//   inputName - not used by this implementation.
+// A type missing from outputTypes is reported through m->mothurOut and yields "".
+// A type that is present but has no tag defined here is also reported, sets
+// m->control_pressed, and yields "".
+string ChimeraPerseusCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // is this a type this command creates?
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "chimera") { return "perseus.chimeras"; }
+        if (type == "accnos")  { return "perseus.accnos"; }
+
+        // known output type, but no tag has been defined for it
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraPerseusCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
ChimeraPerseusCommand::ChimeraPerseusCommand(){
try {
abort = true; calledHelp = true;
int start = time(NULL);
if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "perseus.chimera";
- string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "perseus.accnos";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
+ string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
+
//string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
//you provided a groupfile
vector<string> setParameters();
string getCommandName() { return "chimera.perseus"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Chimera.perseus\n"; }
string getDescription() { return "detect chimeric sequences"; }
}
}
//**********************************************************************************************************************
+// Returns the file name tag associated with an output type of this command.
+//   type      - output type key; it is looked up in outputTypes first.
+//   inputName - not used by this implementation.
+// A type missing from outputTypes is reported through m->mothurOut and yields "".
+// A type that is present but has no tag defined here is also reported, sets
+// m->control_pressed, and yields "".
+string ChimeraPintailCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // is this a type this command creates?
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "chimera") { return "pintail.chimeras"; }
+        if (type == "accnos")  { return "pintail.accnos"; }
+
+        // known output type, but no tag has been defined for it
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraPintailCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
ChimeraPintailCommand::ChimeraPintailCommand(){
try {
abort = true; calledHelp = true;
if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
string outputFileName, accnosFileName;
if (maskfile != "") {
- outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
- accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
+ outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + getOutputFileNameTag("chimera");
+ accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + getOutputFileNameTag("accnos");
}else {
- outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "pintail.chimeras";
- accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "pintail.accnos";
+ outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
+ accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
}
if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
vector<string> setParameters();
string getCommandName() { return "chimera.pintail"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Ashelford KE, Chuzhanova NA, Fry JC, Jones AJ, Weightman AJ (2005). At least 1 in 20 16S rRNA sequence records currently held in public repositories is estimated to contain substantial anomalies. Appl Environ Microbiol 71: 7724-36. \nAshelford KE, Chuzhanova NA, Fry JC, Jones AJ, Weightman AJ (2006). New screening software shows that most recent large 16S rRNA gene clone libraries contain chimeras. Appl Environ Microbiol 72: 5734-41. \nhttp://www.mothur.org/wiki/Chimera.pintail"; }
string getDescription() { return "detect chimeric sequences"; }
}
}
//**********************************************************************************************************************
+// Returns the file name tag associated with an output type of this command.
+//   type      - output type key; it is looked up in outputTypes first.
+//   inputName - not used by this implementation.
+// A type missing from outputTypes is reported through m->mothurOut and yields "".
+// A type that is present but has no tag defined here is also reported, sets
+// m->control_pressed, and yields "".
+string ChimeraSlayerCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // is this a type this command creates?
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "chimera") { return "slayer.chimeras"; }
+        if (type == "accnos")  { return "slayer.accnos"; }
+        if (type == "fasta")   { return "slayer.fasta"; }
+
+        // known output type, but no tag has been defined for it
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraSlayerCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
ChimeraSlayerCommand::ChimeraSlayerCommand(){
try {
abort = true; calledHelp = true;
int start = time(NULL);
if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.chimera";
- string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.accnos";
- string trimFastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.fasta";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
+ string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
+ string trimFastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("fasta");
//clears files
ofstream out, out1, out2;
vector<string> setParameters();
string getCommandName() { return "chimera.slayer"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Haas BJ, Gevers D, Earl A, Feldgarden M, Ward DV, Giannokous G, Ciulla D, Tabbaa D, Highlander SK, Sodergren E, Methe B, Desantis TZ, Petrosino JF, Knight R, Birren BW (2011). Chimeric 16S rRNA sequence formation and detection in Sanger and 454-pyrosequenced PCR amplicons. Genome Res. \nhttp://www.mothur.org/wiki/Chimera.slayer"; }
string getDescription() { return "detect chimeric sequences"; }
}
}
//**********************************************************************************************************************
+// Returns the file name tag associated with an output type of this command.
+//   type      - output type key; it is looked up in outputTypes first.
+//   inputName - not used by this implementation.
+// A type missing from outputTypes is reported through m->mothurOut and yields "".
+// A type that is present but has no tag defined here is also reported, sets
+// m->control_pressed, and yields "".
+string ChimeraUchimeCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // is this a type this command creates?
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "chimera") { return "uchime.chimeras"; }
+        if (type == "accnos")  { return "uchime.accnos"; }
+        if (type == "alns")    { return "uchime.alns"; }
+
+        // known output type, but no tag has been defined for it
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraUchimeCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
ChimeraUchimeCommand::ChimeraUchimeCommand(){
try {
abort = true; calledHelp = true;
int start = time(NULL);
string nameFile = "";
if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
- string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.accnos";
- string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.alns";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
+ string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
+ string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("alns");
string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
//you provided a groupfile
if (m->control_pressed) { return 0; }
//remove file made for uchime
- m->mothurRemove(filename);
+ if (!m->debug) { m->mothurRemove(filename); }
+ else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
//append files
m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
vector<string> setParameters();
string getCommandName() { return "chimera.uchime"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "uchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\nhttp://www.mothur.org/wiki/Chimera.uchime\nEdgar,R.C., Haas,B.J., Clemente,J.C., Quince,C. and Knight,R. (2011), UCHIME improves sensitivity and speed of chimera detection, Bioinformatics, in press.\n"; }
string getDescription() { return "detect chimeric sequences"; }
exit(1);
}
}
-
+//**********************************************************************************************************************
+//Returns the file-name tag chop.seqs appends for the requested output type.
+//  type      - output type to look up ("fasta" or "accnos" have tags defined here)
+//  inputName - not used by this command
+//An empty string is returned when the type is not registered in outputTypes or has no tag;
+//in the second case m->control_pressed is also set.
+string ChopSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        string tag = "";
+
+        //reject types this command never registers as output
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return tag;
+        }
+
+        if (type == "fasta") {
+            tag = "chop.fasta";
+        }else if (type == "accnos") {
+            tag = "chop.accnos";
+        }else {
+            //registered type without a tag definition: report it and flag the run to stop
+            m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+            m->control_pressed = true;
+        }
+
+        return tag;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChopSeqsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
ChopSeqsCommand::ChopSeqsCommand(){
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "chop.fasta";
- string outputFileNameAccnos = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "chop.accnos";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta");
+ string outputFileNameAccnos = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("accnos");
ofstream out;
m->openOutputFile(outputFileName, out);
vector<string> setParameters();
string getCommandName() { return "chop.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Chops.seqs"; }
string getDescription() { return "trim sequence length"; }
MPI_File_close(&inMPI);
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
-#else
- ifstream inTax;
- m->openInputFile(file, inTax);
-
- //read template seqs and save
- while (!inTax.eof()) {
- inTax >> name; m->gobble(inTax);
- inTax >> taxInfo;
-
- if (m->debug) { m->mothurOut("[DEBUG]: name = '" + name + "' tax = '" + taxInfo + "'\n"); }
-
- taxonomy[name] = taxInfo;
-
- phyloTree->addSeqToTree(name, taxInfo);
-
- m->gobble(inTax);
- }
- inTax.close();
-#endif
-
+#else
-
+ taxonomy.clear();
+ m->readTax(file, taxonomy);
+ for (map<string, string>::iterator itTax = taxonomy.begin(); itTax != taxonomy.end(); itTax++) { phyloTree->addSeqToTree(itTax->first, itTax->second); }
+#endif
phyloTree->assignHeirarchyIDs(0);
phyloTree->setUp(file);
}
}
//**********************************************************************************************************************
+//Returns the file-name tag classify.otu appends for the requested output type.
+//  type      - output type to look up ("constaxonomy" or "taxsummary" have tags defined here)
+//  inputName - not used by this command
+//An empty string is returned when the type is not registered in outputTypes or has no tag;
+//in the second case m->control_pressed is also set.
+string ClassifyOtuCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        string tag = "";
+
+        //reject types this command never registers as output
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return tag;
+        }
+
+        if (type == "constaxonomy") {
+            tag = "cons.taxonomy";
+        }else if (type == "taxsummary") {
+            tag = "cons.tax.summary";
+        }else {
+            //registered type without a tag definition: report it and flag the run to stop
+            m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+            m->control_pressed = true;
+        }
+
+        return tag;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClassifyOtuCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
ClassifyOtuCommand::ClassifyOtuCommand(){
try {
abort = true; calledHelp = true;
if (abort == true) { if (calledHelp) { return 0; } return 2; }
//if user gave a namesfile then use it
- if (namefile != "") { readNamesFile(); }
+ if (namefile != "") { m->readNames(namefile, nameMap, true); }
//read taxonomy file and save in map for easy access in building bin trees
- readTaxonomyFile();
+ m->readTax(taxfile, taxMap);
if (m->control_pressed) { return 0; }
exit(1);
}
}
-
-//**********************************************************************************************************************
-int ClassifyOtuCommand::readNamesFile() {
- try {
-
- ifstream inNames;
- m->openInputFile(namefile, inNames);
-
- string name, names;
-
- while(!inNames.eof()){
- inNames >> name; //read from first column A
- inNames >> names; //read from second column A,B,C,D
- m->gobble(inNames);
-
- //parse names into vector
- vector<string> theseNames;
- m->splitAtComma(names, theseNames);
-
- for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = name; }
-
- if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; }
- }
- inNames.close();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "ClassifyOtuCommand", "readNamesFile");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-int ClassifyOtuCommand::readTaxonomyFile() {
- try {
-
- ifstream in;
- m->openInputFile(taxfile, in);
-
- string name, tax;
-
- while(!in.eof()){
- in >> name >> tax;
- m->gobble(in);
-
- //are there confidence scores, if so remove them
- if (tax.find_first_of('(') != -1) { m->removeConfidences(tax); }
-
- taxMap[name] = tax;
-
- if (m->control_pressed) { in.close(); taxMap.clear(); return 0; }
- }
- in.close();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "ClassifyOtuCommand", "readTaxonomyFile");
- exit(1);
- }
-}
//**********************************************************************************************************************
vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* thisList, int& size, string& conTax) {
try{
if (outputDir == "") { outputDir += m->hasPath(listfile); }
ofstream out;
- string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.taxonomy";
+ string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + getOutputFileNameTag("constaxonomy");
m->openOutputFile(outputFile, out);
outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
ofstream outSum;
- string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.tax.summary";
+ string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + getOutputFileNameTag("taxsummary");
m->openOutputFile(outputSumFile, outSum);
outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
vector<string> setParameters();
string getCommandName() { return "classify.otu"; }
string getCommandCategory() { return "Phylotype Analysis"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Schloss PD, Westcott SL (2011). Assessing and improving methods used in OTU-based approaches for 16S rRNA gene sequence analysis. Appl Environ Microbiol. \nhttp://www.mothur.org/wiki/Classify.otu"; }
string getDescription() { return "find the concensus taxonomy for each OTU"; }
map<string, string> nameMap;
map<string, string> taxMap;
- int readNamesFile();
- int readTaxonomyFile();
int process(ListVector*);
string addUnclassifieds(string, int);
vector<string> findConsensusTaxonomy(int, ListVector*, int&, string&); // returns the name of the "representative" taxonomy of given bin
}
}
//**********************************************************************************************************************
+//Returns the file-name tag classify.seqs appends for the requested output type.
+//  type      - output type to look up ("taxonomy", "accnos", "taxsummary" or "matchdist" have tags defined here)
+//  inputName - not used by this command
+//An empty string is returned when the type is not registered in outputTypes or has no tag;
+//in the second case m->control_pressed is also set.
+string ClassifySeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        string tag = "";
+
+        //reject types this command never registers as output
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return tag;
+        }
+
+        if (type == "taxonomy") {
+            tag = "taxonomy";
+        }else if (type == "accnos") {
+            tag = "flip.accnos";
+        }else if (type == "taxsummary") {
+            tag = "tax.summary";
+        }else if (type == "matchdist") {
+            tag = "match.dist";
+        }else {
+            //registered type without a tag definition: report it and flag the run to stop
+            m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+            m->control_pressed = true;
+        }
+
+        return tag;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClassifySeqsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
ClassifySeqsCommand::ClassifySeqsCommand(){
try {
abort = true; calledHelp = true;
string RippedTaxName = "";
bool foundDot = false;
for (int i = baseTName.length()-1; i >= 0; i--) {
- cout << baseTName[i] << endl;
if (foundDot && (baseTName[i] != '.')) { RippedTaxName = baseTName[i] + RippedTaxName; }
else if (foundDot && (baseTName[i] == '.')) { break; }
else if (!foundDot && (baseTName[i] == '.')) { foundDot = true; }
if (RippedTaxName != "") { RippedTaxName += "."; }
if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
- string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "taxonomy";
- string newaccnosFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "flip.accnos";
+ string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + getOutputFileNameTag("taxonomy");
+ string newaccnosFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + getOutputFileNameTag("accnos");
string tempTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "taxonomy.temp";
- string taxSummary = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "tax.summary";
+ string taxSummary = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + getOutputFileNameTag("taxsummary");
if ((method == "knn") && (search == "distance")) {
- string DistName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "match.dist";
+ string DistName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("matchdist");
classify->setDistName(DistName); outputNames.push_back(DistName); outputTypes["matchdist"].push_back(DistName);
}
if(namefile != "") {
m->mothurOut("Reading " + namefileNames[s] + "..."); cout.flush();
-
nameMap.clear(); //remove old names
-
- ifstream inNames;
- m->openInputFile(namefileNames[s], inNames);
-
- string firstCol, secondCol;
- while(!inNames.eof()) {
- inNames >> firstCol >> secondCol; m->gobble(inNames);
-
- vector<string> temp;
- m->splitAtComma(secondCol, temp);
-
- nameMap[firstCol] = temp;
- }
- inNames.close();
-
+ m->readNames(namefileNames[s], nameMap);
m->mothurOut(" Done."); m->mothurOutEndLine();
}
#endif
else { m->mothurRemove(accnos); } //remove so other files can be renamed to it
for(int i=0;i<processIDS.size();i++){
- appendTaxFiles((taxFileName + toString(processIDS[i]) + ".temp"), taxFileName);
- appendTaxFiles((tempTaxFile + toString(processIDS[i]) + ".temp"), tempTaxFile);
+ m->appendFiles((taxFileName + toString(processIDS[i]) + ".temp"), taxFileName);
+ m->appendFiles((tempTaxFile + toString(processIDS[i]) + ".temp"), tempTaxFile);
if (!(m->isBlank(accnos + toString(processIDS[i]) + ".temp"))) {
nonBlankAccnosFiles.push_back(accnos + toString(processIDS[i]) + ".temp");
}else { m->mothurRemove((accnos + toString(processIDS[i]) + ".temp")); }
rename(nonBlankAccnosFiles[0].c_str(), accnos.c_str());
for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
- appendTaxFiles(nonBlankAccnosFiles[h], accnos);
+ m->appendFiles(nonBlankAccnosFiles[h], accnos);
m->mothurRemove(nonBlankAccnosFiles[h]);
}
}else { //recreate the accnosfile if needed
exit(1);
}
}
-/**************************************************************************************************/
-
-void ClassifySeqsCommand::appendTaxFiles(string temp, string filename) {
- try{
-
- ofstream output;
- ifstream input;
- m->openOutputFileAppend(filename, output);
- m->openInputFile(temp, input);
-
- while(char c = input.get()){
- if(input.eof()) { break; }
- else { output << c; }
- }
-
- input.close();
- output.close();
- }
- catch(exception& e) {
- m->errorOut(e, "ClassifySeqsCommand", "appendTaxFiles");
- exit(1);
- }
-}
-
//**********************************************************************************************************************
int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempTFName, string accnos, string filename){
vector<string> setParameters();
string getCommandName() { return "classify.seqs"; }
string getCommandCategory() { return "Phylotype Analysis"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Wang Q, Garrity GM, Tiedje JM, Cole JR (2007). Naive Bayesian classifier for rapid assignment of rRNA sequences into the new bacterial taxonomy. Appl Environ Microbiol 73: 5261-7. [ for Bayesian classifier ] \nAltschul SF, Madden TL, Schaffer AA, Zhang J, Zhang Z, Miller W, Lipman DJ (1997). Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. Nucleic Acids Res 25: 3389-402. [ for BLAST ] \nDeSantis TZ, Hugenholtz P, Larsen N, Rojas M, Brodie EL, Keller K, Huber T, Dalevi D, Hu P, Andersen GL (2006). Greengenes, a chimera-checked 16S rRNA gene database and workbench compatible with ARB. Appl Environ Microbiol 72: 5069-72. [ for kmer ] \nhttp://www.mothur.org/wiki/Classify.seqs"; }
string getDescription() { return "classify sequences"; }
bool abort, probs, save, flip;
int driver(linePair*, string, string, string, string);
- void appendTaxFiles(string, string);
int createProcesses(string, string, string, string);
string addUnclassifieds(string, int);
exit(1);
}
}
-
+//**********************************************************************************************************************
+//Returns the file-name tag classify.tree appends for the requested output type.
+//  type      - output type to look up ("tree" or "summary" have tags defined here)
+//  inputName - not used by this command
+//An empty string is returned when the type is not registered in outputTypes or has no tag;
+//in the second case m->control_pressed is also set.
+string ClassifyTreeCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        string tag = "";
+
+        //reject types this command never registers as output
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return tag;
+        }
+
+        if (type == "tree") {
+            tag = "taxonomy.tre";
+        }else if (type == "summary") {
+            tag = "taxonomy.summary";
+        }else {
+            //registered type without a tag definition: report it and flag the run to stop
+            m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+            m->control_pressed = true;
+        }
+
+        return tag;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClassifyTreeCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
ClassifyTreeCommand::ClassifyTreeCommand(){
try {
Tree* outputTree = T[0];
delete reader;
- if (namefile != "") { readNamesFile(); }
+ if (namefile != "") { m->readNames(namefile, nameMap, nameCount); }
if (m->control_pressed) { delete tmap; delete outputTree; return 0; }
- readTaxonomyFile();
+ m->readTax(taxonomyfile, taxMap);
/***************************************************/
// get concensus taxonomies //
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(treefile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(treefile)) + "taxonomy.summary";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("summary");
outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
ofstream out;
string treeOutputDir = outputDir;
if (outputDir == "") { treeOutputDir += m->hasPath(treefile); }
- string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + "taxonomy.tre";
+ string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("tree");
//create a map from tree node index to names of descendants, save time later
map<int, map<string, set<string> > > nodeToDescendants; //node# -> (groupName -> groupMembers)
exit(1);
}
}
-//**********************************************************************************************************************
-int ClassifyTreeCommand::readTaxonomyFile() {
- try {
-
- ifstream in;
- m->openInputFile(taxonomyfile, in);
-
- string name, tax;
-
- while(!in.eof()){
- in >> name >> tax;
- m->gobble(in);
-
- //are there confidence scores, if so remove them
- if (tax.find_first_of('(') != -1) { m->removeConfidences(tax); }
-
- taxMap[name] = tax;
-
- if (m->control_pressed) { in.close(); taxMap.clear(); return 0; }
- }
- in.close();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "ClassifyTreeCommand", "readTaxonomyFile");
- exit(1);
- }
-}
-
-/*****************************************************************/
-int ClassifyTreeCommand::readNamesFile() {
- try {
- ifstream inNames;
- m->openInputFile(namefile, inNames);
-
- string name, names;
-
- while(!inNames.eof()){
- inNames >> name; //read from first column A
- inNames >> names; //read from second column A,B,C,D
- m->gobble(inNames);
-
- //parse names into vector
- vector<string> theseNames;
- m->splitAtComma(names, theseNames);
-
- for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = name; }
- nameCount[name] = theseNames.size();
-
- if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; }
- }
- inNames.close();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "ClassifyTreeCommand", "readNamesFile");
- exit(1);
- }
-}
-
/*****************************************************************/
vector<string> setParameters();
string getCommandName() { return "classify.tree"; }
string getCommandCategory() { return "Phylotype Analysis"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Classify.tree"; }
string getDescription() { return "Find the consensus taxonomy for the descendant of each tree node"; }
int getClassifications(Tree*&);
map<string, set<string> > getDescendantList(Tree*&, int, map<int, map<string, set<string> > >);
string getTaxonomy(set<string>, int&);
- int readNamesFile();
- int readTaxonomyFile();
};
exit(1);
}
}
-
+//**********************************************************************************************************************
+//Returns the file-name tag clearcut appends for the requested output type.
+//  type      - output type to look up ("tree" maps to "tre"; "matrixout" maps to an empty tag)
+//  inputName - not used by this command
+//An empty string is also returned when the type is not registered in outputTypes or has no
+//tag defined; in the second case m->control_pressed is set as well.
+string ClearcutCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        string tag = "";
+
+        //reject types this command never registers as output
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return tag;
+        }
+
+        if (type == "tree") {
+            tag = "tre";
+        }else if (type == "matrixout") {
+            tag = "";
+        }else {
+            //registered type without a tag definition: report it and flag the run to stop
+            m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+            m->control_pressed = true;
+        }
+
+        return tag;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClearcutCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
ClearcutCommand::ClearcutCommand(){
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
//prepare filename
- string outputName = outputDir + m->getRootName(m->getSimpleName(inputFile)) + "tre";
+ string outputName = outputDir + m->getRootName(m->getSimpleName(inputFile)) + getOutputFileNameTag("tree");
outputNames.push_back(outputName); outputTypes["tree"].push_back(outputName);
vector<char*> cPara;
vector<string> setParameters();
string getCommandName() { return "clearcut"; }
string getCommandCategory() { return "Hypothesis Testing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Sheneman L, Evans J, Foster JA (2006). Clearcut: a fast implementation of relaxed neighbor joining. Bioinformatics 22: 2823-4. \nhttp://www.mothur.org/wiki/Clearcut"; }
string getDescription() { return "create a tree from a fasta or phylip file"; }
}
}
//**********************************************************************************************************************
+//Output-tag lookup for clear.memory. No tag is defined for any type, so the result is
+//always an empty string.
+//  type      - output type being asked about
+//  inputName - not used by this command
+//A type missing from outputTypes is only reported; a type that is present is reported and
+//additionally sets m->control_pressed.
+string ClearMemoryCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        string tag = "";
+
+        //reject types this command never registers as output
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return tag;
+        }
+
+        //registered type, but this command defines no tags at all
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+
+        return tag;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClearMemoryCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
string ClearMemoryCommand::getHelpString(){
try {
string helpString = "";
vector<string> setParameters();
string getCommandName() { return "clear.memory"; }
string getCommandCategory() { return "General"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Clear.memory"; }
string getDescription() { return "remove saved references from memory"; }
#include "cluster.hpp"
#include "rabundvector.hpp"
#include "listvector.hpp"
-#include "sparsematrix.hpp"
/***********************************************************************/
-Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c, string f) :
+Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string f) :
rabund(rav), list(lv), dMatrix(dm), method(f)
{
try {
-/*
- cout << "sizeof(MatData): " << sizeof(MatData) << endl;
- cout << "sizeof(PCell*): " << sizeof(PCell*) << endl;
-
- int nCells = dMatrix->getNNodes();
- time_t start = time(NULL);
-
- MatVec matvec = MatVec(nCells);
- int i = 0;
- for (MatData currentCell = dMatrix->begin(); currentCell != dMatrix->end(); currentCell++) {
- matvec[i++] = currentCell;
- }
- for (i= matvec.size();i>0;i--) {
- dMatrix->rmCell(matvec[i-1]);
- }
- MatData it = dMatrix->begin();
- while (it != dMatrix->end()) {
- it = dMatrix->rmCell(it);
- }
- cout << "Time to remove " << nCells << " cells: " << time(NULL) - start << " seconds" << endl;
- exit(0);
- MatData it = dMatrix->begin();
- cout << it->row << "/" << it->column << "/" << it->dist << endl;
- dMatrix->rmCell(dMatrix->begin());
- cout << it->row << "/" << it->column << "/" << it->dist << endl;
- exit(0);
-*/
-
- // Create a data structure to quickly access the PCell information
- // for a certain sequence. It consists of a vector of lists, where
- // a list contains pointers (iterators) to the all distances related
- // to a certain sequence. The Vector is accessed via the index of a
- // sequence in the distance matrix.
-//ofstream outtemp;
-//string temp = "temp";
-//m->openOutputFile(temp, outtemp);
-//cout << lv->size() << endl;
- seqVec = vector<MatVec>(lv->size());
- for (MatData currentCell = dMatrix->begin(); currentCell != dMatrix->end(); currentCell++) {
-//outtemp << currentCell->row << '\t' << currentCell->column << '\t' << currentCell->dist << endl;
- seqVec[currentCell->row].push_back(currentCell);
- seqVec[currentCell->column].push_back(currentCell);
- }
-//outtemp.close();
- mapWanted = false; //set to true by mgcluster to speed up overlap merge
-
- //save so you can modify as it changes in average neighbor
- cutoff = c;
- m = MothurOut::getInstance();
-
+ //the per-sequence cell lists are no longer built here; update() reads them from dMatrix->seqVec
+ mapWanted = false; //set to true by mgcluster to speed up overlap merge
+ //NOTE(review): m is assigned last in this try block but the catch handler dereferences it - confirm nothing above can throw
+ //save so you can modify as it changes in average neighbor
+ cutoff = c;
+ m = MothurOut::getInstance();
}
catch(exception& e) {
m->errorOut(e, "Cluster", "Cluster");
exit(1);
}
}
-
-/***********************************************************************/
-
-void Cluster::getRowColCells() {
- try {
- PCell* smallCell = dMatrix->getSmallestCell(); //find the smallest cell - this routine should probably not be in the SpMat class
-
- smallRow = smallCell->row; // get its row
- smallCol = smallCell->column; // get its column
- smallDist = smallCell->dist; // get the smallest distance
- //cout << "small row = " << smallRow << "small col = " << smallCol << "small dist = " << smallDist << endl;
-
- rowCells = seqVec[smallRow]; // all distances related to the row index
- colCells = seqVec[smallCol]; // all distances related to the column index
- nRowCells = rowCells.size();
- nColCells = colCells.size();
-//cout << "num rows = " << nRowCells << "num col = " << nColCells << endl;
-
- //for (int i = 0; i < nColCells; i++) { cout << colCells[i]->row << '\t' << colCells[i]->column << endl; }
- //for (int i = 0; i < nRowCells; i++) { cout << rowCells[i]->row << '\t' << rowCells[i]->column << endl; }
- }
- catch(exception& e) {
- m->errorOut(e, "Cluster", "getRowColCells");
- exit(1);
- }
-
-}
-/***********************************************************************/
-// Remove the specified cell from the seqVec and from the sparse
-// matrix
-void Cluster::removeCell(const MatData& cell, int vrow, int vcol, bool rmMatrix){
- try {
-
- ull drow = cell->row;
- ull dcol = cell->column;
- if (((vrow >=0) && (drow != smallRow)) ||
- ((vcol >=0) && (dcol != smallCol))) {
- ull dtemp = drow;
- drow = dcol;
- dcol = dtemp;
- }
-
- ull crow;
- ull ccol;
- int nCells;
- if (vrow < 0) {
- nCells = seqVec[drow].size();
- for (vrow=0; vrow<nCells;vrow++) {
- crow = seqVec[drow][vrow]->row;
- ccol = seqVec[drow][vrow]->column;
- if (((crow == drow) && (ccol == dcol)) ||
- ((ccol == drow) && (crow == dcol))) {
- break;
- }
- }
- }
-
- seqVec[drow].erase(seqVec[drow].begin()+vrow);
- if (vcol < 0) {
- nCells = seqVec[dcol].size();
- for (vcol=0; vcol<nCells;vcol++) {
- crow = seqVec[dcol][vcol]->row;
- ccol = seqVec[dcol][vcol]->column;
- if (((crow == drow) && (ccol == dcol)) ||
- ((ccol == drow) && (crow == dcol))) {
- break;
- }
- }
- }
-
- seqVec[dcol].erase(seqVec[dcol].begin()+vcol);
-
- if (rmMatrix) {
- //cout << " removing = " << cell->row << '\t' << cell->column << '\t' << cell->dist << endl;
- dMatrix->rmCell(cell);
- // cout << "done" << endl;
- }
-
- }
- catch(exception& e) {
- m->errorOut(e, "Cluster", "removeCell");
- exit(1);
- }
-}
/***********************************************************************/
-
void Cluster::clusterBins(){
try {
- // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol);
-
- rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));
+ rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol)); //bin smallCol absorbs smallRow's abundance; smallRow is zeroed next and the vector is labelled with smallDist
rabund->set(smallRow, 0);
rabund->setLabel(toString(smallDist));
-
- // cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl;
}
catch(exception& e) {
m->errorOut(e, "Cluster", "clusterBins");
exit(1);
}
-
-
}
-
/***********************************************************************/
void Cluster::clusterNames(){
try {
- // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(smallRow) << '\t' << list->get(smallCol);
if (mapWanted) { updateMap(); }
list->set(smallCol, list->get(smallRow)+','+list->get(smallCol));
list->set(smallRow, "");
list->setLabel(toString(smallDist));
-
- // cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl;
}
catch(exception& e) {
m->errorOut(e, "Cluster", "clusterNames");
exit(1);
}
-
}
-
/***********************************************************************/
-//This function clusters based on the method of the derived class
-//At the moment only average and complete linkage are covered, because
-//single linkage uses a different approach.
void Cluster::update(double& cutOFF){
try {
- getRowColCells();
-
+ smallCol = dMatrix->getSmallestCell(smallRow);
+ nColCells = dMatrix->seqVec[smallCol].size();
+ nRowCells = dMatrix->seqVec[smallRow].size();
+
vector<int> foundCol(nColCells, 0);
-
+ //cout << dMatrix->getNNodes() << " small cell: " << smallRow << '\t' << smallCol << endl;
int search;
bool changed;
-
- // The vector has to be traversed in reverse order to preserve the index
- // for faster removal in removeCell()
+
for (int i=nRowCells-1;i>=0;i--) {
+ if (m->control_pressed) { break; }
+
//if you are not the smallCell
- if (!((rowCells[i]->row == smallRow) && (rowCells[i]->column == smallCol))) {
- if (rowCells[i]->row == smallRow) {
- search = rowCells[i]->column;
- } else {
- search = rowCells[i]->row;
- }
-
+ if (dMatrix->seqVec[smallRow][i].index != smallCol) {
+ search = dMatrix->seqVec[smallRow][i].index;
+
bool merged = false;
for (int j=0;j<nColCells;j++) {
- if (!((colCells[j]->row == smallRow) && (colCells[j]->column == smallCol))) { //if you are not hte smallest distance
- if (colCells[j]->row == search || colCells[j]->column == search) {
+
+ if (dMatrix->seqVec[smallCol][j].index != smallRow) { //if you are not the smallest distance
+ if (dMatrix->seqVec[smallCol][j].index == search) {
foundCol[j] = 1;
merged = true;
- changed = updateDistance(colCells[j], rowCells[i]);
- // If the cell's distance changed and it had the same distance as
- // the smallest distance, invalidate the mins vector in SparseMatrix
- if (changed) {
- if (colCells[j]->vectorMap != NULL) {
- *(colCells[j]->vectorMap) = NULL;
- colCells[j]->vectorMap = NULL;
- }
- }
+ changed = updateDistance(dMatrix->seqVec[smallCol][j], dMatrix->seqVec[smallRow][i]);
+ dMatrix->updateCellCompliment(smallCol, j);
break;
- }
- }
+ }else if (dMatrix->seqVec[smallCol][j].index < search) { j+=nColCells; } //we don't have a distance for this cell
+ }
}
//if not merged it you need it for warning
if ((!merged) && (method == "average" || method == "weighted")) {
- //m->mothurOut("Warning: trying to merge cell " + toString(rowCells[i]->row+1) + " " + toString(rowCells[i]->column+1) + " distance " + toString(rowCells[i]->dist) + " with value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); m->mothurOutEndLine();
- if (cutOFF > rowCells[i]->dist) {
- cutOFF = rowCells[i]->dist;
- //m->mothurOut("changing cutoff to " + toString(cutOFF)); m->mothurOutEndLine();
+ if (cutOFF > dMatrix->seqVec[smallRow][i].dist) {
+ cutOFF = dMatrix->seqVec[smallRow][i].dist;
}
-
+
}
- removeCell(rowCells[i], i , -1);
-
+ dMatrix->rmCell(smallRow, i);
}
}
clusterBins();
clusterNames();
-
+
// Special handling for singlelinkage case, not sure whether this
// could be avoided
for (int i=nColCells-1;i>=0;i--) {
- if (foundCol[i] == 0) {
+ if (foundCol[i] == 0) {
if (method == "average" || method == "weighted") {
- if (!((colCells[i]->row == smallRow) && (colCells[i]->column == smallCol))) {
- //m->mothurOut("Warning: merging cell " + toString(colCells[i]->row+1) + " " + toString(colCells[i]->column+1) + " distance " + toString(colCells[i]->dist) + " value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); m->mothurOutEndLine();
- if (cutOFF > colCells[i]->dist) {
- cutOFF = colCells[i]->dist;
- //m->mothurOut("changing cutoff to " + toString(cutOFF)); m->mothurOutEndLine();
+ if (dMatrix->seqVec[smallCol][i].index != smallRow) { //if you are not hte smallest distance
+ if (cutOFF > dMatrix->seqVec[smallCol][i].dist) {
+ cutOFF = dMatrix->seqVec[smallCol][i].dist;
}
}
}
- removeCell(colCells[i], -1, i);
+ dMatrix->rmCell(smallCol, i);
}
}
+
}
catch(exception& e) {
m->errorOut(e, "Cluster", "update");
try {
mapWanted = f;
- //initialize map
- for (int i = 0; i < list->getNumBins(); i++) {
-
- //parse bin
- string names = list->get(i);
- while (names.find_first_of(',') != -1) {
- //get name from bin
- string name = names.substr(0,names.find_first_of(','));
- //save name and bin number
- seq2Bin[name] = i;
- names = names.substr(names.find_first_of(',')+1, names.length());
- }
-
- //get last name
- seq2Bin[names] = i;
+ //initialize map
+ for (int k = 0; k < list->getNumBins(); k++) {
+
+ string names = list->get(k);
+
+ //parse bin
+ string individual = "";
+ int binNameslength = names.size();
+ for(int j=0;j<binNameslength;j++){
+ if(names[j] == ','){
+ seq2Bin[individual] = k;
+ individual = "";
+ }
+ else{ individual += names[j]; }
+ }
+ //get last name
+ seq2Bin[individual] = k;
}
}
}
/***********************************************************************/
void Cluster::updateMap() {
-try {
+ try {
//update location of seqs in smallRow since they move to smallCol now
string names = list->get(smallRow);
- while (names.find_first_of(',') != -1) {
- //get name from bin
- string name = names.substr(0,names.find_first_of(','));
- //save name and bin number
- seq2Bin[name] = smallCol;
- names = names.substr(names.find_first_of(',')+1, names.length());
- }
-
- //get last name
- seq2Bin[names] = smallCol;
+ string individual = "";
+ int binNameslength = names.size();
+ for(int j=0;j<binNameslength;j++){
+ if(names[j] == ','){
+ seq2Bin[individual] = smallCol;
+ individual = "";
+ }
+ else{ individual += names[j]; }
+ }
+ //get last name
+ seq2Bin[individual] = smallCol;
+
}
catch(exception& e) {
m->errorOut(e, "Cluster", "updateMap");
#define CLUSTER_H
+
#include "mothur.h"
-#include "sparsematrix.hpp"
+#include "sparsedistancematrix.h"
#include "mothurout.h"
class RAbundVector;
class ListVector;
-typedef vector<MatData> MatVec;
-
class Cluster {
public:
- Cluster(RAbundVector*, ListVector*, SparseMatrix*, float, string);
+ Cluster(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
virtual void update(double&);
virtual string getTag() = 0;
virtual void setMapWanted(bool m);
virtual map<string, int> getSeqtoBin() { return seq2Bin; }
-
-protected:
- void getRowColCells();
- void removeCell(const MatData& cell, int vrow, int vcol, bool rmMatrix=true);
-
- virtual bool updateDistance(MatData& colCell, MatData& rowCell) = 0;
-
+
+protected:
+ virtual bool updateDistance(PDistCell& colCell, PDistCell& rowCell) = 0;
+
virtual void clusterBins();
virtual void clusterNames();
virtual void updateMap();
RAbundVector* rabund;
ListVector* list;
- SparseMatrix* dMatrix;
+ SparseDistanceMatrix* dMatrix;
- int smallRow;
- int smallCol;
+ ull smallRow;
+ ull smallCol;
float smallDist;
bool mapWanted;
float cutoff;
map<string, int> seq2Bin;
string method;
- vector<MatVec> seqVec; // contains vectors of cells related to a certain sequence
- MatVec rowCells;
- MatVec colCells;
ull nRowCells;
ull nColCells;
MothurOut* m;
class CompleteLinkage : public Cluster {
public:
- CompleteLinkage(RAbundVector*, ListVector*, SparseMatrix*, float, string);
- bool updateDistance(MatData& colCell, MatData& rowCell);
+ CompleteLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string); // same argument list as the Cluster constructor; matrix type is now SparseDistanceMatrix
+ bool updateDistance(PDistCell& colCell, PDistCell& rowCell); // implements the pure virtual declared in Cluster, now on PDistCell cells
string getTag();
private:
-
+
};
/***********************************************************************/
class SingleLinkage : public Cluster {
public:
- SingleLinkage(RAbundVector*, ListVector*, SparseMatrix*, float, string);
+ SingleLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string); // same argument list as the Cluster constructor; matrix type is now SparseDistanceMatrix
void update(double&);
- bool updateDistance(MatData& colCell, MatData& rowCell);
+ bool updateDistance(PDistCell& colCell, PDistCell& rowCell); // implements the pure virtual declared in Cluster, now on PDistCell cells
string getTag();
private:
-
+
};
/***********************************************************************/
class AverageLinkage : public Cluster {
public:
- AverageLinkage(RAbundVector*, ListVector*, SparseMatrix*, float, string);
- bool updateDistance(MatData& colCell, MatData& rowCell);
+ AverageLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string); // same argument list as the Cluster constructor; matrix type is now SparseDistanceMatrix
+ bool updateDistance(PDistCell& colCell, PDistCell& rowCell); // implements the pure virtual declared in Cluster, now on PDistCell cells
string getTag();
private:
int rowBin;
int colBin;
int totalBin;
-
+
};
/***********************************************************************/
class WeightedLinkage : public Cluster {
public:
- WeightedLinkage(RAbundVector*, ListVector*, SparseMatrix*, float, string);
- bool updateDistance(MatData& colCell, MatData& rowCell);
+ WeightedLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
+ bool updateDistance(PDistCell& colCell, PDistCell& rowCell);
string getTag();
private:
/***********************************************************************/
-#endif
+
+
+#endif
\ No newline at end of file
struct colDist {
int col;
int row;
- double dist;
+ float dist; // stored as float now; the constructor below still takes a double, so the value is narrowed on construction
colDist(int r, int c, double d) : row(r), col(c), dist(d) {}
};
RAbundVector* rabund;
ListVector* list;
- vector< vector<double> > dMatrix;
+ vector< vector<float> > dMatrix;
//vector<colDist> rowSmallDists;
int smallRow;
#include "readmatrix.hpp"
#include "clusterdoturcommand.h"
+
//**********************************************************************************************************************
vector<string> ClusterCommand::setParameters(){
try {
CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
- CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "ColumnName",false,false); parameters.push_back(pcolumn);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "ColumnName",false,false); parameters.push_back(pcolumn);
CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
string ClusterCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The cluster command parameter options are phylip, column, name, method, cuttoff, hard, precision, sim, showabund and timing. Phylip or column and name are required, unless you have a valid current file.\n";
+ helpString += "The cluster command parameter options are phylip, column, name, count, method, cuttoff, hard, precision, sim, showabund and timing. Phylip or column and name are required, unless you have a valid current file.\n";
helpString += "The cluster command should be in the following format: \n";
helpString += "cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n";
helpString += "The acceptable cluster methods are furthest, nearest, average and weighted. If no method is provided then average is assumed.\n";
}
}
//**********************************************************************************************************************
+// Returns the file-name tag for one of this command's output types ("list", "rabund"
+// or "sabund"). Returns "" after printing an error when the type is not present in
+// outputTypes or has no tag defined. inputName is accepted but not used here.
+string ClusterCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        //a type missing from outputTypes is not something this command writes
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        //for every type handled here the tag is the same word as the type
+        if ((type == "list") || (type == "rabund") || (type == "sabund")) { return type; }
+
+        //present in outputTypes but no tag defined: report it and set control_pressed
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClusterCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
ClusterCommand::ClusterCommand(){
try {
abort = true; calledHelp = true;
if (namefile == "not open") { abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { abort = true; countfile = ""; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
if ((phylipfile == "") && (columnfile == "")) {
//is there are current file available for either of these?
else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a cluster command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
if (columnfile != "") {
- if (namefile == "") {
+ if ((namefile == "") && (countfile == "")){
namefile = m->getNameFile();
if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
else {
- m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine();
- abort = true;
+ countfile = m->getCountTableFile();
+ if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You need to provide a namefile or countfile if you are going to use the column format."); m->mothurOutEndLine();
+ abort = true;
+ }
}
}
}
+ if ((countfile != "") && (namefile != "")) { m->mothurOut("When executing a cluster command you must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
//get user cutoff and precision or use defaults
//run unique.seqs for deconvolute results
string inputString = "phylip=" + distfile;
if (namefile != "") { inputString += ", name=" + namefile; }
+ else if (countfile != "") { inputString += ", count=" + countfile; }
inputString += ", precision=" + toString(precision);
inputString += ", method=" + method;
if (hard) { inputString += ", hard=T"; }
read->setCutoff(cutoff);
NameAssignment* nameMap = NULL;
+ CountTable* ct = NULL;
if(namefile != ""){
nameMap = new NameAssignment(namefile);
nameMap->readMap();
- }
+ read->read(nameMap);
+ }else if (countfile != "") {
+ ct = new CountTable();
+ ct->readTable(countfile);
+ read->read(ct);
+ }
- read->read(nameMap);
list = read->getListVector();
- matrix = read->getMatrix();
- rabund = new RAbundVector(list->getRAbundVector());
+ matrix = read->getDMatrix();
+
+ if(countfile != "") {
+ rabund = new RAbundVector();
+ createRabund(ct, list, rabund); //creates an rabund that includes the counts for the unique list
+ delete ct;
+ }else { rabund = new RAbundVector(list->getRAbundVector()); }
delete read;
if (m->control_pressed) { //clean up
- delete list; delete matrix; delete rabund;
- sabundFile.close();rabundFile.close();listFile.close();
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- return 0;
+ delete list; delete matrix; delete rabund; if(countfile == ""){rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); }
+ listFile.close(); m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0;
}
//create cluster
if (outputDir == "") { outputDir += m->hasPath(distfile); }
fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
- m->openOutputFile(fileroot+ tag + ".sabund", sabundFile);
- m->openOutputFile(fileroot+ tag + ".rabund", rabundFile);
- m->openOutputFile(fileroot+ tag + ".list", listFile);
-
- outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
- outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
- outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
+ string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
+ string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
+ string listFileName = fileroot+ tag + ".";
+ if (countfile != "") { listFileName += "unique_"; }
+ listFileName += getOutputFileNameTag("list");
+
+ if (countfile == "") {
+ m->openOutputFile(sabundFileName, sabundFile);
+ m->openOutputFile(rabundFileName, rabundFile);
+ outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
+ outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
+
+ }
+ m->openOutputFile(listFileName, listFile);
+ outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
time_t estart = time(NULL);
loops = 0;
double saveCutoff = cutoff;
- while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
+ while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
if (m->control_pressed) { //clean up
delete list; delete matrix; delete rabund; delete cluster;
- sabundFile.close();rabundFile.close();listFile.close();
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- return 0;
+ if(countfile == "") {rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); }
+ listFile.close(); m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0;
}
if (print_start && m->isTrue(timing)) {
loops++;
cluster->update(cutoff);
-
- float dist = matrix->getSmallDist();
+
+ float dist = matrix->getSmallDist();
float rndDist;
if (hard) {
rndDist = m->ceilDist(dist, precision);
delete list;
delete rabund;
delete cluster;
-
- sabundFile.close();
- rabundFile.close();
+ if (countfile == "") {
+ sabundFile.close();
+ rabundFile.close();
+ }
listFile.close();
if (saveCutoff != cutoff) {
print_start = true;
loops = 0;
start = time(NULL);
-
- oldRAbund.setLabel(label);
- if (m->isTrue(showabund)) {
- oldRAbund.getSAbundVector().print(cout);
- }
- oldRAbund.print(rabundFile);
- oldRAbund.getSAbundVector().print(sabundFile);
-
+
+ if (countfile == "") {
+ oldRAbund.print(rabundFile);
+ oldRAbund.getSAbundVector().print(sabundFile);
+ }
+
+ oldRAbund.setLabel(label);
+ if (m->isTrue(showabund)) {
+ oldRAbund.getSAbundVector().print(cout);
+ }
+
oldList.setLabel(label);
oldList.print(listFile);
}
}
//**********************************************************************************************************************
+
+// Fills rabund with one entry per bin of list: the sum of ct->getNumSeqs(name) over the
+// comma-separated names in that bin. rabund takes its label from list. Stops early
+// (leaving rabund partially filled) when control_pressed is set. Always returns 0.
+int ClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
+    try {
+        rabund->setLabel(list->getLabel());
+
+        int binIndex = 0;
+        while (binIndex < list->getNumBins()) {
+            if (m->control_pressed) { break; }
+
+            //split the bin's comma-separated names into individual sequence names
+            string binContents = list->get(binIndex);
+            vector<string> seqNames;
+            m->splitAtComma(binContents, seqNames);
+
+            //abundance of the bin = total count over every name it holds
+            int binAbundance = 0;
+            for (int n = 0; n < seqNames.size(); n++) {
+                binAbundance += ct->getNumSeqs(seqNames[n]);
+            }
+            rabund->push_back(binAbundance);
+
+            binIndex++;
+        }
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClusterCommand", "createRabund");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
#include "sabundvector.hpp"
#include "listvector.hpp"
#include "cluster.hpp"
-#include "sparsematrix.hpp"
+#include "sparsedistancematrix.h"
+#include "counttable.h"
/* The cluster() command:
The cluster command outputs a .list , .rabund and .sabund files.
vector<string> setParameters();
string getCommandName() { return "cluster"; }
string getCommandCategory() { return "Clustering"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Schloss PD, Westcott SL (2011). Assessing and improving methods used in OTU-based approaches for 16S rRNA gene sequence analysis. Appl Environ Microbiol. \nhttp://www.mothur.org/wiki/Cluster"; }
string getDescription() { return "cluster your sequences into OTUs using a distance matrix"; }
private:
Cluster* cluster;
- SparseMatrix* matrix;
+ SparseDistanceMatrix* matrix;
ListVector* list;
RAbundVector* rabund;
RAbundVector oldRAbund;
bool abort, hard, sim;
- string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, format, distfile;
+ string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, format, distfile, countfile;
double cutoff;
string showabund, timing;
int precision, length;
void printData(string label);
vector<string> outputNames;
+
+ int createRabund(CountTable*&, ListVector*&, RAbundVector*&);
};
#endif
}
}
//**********************************************************************************************************************
+// Returns the file-name tag for one of this command's output types ("list", "rabund"
+// or "sabund"). Returns "" after printing an error when the type is not present in
+// outputTypes or has no tag defined. inputName is accepted but not used here.
+string ClusterDoturCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        string outputFileName = "";
+
+        if (outputTypes.find(type) != outputTypes.end()) {
+            //types this command has a tag for; the tag equals the type name
+            const char* knownTags[] = { "list", "rabund", "sabund" };
+            bool tagFound = false;
+            for (int k = 0; k < 3; k++) {
+                if (type == knownTags[k]) { outputFileName = knownTags[k]; tagFound = true; break; }
+            }
+            if (!tagFound) {
+                m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+                m->control_pressed = true;
+            }
+        }
+        else {
+            //not a type this command creates
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+        }
+
+        return outputFileName;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClusterDoturCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
ClusterDoturCommand::ClusterDoturCommand(){
try {
abort = true; calledHelp = true;
if (outputDir == "") { outputDir += m->hasPath(phylipfile); }
fileroot = outputDir + m->getRootName(m->getSimpleName(phylipfile));
- m->openOutputFile(fileroot+ tag + ".sabund", sabundFile);
- m->openOutputFile(fileroot+ tag + ".rabund", rabundFile);
- m->openOutputFile(fileroot+ tag + ".list", listFile);
-
- outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
- outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
- outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
+ string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
+ string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
+ string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
+
+ m->openOutputFile(sabundFileName, sabundFile);
+ m->openOutputFile(rabundFileName, rabundFile);
+ m->openOutputFile(listFileName, listFile);
+
+ outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
+ outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
+ outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
float previousDist = 0.00000;
float rndPreviousDist = 0.00000;
vector<string> setParameters();
string getCommandName() { return "cluster.classic"; }
string getCommandCategory() { return "Clustering"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "Schloss PD, Handelsman J (2005). Introducing DOTUR, a computer program for defining operational taxonomic units and estimating species richness. Appl Environ Microbiol 71: 1501-6. \nhttp://www.mothur.org/wiki/Cluster.classic"; }
string getDescription() { return "cluster your sequences into OTUs using DOTUR’s method"; }
exit(1);
}
}
+//**********************************************************************************************************************
+// Returns the file-name tag for one of this command's output types: "fragclust.fasta"
+// for "fasta" and "fragclust.names" for "name". Returns "" after printing an error when
+// the type is not present in outputTypes or has no tag defined. inputName is not used.
+string ClusterFragmentsCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        string outputFileName = "";
+
+        if (outputTypes.find(type) == outputTypes.end()) {
+            //not a type this command creates
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+        }
+        else if (type == "fasta") { outputFileName = "fragclust.fasta"; }
+        else if (type == "name")  { outputFileName = "fragclust.names"; }
+        else {
+            //present in outputTypes but no tag defined: report it and set control_pressed
+            m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+            m->control_pressed = true;
+        }
+
+        return outputFileName;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClusterFragmentsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+
//**********************************************************************************************************************
ClusterFragmentsCommand::ClusterFragmentsCommand(){
try {
string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
- string newFastaFile = fileroot + "fragclust.fasta";
- string newNamesFile = fileroot + "fragclust.names";
+ string newFastaFile = fileroot + getOutputFileNameTag("fasta");
+ string newNamesFile = fileroot + getOutputFileNameTag("name");
if (m->control_pressed) { return 0; }
vector<string> setParameters();
string getCommandName() { return "cluster.fragments"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Cluster.fragments"; }
string getDescription() { return "creates a namesfile with sequences that are a fragment of a larger sequence"; }
}
}
//**********************************************************************************************************************
+// Returns the file-name tag for one of this command's output types: "list", "rabund"
+// and "sabund" map to themselves, "column" maps to "dist". Returns "" after printing an
+// error when the type is not present in outputTypes or has no tag defined. inputName is
+// accepted but not used here.
+string ClusterSplitCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        //a type missing from outputTypes is not something this command writes
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "list")   { return "list"; }
+        if (type == "rabund") { return "rabund"; }
+        if (type == "sabund") { return "sabund"; }
+        if (type == "column") { return "dist"; }
+
+        //present in outputTypes but no tag defined: report it and set control_pressed
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClusterSplitCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
ClusterSplitCommand::ClusterSplitCommand(){
try {
abort = true; calledHelp = true;
if (outputDir == "") { outputDir += m->hasPath(distfile); }
fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
- m->openOutputFile(fileroot+ tag + ".sabund", outSabund);
- m->openOutputFile(fileroot+ tag + ".rabund", outRabund);
- m->openOutputFile(fileroot+ tag + ".list", outList);
-
- outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["list"].push_back(fileroot+ tag + ".list");
- outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
- outputNames.push_back(fileroot+ tag + ".list"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
+ string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
+ string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
+ string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
+
+ m->openOutputFile(sabundFileName, outSabund);
+ m->openOutputFile(rabundFileName, outRabund);
+ m->openOutputFile(listFileName, outList);
+ outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
+ outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
+ outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
map<float, int>::iterator itLabel;
//for each label needed
string listFileName = "";
Cluster* cluster = NULL;
- SparseMatrix* matrix = NULL;
+ SparseDistanceMatrix* matrix = NULL;
ListVector* list = NULL;
ListVector oldList;
RAbundVector* rabund = NULL;
list = read->getListVector();
oldList = *list;
- matrix = read->getMatrix();
+ matrix = read->getDMatrix();
delete read; read = NULL;
delete nameMap; nameMap = NULL;
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("column");
m->mothurRemove(outputFileName);
#include "sabundvector.hpp"
#include "listvector.hpp"
#include "cluster.hpp"
-#include "sparsematrix.hpp"
+#include "sparsedistancematrix.h"
#include "readcluster.h"
#include "splitmatrix.h"
#include "readphylip.h"
vector<string> setParameters();
string getCommandName() { return "cluster.split"; }
string getCommandCategory() { return "Clustering"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Schloss PD, Westcott SL (2011). Assessing and improving methods used in OTU-based approaches for 16S rRNA gene sequence analysis. Appl Environ Microbiol. \nhttp://www.mothur.org/wiki/Cluster.split"; }
string getDescription() { return "splits your sequences by distance or taxonomy then clusters into OTUs"; }
}
}
//**********************************************************************************************************************
+string CollectCommand::getOutputFileNameTag(string type, string inputName=""){ // Maps an output type name to the tag that callers append to the file-name root. Returns "" when the type is rejected. inputName is not read in this body.
+ try {
+ string outputFileName = ""; // stays empty if either check below rejects the type
+ map<string, vector<string> >::iterator it;
+
+ //is this a type this command creates? Only keys already present in outputTypes are accepted.
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } // message only: control_pressed is NOT set on this path and "" is returned
+ else {
+ if (type == "sobs") { outputFileName = "sobs"; } // in every branch of this chain the tag is identical to the type name
+ else if (type == "chao") { outputFileName = "chao"; }
+ else if (type == "nseqs") { outputFileName = "nseqs"; }
+ else if (type == "coverage") { outputFileName = "coverage"; }
+ else if (type == "ace") { outputFileName = "ace"; }
+ else if (type == "jack") { outputFileName = "jack"; }
+ else if (type == "shannon") { outputFileName = "shannon"; }
+ else if (type == "shannoneven") { outputFileName = "shannoneven"; }
+ else if (type == "npshannon") { outputFileName = "npshannon"; }
+ else if (type == "heip") { outputFileName = "heip"; }
+ else if (type == "smithwilson") { outputFileName = "smithwilson"; }
+ else if (type == "simpson") { outputFileName = "simpson"; }
+ else if (type == "simpsoneven") { outputFileName = "simpsoneven"; }
+ else if (type == "invsimpson") { outputFileName = "invsimpson"; }
+ else if (type == "bootstrap") { outputFileName = "bootstrap"; }
+ else if (type == "geometric") { outputFileName = "geometric"; }
+ else if (type == "qstat") { outputFileName = "qstat"; }
+ else if (type == "logseries") { outputFileName = "logseries"; }
+ else if (type == "bergerparker") { outputFileName = "bergerparker"; }
+ else if (type == "bstick") { outputFileName = "bstick"; }
+ else if (type == "goodscoverage") { outputFileName = "goodscoverage"; }
+ else if (type == "efron") { outputFileName = "efron"; }
+ else if (type == "boneh") { outputFileName = "boneh"; }
+ else if (type == "solow") { outputFileName = "solow"; }
+ else if (type == "shen") { outputFileName = "shen"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type is a key of outputTypes but has no branch above: report it and set control_pressed
+ }
+ return outputFileName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CollectCommand", "getOutputFileNameTag");
+ exit(1); // a caught std::exception is reported through errorOut, then the process exits with status 1
+ }
+}
+//**********************************************************************************************************************
CollectCommand::CollectCommand(){
try {
abort = true; calledHelp = true;
for (int i=0; i<Estimators.size(); i++) {
if (validCalculator.isValidCalculator("single", Estimators[i]) == true) {
if (Estimators[i] == "sobs") {
- cDisplays.push_back(new CollectDisplay(new Sobs(), new OneColumnFile(fileNameRoot+"sobs")));
- outputNames.push_back(fileNameRoot+"sobs"); outputTypes["sobs"].push_back(fileNameRoot+"sobs");
+ cDisplays.push_back(new CollectDisplay(new Sobs(), new OneColumnFile(fileNameRoot+getOutputFileNameTag("sobs"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("sobs")); outputTypes["sobs"].push_back(fileNameRoot+getOutputFileNameTag("sobs"));
}else if (Estimators[i] == "chao") {
- cDisplays.push_back(new CollectDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"chao")));
- outputNames.push_back(fileNameRoot+"chao"); outputTypes["chao"].push_back(fileNameRoot+"chao");
+ cDisplays.push_back(new CollectDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("chao"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("chao")); outputTypes["chao"].push_back(fileNameRoot+getOutputFileNameTag("chao"));
}else if (Estimators[i] == "nseqs") {
- cDisplays.push_back(new CollectDisplay(new NSeqs(), new OneColumnFile(fileNameRoot+"nseqs")));
- outputNames.push_back(fileNameRoot+"nseqs"); outputTypes["nseqs"].push_back(fileNameRoot+"nseqs");
+ cDisplays.push_back(new CollectDisplay(new NSeqs(), new OneColumnFile(fileNameRoot+getOutputFileNameTag("nseqs"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("nseqs")); outputTypes["nseqs"].push_back(fileNameRoot+getOutputFileNameTag("nseqs"));
}else if (Estimators[i] == "coverage") {
- cDisplays.push_back(new CollectDisplay(new Coverage(), new OneColumnFile(fileNameRoot+"coverage")));
- outputNames.push_back(fileNameRoot+"coverage"); outputTypes["coverage"].push_back(fileNameRoot+"coverage");
+ cDisplays.push_back(new CollectDisplay(new Coverage(), new OneColumnFile(fileNameRoot+getOutputFileNameTag("coverage"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("coverage")); outputTypes["coverage"].push_back(fileNameRoot+getOutputFileNameTag("coverage"));
}else if (Estimators[i] == "ace") {
- cDisplays.push_back(new CollectDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"ace")));
- outputNames.push_back(fileNameRoot+"ace"); outputTypes["ace"].push_back(fileNameRoot+"ace");
+ cDisplays.push_back(new CollectDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("ace"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("ace")); outputTypes["ace"].push_back(fileNameRoot+getOutputFileNameTag("ace"));
}else if (Estimators[i] == "jack") {
- cDisplays.push_back(new CollectDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+"jack")));
- outputNames.push_back(fileNameRoot+"jack"); outputTypes["jack"].push_back(fileNameRoot+"jack");
+ cDisplays.push_back(new CollectDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("jack"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("jack")); outputTypes["jack"].push_back(fileNameRoot+getOutputFileNameTag("jack"));
}else if (Estimators[i] == "shannon") {
- cDisplays.push_back(new CollectDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"shannon")));
- outputNames.push_back(fileNameRoot+"shannon"); outputTypes["shannon"].push_back(fileNameRoot+"shannon");
+ cDisplays.push_back(new CollectDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("shannon"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("shannon")); outputTypes["shannon"].push_back(fileNameRoot+getOutputFileNameTag("shannon"));
}else if (Estimators[i] == "shannoneven") {
- cDisplays.push_back(new CollectDisplay(new ShannonEven(), new OneColumnFile(fileNameRoot+"shannoneven")));
- outputNames.push_back(fileNameRoot+"shannoneven"); outputTypes["shannoneven"].push_back(fileNameRoot+"shannoneven");
+ cDisplays.push_back(new CollectDisplay(new ShannonEven(), new OneColumnFile(fileNameRoot+getOutputFileNameTag("shannoneven"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("shannoneven")); outputTypes["shannoneven"].push_back(fileNameRoot+getOutputFileNameTag("shannoneven"));
}else if (Estimators[i] == "npshannon") {
- cDisplays.push_back(new CollectDisplay(new NPShannon(), new OneColumnFile(fileNameRoot+"npshannon")));
- outputNames.push_back(fileNameRoot+"npshannon"); outputTypes["npshannon"].push_back(fileNameRoot+"npshannon");
+ cDisplays.push_back(new CollectDisplay(new NPShannon(), new OneColumnFile(fileNameRoot+getOutputFileNameTag("npshannon"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("npshannon")); outputTypes["npshannon"].push_back(fileNameRoot+getOutputFileNameTag("npshannon"));
}else if (Estimators[i] == "heip") {
- cDisplays.push_back(new CollectDisplay(new Heip(), new OneColumnFile(fileNameRoot+"heip")));
- outputNames.push_back(fileNameRoot+"heip"); outputTypes["heip"].push_back(fileNameRoot+"heip");
+ cDisplays.push_back(new CollectDisplay(new Heip(), new OneColumnFile(fileNameRoot+getOutputFileNameTag("heip"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("heip")); outputTypes["heip"].push_back(fileNameRoot+getOutputFileNameTag("heip"));
}else if (Estimators[i] == "smithwilson") {
- cDisplays.push_back(new CollectDisplay(new SmithWilson(), new OneColumnFile(fileNameRoot+"smithwilson")));
- outputNames.push_back(fileNameRoot+"smithwilson"); outputTypes["smithwilson"].push_back(fileNameRoot+"smithwilson");
+ cDisplays.push_back(new CollectDisplay(new SmithWilson(), new OneColumnFile(fileNameRoot+getOutputFileNameTag("smithwilson"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("smithwilson")); outputTypes["smithwilson"].push_back(fileNameRoot+getOutputFileNameTag("smithwilson"));
}else if (Estimators[i] == "simpson") {
- cDisplays.push_back(new CollectDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"simpson")));
- outputNames.push_back(fileNameRoot+"simpson"); outputTypes["simpson"].push_back(fileNameRoot+"simpson");
+ cDisplays.push_back(new CollectDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("simpson"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("simpson")); outputTypes["simpson"].push_back(fileNameRoot+getOutputFileNameTag("simpson"));
}else if (Estimators[i] == "simpsoneven") {
- cDisplays.push_back(new CollectDisplay(new SimpsonEven(), new OneColumnFile(fileNameRoot+"simpsoneven")));
- outputNames.push_back(fileNameRoot+"simpsoneven"); outputTypes["simpsoneven"].push_back(fileNameRoot+"simpsoneven");
+ cDisplays.push_back(new CollectDisplay(new SimpsonEven(), new OneColumnFile(fileNameRoot+getOutputFileNameTag("simpsoneven"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("simpsoneven")); outputTypes["simpsoneven"].push_back(fileNameRoot+getOutputFileNameTag("simpsoneven"));
}else if (Estimators[i] == "invsimpson") {
- cDisplays.push_back(new CollectDisplay(new InvSimpson(), new ThreeColumnFile(fileNameRoot+"invsimpson")));
- outputNames.push_back(fileNameRoot+"invsimpson"); outputTypes["invsimpson"].push_back(fileNameRoot+"invsimpson");
+ cDisplays.push_back(new CollectDisplay(new InvSimpson(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("invsimpson"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("invsimpson")); outputTypes["invsimpson"].push_back(fileNameRoot+getOutputFileNameTag("invsimpson"));
}else if (Estimators[i] == "bootstrap") {
- cDisplays.push_back(new CollectDisplay(new Bootstrap(), new OneColumnFile(fileNameRoot+"bootstrap")));
- outputNames.push_back(fileNameRoot+"bootstrap"); outputTypes["bootstrap"].push_back(fileNameRoot+"bootstrap");
+ cDisplays.push_back(new CollectDisplay(new Bootstrap(), new OneColumnFile(fileNameRoot+getOutputFileNameTag("bootstrap"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("bootstrap")); outputTypes["bootstrap"].push_back(fileNameRoot+getOutputFileNameTag("bootstrap"));
}else if (Estimators[i] == "geometric") {
- cDisplays.push_back(new CollectDisplay(new Geom(), new OneColumnFile(fileNameRoot+"geometric")));
- outputNames.push_back(fileNameRoot+"geometric"); outputTypes["geometric"].push_back(fileNameRoot+"geometric");
+ cDisplays.push_back(new CollectDisplay(new Geom(), new OneColumnFile(fileNameRoot+getOutputFileNameTag("geometric"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("geometric")); outputTypes["geometric"].push_back(fileNameRoot+getOutputFileNameTag("geometric"));
}else if (Estimators[i] == "qstat") {
- cDisplays.push_back(new CollectDisplay(new QStat(), new OneColumnFile(fileNameRoot+"qstat")));
- outputNames.push_back(fileNameRoot+"qstat"); outputTypes["qstat"].push_back(fileNameRoot+"qstat");
+ cDisplays.push_back(new CollectDisplay(new QStat(), new OneColumnFile(fileNameRoot+getOutputFileNameTag("qstat"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("qstat")); outputTypes["qstat"].push_back(fileNameRoot+getOutputFileNameTag("qstat"));
}else if (Estimators[i] == "logseries") {
- cDisplays.push_back(new CollectDisplay(new LogSD(), new OneColumnFile(fileNameRoot+"logseries")));
- outputNames.push_back(fileNameRoot+"logseries"); outputTypes["logseries"].push_back(fileNameRoot+"logseries");
+ cDisplays.push_back(new CollectDisplay(new LogSD(), new OneColumnFile(fileNameRoot+getOutputFileNameTag("logseries"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("logseries")); outputTypes["logseries"].push_back(fileNameRoot+getOutputFileNameTag("logseries"));
}else if (Estimators[i] == "bergerparker") {
- cDisplays.push_back(new CollectDisplay(new BergerParker(), new OneColumnFile(fileNameRoot+"bergerparker")));
- outputNames.push_back(fileNameRoot+"bergerparker"); outputTypes["bergerparker"].push_back(fileNameRoot+"bergerparker");
+ cDisplays.push_back(new CollectDisplay(new BergerParker(), new OneColumnFile(fileNameRoot+getOutputFileNameTag("bergerparker"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("bergerparker")); outputTypes["bergerparker"].push_back(fileNameRoot+getOutputFileNameTag("bergerparker"));
}else if (Estimators[i] == "bstick") {
- cDisplays.push_back(new CollectDisplay(new BStick(), new ThreeColumnFile(fileNameRoot+"bstick")));
- outputNames.push_back(fileNameRoot+"bstick"); outputTypes["bstick"].push_back(fileNameRoot+"bstick");
+ cDisplays.push_back(new CollectDisplay(new BStick(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("bstick"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("bstick")); outputTypes["bstick"].push_back(fileNameRoot+getOutputFileNameTag("bstick"));
}else if (Estimators[i] == "goodscoverage") {
- cDisplays.push_back(new CollectDisplay(new GoodsCoverage(), new OneColumnFile(fileNameRoot+"goodscoverage")));
- outputNames.push_back(fileNameRoot+"goodscoverage"); outputTypes["goodscoverage"].push_back(fileNameRoot+"goodscoverage");
+ cDisplays.push_back(new CollectDisplay(new GoodsCoverage(), new OneColumnFile(fileNameRoot+getOutputFileNameTag("goodscoverage"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("goodscoverage")); outputTypes["goodscoverage"].push_back(fileNameRoot+getOutputFileNameTag("goodscoverage"));
}else if (Estimators[i] == "efron") {
- cDisplays.push_back(new CollectDisplay(new Efron(size), new OneColumnFile(fileNameRoot+"efron")));
- outputNames.push_back(fileNameRoot+"efron"); outputTypes["efron"].push_back(fileNameRoot+"efron");
+ cDisplays.push_back(new CollectDisplay(new Efron(size), new OneColumnFile(fileNameRoot+getOutputFileNameTag("efron"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("efron")); outputTypes["efron"].push_back(fileNameRoot+getOutputFileNameTag("efron"));
}else if (Estimators[i] == "boneh") {
- cDisplays.push_back(new CollectDisplay(new Boneh(size), new OneColumnFile(fileNameRoot+"boneh")));
- outputNames.push_back(fileNameRoot+"boneh"); outputTypes["boneh"].push_back(fileNameRoot+"boneh");
+ cDisplays.push_back(new CollectDisplay(new Boneh(size), new OneColumnFile(fileNameRoot+getOutputFileNameTag("boneh"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("boneh")); outputTypes["boneh"].push_back(fileNameRoot+getOutputFileNameTag("boneh"));
}else if (Estimators[i] == "solow") {
- cDisplays.push_back(new CollectDisplay(new Solow(size), new OneColumnFile(fileNameRoot+"solow")));
- outputNames.push_back(fileNameRoot+"solow"); outputTypes["solow"].push_back(fileNameRoot+"solow");
+ cDisplays.push_back(new CollectDisplay(new Solow(size), new OneColumnFile(fileNameRoot+getOutputFileNameTag("solow"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("solow")); outputTypes["solow"].push_back(fileNameRoot+getOutputFileNameTag("solow"));
}else if (Estimators[i] == "shen") {
- cDisplays.push_back(new CollectDisplay(new Shen(size, abund), new OneColumnFile(fileNameRoot+"shen")));
- outputNames.push_back(fileNameRoot+"shen"); outputTypes["shen"].push_back(fileNameRoot+"shen");
+ cDisplays.push_back(new CollectDisplay(new Shen(size, abund), new OneColumnFile(fileNameRoot+getOutputFileNameTag("shen"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("shen")); outputTypes["shen"].push_back(fileNameRoot+getOutputFileNameTag("shen"));
}
}
}
string getCommandName() { return "collect.single"; } // returns the literal command name "collect.single"
string getCommandCategory() { return "OTU-Based Approaches"; } // returns the literal category "OTU-Based Approaches"
string getCitation() { return "Schloss PD, Handelsman J (2006). Introducing SONS, A tool that compares the membership of microbial communities. Appl Environ Microbiol 72: 6773-9. \nhttp://www.mothur.org/wiki/Collect.single"; }
+ string getOutputFileNameTag(string, string); // declaration carries no default arguments. NOTE(review): the out-of-line definition supplies inputName="", so one-argument calls presumably must appear after that definition; confirm.
string getHelpString();
string getDescription() { return "generates collector's curves using calculators, that describe the richness, diversity, and other features of individual samples"; } // returns a fixed one-sentence summary string
}
}
//**********************************************************************************************************************
+string CollectSharedCommand::getOutputFileNameTag(string type, string inputName=""){ // Maps an output type name to the tag that callers append to the file-name root. Returns "" when the type is rejected. inputName is not read in this body.
+ try {
+ string outputFileName = ""; // stays empty if either check below rejects the type
+ map<string, vector<string> >::iterator it;
+
+ //is this a type this command creates? Only keys already present in outputTypes are accepted.
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } // message only: control_pressed is NOT set on this path and "" is returned
+ else {
+ if (type == "sharedchao") { outputFileName = "shared.chao"; } // tag differs from the type name: a dot is inserted after "shared"
+ else if (type == "sharedsobs") { outputFileName = "shared.sobs"; } // dotted tag, as above
+ else if (type == "sharedace") { outputFileName = "shared.ace"; } // dotted tag, as above
+ else if (type == "jabund") { outputFileName = "jabund"; } // from here on the tag equals the type name, except "sharednseqs" below
+ else if (type == "sorabund") { outputFileName = "sorabund"; }
+ else if (type == "jclass") { outputFileName = "jclass"; }
+ else if (type == "sorclass") { outputFileName = "sorclass"; }
+ else if (type == "jest") { outputFileName = "jest"; }
+ else if (type == "sorest") { outputFileName = "sorest"; }
+ else if (type == "thetayc") { outputFileName = "thetayc"; }
+ else if (type == "thetan") { outputFileName = "thetan"; }
+ else if (type == "kstest") { outputFileName = "kstest"; }
+ else if (type == "whittaker") { outputFileName = "whittaker"; }
+ else if (type == "sharednseqs") { outputFileName = "shared.nseqs"; } // dotted tag. NOTE(review): the estimator loop in this file pushes this file under outputTypes["shared.nseqs"], not "sharednseqs"; confirm which key is registered for the lookup above.
+ else if (type == "ochiai") { outputFileName = "ochiai"; }
+ else if (type == "anderberg") { outputFileName = "anderberg"; }
+ else if (type == "kulczynski") { outputFileName = "kulczynski"; }
+ else if (type == "kulczynskicody") { outputFileName = "kulczynskicody"; }
+ else if (type == "lennon") { outputFileName = "lennon"; }
+ else if (type == "morisitahorn") { outputFileName = "morisitahorn"; }
+ else if (type == "braycurtis") { outputFileName = "braycurtis"; }
+ else if (type == "odum") { outputFileName = "odum"; }
+ else if (type == "canberra") { outputFileName = "canberra"; }
+ else if (type == "structeuclidean") { outputFileName = "structeuclidean"; }
+ else if (type == "structchord") { outputFileName = "structchord"; }
+ else if (type == "hellinger") { outputFileName = "hellinger"; }
+ else if (type == "manhattan") { outputFileName = "manhattan"; }
+ else if (type == "structpearson") { outputFileName = "structpearson"; }
+ else if (type == "soergel") { outputFileName = "soergel"; }
+ else if (type == "spearman") { outputFileName = "spearman"; }
+ else if (type == "structkulczynski") { outputFileName = "structkulczynski";}
+ else if (type == "structchi2") { outputFileName = "structchi2"; }
+ else if (type == "speciesprofile") { outputFileName = "speciesprofile"; }
+ else if (type == "hamming") { outputFileName = "hamming"; }
+ else if (type == "gower") { outputFileName = "gower"; }
+ else if (type == "memchi2") { outputFileName = "memchi2"; }
+ else if (type == "memchord") { outputFileName = "memchord"; }
+ else if (type == "memeuclidean") { outputFileName = "memeuclidean"; }
+ else if (type == "mempearson") { outputFileName = "mempearson"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type is a key of outputTypes but has no branch above: report it and set control_pressed
+ }
+ return outputFileName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CollectSharedCommand", "getOutputFileNameTag");
+ exit(1); // a caught std::exception is reported through errorOut, then the process exits with status 1
+ }
+}
+//**********************************************************************************************************************
CollectSharedCommand::CollectSharedCommand(){
try {
abort = true; calledHelp = true;
for (int i=0; i<Estimators.size(); i++) {
if (validCalculator.isValidCalculator("shared", Estimators[i]) == true) {
if (Estimators[i] == "sharedchao") {
- cDisplays.push_back(new CollectDisplay(new SharedChao1(), new SharedOneColumnFile(fileNameRoot+"shared.chao")));
- outputNames.push_back(fileNameRoot+"shared.chao"); outputTypes["sharedchao"].push_back(fileNameRoot+"shared.chao");
+ cDisplays.push_back(new CollectDisplay(new SharedChao1(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("sharedchao"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("sharedchao")); outputTypes["sharedchao"].push_back(fileNameRoot+getOutputFileNameTag("sharedchao"));
}else if (Estimators[i] == "sharedsobs") {
- cDisplays.push_back(new CollectDisplay(new SharedSobsCS(), new SharedOneColumnFile(fileNameRoot+"shared.sobs")));
- outputNames.push_back(fileNameRoot+"shared.sobs"); outputTypes["sharedsobs"].push_back(fileNameRoot+"shared.sobs");
+ cDisplays.push_back(new CollectDisplay(new SharedSobsCS(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("sharedsobs"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("sharedsobs")); outputTypes["sharedsobs"].push_back(fileNameRoot+getOutputFileNameTag("sharedsobs"));
}else if (Estimators[i] == "sharedace") {
- cDisplays.push_back(new CollectDisplay(new SharedAce(), new SharedOneColumnFile(fileNameRoot+"shared.ace")));
- outputNames.push_back(fileNameRoot+"shared.ace"); outputTypes["sharedace"].push_back(fileNameRoot+"shared.ace");
+ cDisplays.push_back(new CollectDisplay(new SharedAce(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("sharedace"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("sharedace")); outputTypes["sharedace"].push_back(fileNameRoot+getOutputFileNameTag("sharedace"));
}else if (Estimators[i] == "jabund") {
- cDisplays.push_back(new CollectDisplay(new JAbund(), new SharedOneColumnFile(fileNameRoot+"jabund")));
- outputNames.push_back(fileNameRoot+"jabund"); outputTypes["jabund"].push_back(fileNameRoot+"jabund");
+ cDisplays.push_back(new CollectDisplay(new JAbund(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("jabund"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("jabund")); outputTypes["jabund"].push_back(fileNameRoot+getOutputFileNameTag("jabund"));
}else if (Estimators[i] == "sorabund") {
- cDisplays.push_back(new CollectDisplay(new SorAbund(), new SharedOneColumnFile(fileNameRoot+"sorabund")));
- outputNames.push_back(fileNameRoot+"sorabund"); outputTypes["sorabund"].push_back(fileNameRoot+"sorabund");
+ cDisplays.push_back(new CollectDisplay(new SorAbund(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("sorabund"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("sorabund")); outputTypes["sorabund"].push_back(fileNameRoot+getOutputFileNameTag("sorabund"));
}else if (Estimators[i] == "jclass") {
- cDisplays.push_back(new CollectDisplay(new Jclass(), new SharedOneColumnFile(fileNameRoot+"jclass")));
- outputNames.push_back(fileNameRoot+"jclass"); outputTypes["jclass"].push_back(fileNameRoot+"jclass");
+ cDisplays.push_back(new CollectDisplay(new Jclass(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("jclass"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("jclass")); outputTypes["jclass"].push_back(fileNameRoot+getOutputFileNameTag("jclass"));
}else if (Estimators[i] == "sorclass") {
- cDisplays.push_back(new CollectDisplay(new SorClass(), new SharedOneColumnFile(fileNameRoot+"sorclass")));
- outputNames.push_back(fileNameRoot+"sorclass"); outputTypes["sorclass"].push_back(fileNameRoot+"sorclass");
+ cDisplays.push_back(new CollectDisplay(new SorClass(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("sorclass"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("sorclass")); outputTypes["sorclass"].push_back(fileNameRoot+getOutputFileNameTag("sorclass"));
}else if (Estimators[i] == "jest") {
- cDisplays.push_back(new CollectDisplay(new Jest(), new SharedOneColumnFile(fileNameRoot+"jest")));
- outputNames.push_back(fileNameRoot+"jest"); outputTypes["jest"].push_back(fileNameRoot+"jest");
+ cDisplays.push_back(new CollectDisplay(new Jest(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("jest"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("jest")); outputTypes["jest"].push_back(fileNameRoot+getOutputFileNameTag("jest"));
}else if (Estimators[i] == "sorest") {
- cDisplays.push_back(new CollectDisplay(new SorEst(), new SharedOneColumnFile(fileNameRoot+"sorest")));
- outputNames.push_back(fileNameRoot+"sorest"); outputTypes["sorest"].push_back(fileNameRoot+"sorest");
+ cDisplays.push_back(new CollectDisplay(new SorEst(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("sorest"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("sorest")); outputTypes["sorest"].push_back(fileNameRoot+getOutputFileNameTag("sorest"));
}else if (Estimators[i] == "thetayc") {
- cDisplays.push_back(new CollectDisplay(new ThetaYC(), new SharedOneColumnFile(fileNameRoot+"thetayc")));
- outputNames.push_back(fileNameRoot+"thetayc"); outputTypes["thetayc"].push_back(fileNameRoot+"thetayc");
+ cDisplays.push_back(new CollectDisplay(new ThetaYC(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("thetayc"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("thetayc")); outputTypes["thetayc"].push_back(fileNameRoot+getOutputFileNameTag("thetayc"));
}else if (Estimators[i] == "thetan") {
- cDisplays.push_back(new CollectDisplay(new ThetaN(), new SharedOneColumnFile(fileNameRoot+"thetan")));
- outputNames.push_back(fileNameRoot+"thetan"); outputTypes["thetan"].push_back(fileNameRoot+"thetan");
+ cDisplays.push_back(new CollectDisplay(new ThetaN(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("thetan"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("thetan")); outputTypes["thetan"].push_back(fileNameRoot+getOutputFileNameTag("thetan"));
}else if (Estimators[i] == "kstest") {
- cDisplays.push_back(new CollectDisplay(new KSTest(), new SharedOneColumnFile(fileNameRoot+"kstest")));
- outputNames.push_back(fileNameRoot+"kstest"); outputTypes["kstest"].push_back(fileNameRoot+"kstest");
+ cDisplays.push_back(new CollectDisplay(new KSTest(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("kstest"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("kstest")); outputTypes["kstest"].push_back(fileNameRoot+getOutputFileNameTag("kstest"));
}else if (Estimators[i] == "whittaker") {
- cDisplays.push_back(new CollectDisplay(new Whittaker(), new SharedOneColumnFile(fileNameRoot+"whittaker")));
- outputNames.push_back(fileNameRoot+"whittaker"); outputTypes["whittaker"].push_back(fileNameRoot+"whittaker");
+ cDisplays.push_back(new CollectDisplay(new Whittaker(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("whittaker"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("whittaker")); outputTypes["whittaker"].push_back(fileNameRoot+getOutputFileNameTag("whittaker"));
}else if (Estimators[i] == "sharednseqs") {
- cDisplays.push_back(new CollectDisplay(new SharedNSeqs(), new SharedOneColumnFile(fileNameRoot+"shared.nseqs")));
- outputNames.push_back(fileNameRoot+"shared.nseqs"); outputTypes["shared.nseqs"].push_back(fileNameRoot+"shared.nseqs");
+ cDisplays.push_back(new CollectDisplay(new SharedNSeqs(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("sharednseqs"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("sharednseqs")); outputTypes["shared.nseqs"].push_back(fileNameRoot+getOutputFileNameTag("sharednseqs"));
}else if (Estimators[i] == "ochiai") {
- cDisplays.push_back(new CollectDisplay(new Ochiai(), new SharedOneColumnFile(fileNameRoot+"ochiai")));
- outputNames.push_back(fileNameRoot+"ochiai"); outputTypes["ochiai"].push_back(fileNameRoot+"ochiai");
+ cDisplays.push_back(new CollectDisplay(new Ochiai(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("ochiai"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("ochiai")); outputTypes["ochiai"].push_back(fileNameRoot+getOutputFileNameTag("ochiai"));
}else if (Estimators[i] == "anderberg") {
- cDisplays.push_back(new CollectDisplay(new Anderberg(), new SharedOneColumnFile(fileNameRoot+"anderberg")));
- outputNames.push_back(fileNameRoot+"anderberg"); outputTypes["anderberg"].push_back(fileNameRoot+"anderberg");
+ cDisplays.push_back(new CollectDisplay(new Anderberg(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("anderberg"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("anderberg")); outputTypes["anderberg"].push_back(fileNameRoot+getOutputFileNameTag("anderberg"));
}else if (Estimators[i] == "kulczynski") {
- cDisplays.push_back(new CollectDisplay(new Kulczynski(), new SharedOneColumnFile(fileNameRoot+"kulczynski")));
- outputNames.push_back(fileNameRoot+"kulczynski"); outputTypes["kulczynski"].push_back(fileNameRoot+"kulczynski");
+ cDisplays.push_back(new CollectDisplay(new Kulczynski(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("kulczynski"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("kulczynski")); outputTypes["kulczynski"].push_back(fileNameRoot+getOutputFileNameTag("kulczynski"));
}else if (Estimators[i] == "kulczynskicody") {
- cDisplays.push_back(new CollectDisplay(new KulczynskiCody(), new SharedOneColumnFile(fileNameRoot+"kulczynskicody")));
- outputNames.push_back(fileNameRoot+"kulczynskicody"); outputTypes["kulczynskicody"].push_back(fileNameRoot+"kulczynskicody");
+ cDisplays.push_back(new CollectDisplay(new KulczynskiCody(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("kulczynskicody"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("kulczynskicody")); outputTypes["kulczynskicody"].push_back(fileNameRoot+getOutputFileNameTag("kulczynskicody"));
}else if (Estimators[i] == "lennon") {
- cDisplays.push_back(new CollectDisplay(new Lennon(), new SharedOneColumnFile(fileNameRoot+"lennon")));
- outputNames.push_back(fileNameRoot+"lennon"); outputTypes["lennon"].push_back(fileNameRoot+"lennon");
+ cDisplays.push_back(new CollectDisplay(new Lennon(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("lennon"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("lennon")); outputTypes["lennon"].push_back(fileNameRoot+getOutputFileNameTag("lennon"));
}else if (Estimators[i] == "morisitahorn") {
- cDisplays.push_back(new CollectDisplay(new MorHorn(), new SharedOneColumnFile(fileNameRoot+"morisitahorn")));
- outputNames.push_back(fileNameRoot+"morisitahorn"); outputTypes["morisitahorn"].push_back(fileNameRoot+"morisitahorn");
+ cDisplays.push_back(new CollectDisplay(new MorHorn(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("morisitahorn"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("morisitahorn")); outputTypes["morisitahorn"].push_back(fileNameRoot+getOutputFileNameTag("morisitahorn"));
}else if (Estimators[i] == "braycurtis") {
- cDisplays.push_back(new CollectDisplay(new BrayCurtis(), new SharedOneColumnFile(fileNameRoot+"braycurtis")));
- outputNames.push_back(fileNameRoot+"braycurtis"); outputTypes["braycurtis"].push_back(fileNameRoot+"braycurtis");
+ cDisplays.push_back(new CollectDisplay(new BrayCurtis(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("braycurtis"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("braycurtis")); outputTypes["braycurtis"].push_back(fileNameRoot+getOutputFileNameTag("braycurtis"));
}else if (Estimators[i] == "odum") {
- cDisplays.push_back(new CollectDisplay(new Odum(), new SharedOneColumnFile(fileNameRoot+"odum")));
- outputNames.push_back(fileNameRoot+"odum"); outputTypes["odum"].push_back(fileNameRoot+"odum");
+ cDisplays.push_back(new CollectDisplay(new Odum(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("odum"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("odum")); outputTypes["odum"].push_back(fileNameRoot+getOutputFileNameTag("odum"));
}else if (Estimators[i] == "canberra") {
- cDisplays.push_back(new CollectDisplay(new Canberra(), new SharedOneColumnFile(fileNameRoot+"canberra")));
- outputNames.push_back(fileNameRoot+"canberra"); outputTypes["canberra"].push_back(fileNameRoot+"canberra");
+ cDisplays.push_back(new CollectDisplay(new Canberra(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("canberra"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("canberra")); outputTypes["canberra"].push_back(fileNameRoot+getOutputFileNameTag("canberra"));
}else if (Estimators[i] == "structeuclidean") {
- cDisplays.push_back(new CollectDisplay(new StructEuclidean(), new SharedOneColumnFile(fileNameRoot+"structeuclidean")));
- outputNames.push_back(fileNameRoot+"structeuclidean"); outputTypes["structeuclidean"].push_back(fileNameRoot+"structeuclidean");
+ cDisplays.push_back(new CollectDisplay(new StructEuclidean(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("structeuclidean"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("structeuclidean")); outputTypes["structeuclidean"].push_back(fileNameRoot+getOutputFileNameTag("structeuclidean"));
}else if (Estimators[i] == "structchord") {
- cDisplays.push_back(new CollectDisplay(new StructChord(), new SharedOneColumnFile(fileNameRoot+"structchord")));
- outputNames.push_back(fileNameRoot+"structchord"); outputTypes["structchord"].push_back(fileNameRoot+"structchord");
+ cDisplays.push_back(new CollectDisplay(new StructChord(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("structchord"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("structchord")); outputTypes["structchord"].push_back(fileNameRoot+getOutputFileNameTag("structchord"));
}else if (Estimators[i] == "hellinger") {
- cDisplays.push_back(new CollectDisplay(new Hellinger(), new SharedOneColumnFile(fileNameRoot+"hellinger")));
- outputNames.push_back(fileNameRoot+"hellinger"); outputTypes["hellinger"].push_back(fileNameRoot+"hellinger");
+ cDisplays.push_back(new CollectDisplay(new Hellinger(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("hellinger"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("hellinger")); outputTypes["hellinger"].push_back(fileNameRoot+getOutputFileNameTag("hellinger"));
}else if (Estimators[i] == "manhattan") {
- cDisplays.push_back(new CollectDisplay(new Manhattan(), new SharedOneColumnFile(fileNameRoot+"manhattan")));
- outputNames.push_back(fileNameRoot+"manhattan"); outputTypes["manhattan"].push_back(fileNameRoot+"manhattan");
+ cDisplays.push_back(new CollectDisplay(new Manhattan(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("manhattan"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("manhattan")); outputTypes["manhattan"].push_back(fileNameRoot+getOutputFileNameTag("manhattan"));
}else if (Estimators[i] == "structpearson") {
- cDisplays.push_back(new CollectDisplay(new StructPearson(), new SharedOneColumnFile(fileNameRoot+"structpearson")));
- outputNames.push_back(fileNameRoot+"structpearson"); outputTypes["structpearson"].push_back(fileNameRoot+"structpearson");
+ cDisplays.push_back(new CollectDisplay(new StructPearson(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("structpearson"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("structpearson")); outputTypes["structpearson"].push_back(fileNameRoot+getOutputFileNameTag("structpearson"));
}else if (Estimators[i] == "soergel") {
- cDisplays.push_back(new CollectDisplay(new Soergel(), new SharedOneColumnFile(fileNameRoot+"soergel")));
- outputNames.push_back(fileNameRoot+"soergel"); outputTypes["soergel"].push_back(fileNameRoot+"soergel");
+ cDisplays.push_back(new CollectDisplay(new Soergel(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("soergel"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("soergel")); outputTypes["soergel"].push_back(fileNameRoot+getOutputFileNameTag("soergel"));
}else if (Estimators[i] == "spearman") {
- cDisplays.push_back(new CollectDisplay(new Spearman(), new SharedOneColumnFile(fileNameRoot+"spearman")));
- outputNames.push_back(fileNameRoot+"spearman"); outputTypes["spearman"].push_back(fileNameRoot+"spearman");
+ cDisplays.push_back(new CollectDisplay(new Spearman(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("spearman"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("spearman")); outputTypes["spearman"].push_back(fileNameRoot+getOutputFileNameTag("spearman"));
}else if (Estimators[i] == "structkulczynski") {
- cDisplays.push_back(new CollectDisplay(new StructKulczynski(), new SharedOneColumnFile(fileNameRoot+"structkulczynski")));
- outputNames.push_back(fileNameRoot+"structkulczynski"); outputTypes["structkulczynski"].push_back(fileNameRoot+"structkulczynski");
+ cDisplays.push_back(new CollectDisplay(new StructKulczynski(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("structkulczynski"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("structkulczynski")); outputTypes["structkulczynski"].push_back(fileNameRoot+getOutputFileNameTag("structkulczynski"));
}else if (Estimators[i] == "speciesprofile") {
- cDisplays.push_back(new CollectDisplay(new SpeciesProfile(), new SharedOneColumnFile(fileNameRoot+"speciesprofile")));
- outputNames.push_back(fileNameRoot+"speciesprofile"); outputTypes["speciesprofile"].push_back(fileNameRoot+"speciesprofile");
+ cDisplays.push_back(new CollectDisplay(new SpeciesProfile(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("speciesprofile"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("speciesprofile")); outputTypes["speciesprofile"].push_back(fileNameRoot+getOutputFileNameTag("speciesprofile"));
}else if (Estimators[i] == "hamming") {
- cDisplays.push_back(new CollectDisplay(new Hamming(), new SharedOneColumnFile(fileNameRoot+"hamming")));
- outputNames.push_back(fileNameRoot+"hamming"); outputTypes["hamming"].push_back(fileNameRoot+"hamming");
+ cDisplays.push_back(new CollectDisplay(new Hamming(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("hamming"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("hamming")); outputTypes["hamming"].push_back(fileNameRoot+getOutputFileNameTag("hamming"));
}else if (Estimators[i] == "structchi2") {
- cDisplays.push_back(new CollectDisplay(new StructChi2(), new SharedOneColumnFile(fileNameRoot+"structchi2")));
- outputNames.push_back(fileNameRoot+"structchi2"); outputTypes["structchi2"].push_back(fileNameRoot+"structchi2");
+ cDisplays.push_back(new CollectDisplay(new StructChi2(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("structchi2"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("structchi2")); outputTypes["structchi2"].push_back(fileNameRoot+getOutputFileNameTag("structchi2"));
}else if (Estimators[i] == "gower") {
- cDisplays.push_back(new CollectDisplay(new Gower(), new SharedOneColumnFile(fileNameRoot+"gower")));
- outputNames.push_back(fileNameRoot+"gower"); outputTypes["gower"].push_back(fileNameRoot+"gower");
+ cDisplays.push_back(new CollectDisplay(new Gower(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("gower"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("gower")); outputTypes["gower"].push_back(fileNameRoot+getOutputFileNameTag("gower"));
}else if (Estimators[i] == "memchi2") {
- cDisplays.push_back(new CollectDisplay(new MemChi2(), new SharedOneColumnFile(fileNameRoot+"memchi2")));
- outputNames.push_back(fileNameRoot+"memchi2"); outputTypes["memchi2"].push_back(fileNameRoot+"memchi2");
+ cDisplays.push_back(new CollectDisplay(new MemChi2(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("memchi2"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("memchi2")); outputTypes["memchi2"].push_back(fileNameRoot+getOutputFileNameTag("memchi2"));
}else if (Estimators[i] == "memchord") {
- cDisplays.push_back(new CollectDisplay(new MemChord(), new SharedOneColumnFile(fileNameRoot+"memchord")));
- outputNames.push_back(fileNameRoot+"memchord"); outputTypes["memchord"].push_back(fileNameRoot+"memchord");
+ cDisplays.push_back(new CollectDisplay(new MemChord(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("memchord"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("memchord")); outputTypes["memchord"].push_back(fileNameRoot+getOutputFileNameTag("memchord"));
}else if (Estimators[i] == "memeuclidean") {
- cDisplays.push_back(new CollectDisplay(new MemEuclidean(), new SharedOneColumnFile(fileNameRoot+"memeuclidean")));
- outputNames.push_back(fileNameRoot+"memeuclidean"); outputTypes["memeuclidean"].push_back(fileNameRoot+"memeuclidean");
+ cDisplays.push_back(new CollectDisplay(new MemEuclidean(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("memeuclidean"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("memeuclidean")); outputTypes["memeuclidean"].push_back(fileNameRoot+getOutputFileNameTag("memeuclidean"));
}else if (Estimators[i] == "mempearson") {
- cDisplays.push_back(new CollectDisplay(new MemPearson(), new SharedOneColumnFile(fileNameRoot+"mempearson")));
- outputNames.push_back(fileNameRoot+"mempearson"); outputTypes["mempearson"].push_back(fileNameRoot+"mempearson");
+ cDisplays.push_back(new CollectDisplay(new MemPearson(), new SharedOneColumnFile(fileNameRoot+getOutputFileNameTag("mempearson"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("mempearson")); outputTypes["mempearson"].push_back(fileNameRoot+getOutputFileNameTag("mempearson"));
}
}
vector<string> setParameters();
string getCommandName() { return "collect.shared"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Schloss PD, Handelsman J (2006). Introducing SONS, A tool that compares the membership of microbial communities. Appl Environ Microbiol 72: 6773-9. \nhttp://www.mothur.org/wiki/Collect.shared"; }
string getDescription() { return "generates collector's curves for calculators, which describe the similarity between communities or their shared richness"; }
virtual string getDescription() = 0;
virtual map<string, vector<string> > getOutputFiles() { return outputTypes; }
+ virtual string getOutputFileNameTag(string, string) = 0; //We may not know the complete file name, because some commands use information from the file (e.g. the label) to create the output file name, but we do know the ending tag, which should be enough to find the output file name in a list. Allows for optionally passing the inputFileName for the commands that require its extension.
virtual vector<string> setParameters() = 0; //to fill parameters
virtual vector<CommandParameter> getParameters() { return parameters; }
*/
#include "command.hpp"
-#include "readdistcommand.h"
-#include "readtreecommand.h"
-#include "readotucommand.h"
#include "clustercommand.h"
#include "collectcommand.h"
#include "collectsharedcommand.h"
#include "getotulabelscommand.h"
#include "removeotulabelscommand.h"
#include "makecontigscommand.h"
+#include "loadlogfilecommand.h"
/*******************************************************/
append = false;
//initialize list of valid commands
- commands["read.dist"] = "read.dist";
- commands["read.otu"] = "read.otu";
- commands["read.tree"] = "read.tree";
commands["make.shared"] = "make.shared";
commands["bin.seqs"] = "bin.seqs";
commands["get.oturep"] = "get.oturep";
commands["get.otulabels"] = "get.otulabels";
commands["remove.otulabels"] = "remove.otulabels";
commands["make.contigs"] = "make.contigs";
+ commands["load.logfile"] = "load.logfile";
+ commands["make.table"] = "make.table";
commands["quit"] = "MPIEnabled";
}
else { optionString += "inputdir=" + inputDir; }
}
- if(commandName == "read.dist") { command = new ReadDistCommand(optionString); }
- else if(commandName == "read.otu") { command = new ReadOtuCommand(optionString); }
- else if(commandName == "read.tree") { command = new ReadTreeCommand(optionString); }
- else if(commandName == "cluster") { command = new ClusterCommand(optionString); }
+ if(commandName == "cluster") { command = new ClusterCommand(optionString); }
else if(commandName == "unique.seqs") { command = new DeconvoluteCommand(optionString); }
else if(commandName == "parsimony") { command = new ParsimonyCommand(optionString); }
else if(commandName == "help") { command = new HelpCommand(optionString); }
else if(commandName == "make.shared") { command = new SharedCommand(optionString); }
else if(commandName == "get.commandinfo") { command = new GetCommandInfoCommand(optionString); }
else if(commandName == "deunique.tree") { command = new DeuniqueTreeCommand(optionString); }
- else if(commandName == "count.seqs") { command = new CountSeqsCommand(optionString); }
+ else if((commandName == "count.seqs") || (commandName == "make.table")) { command = new CountSeqsCommand(optionString); }
else if(commandName == "count.groups") { command = new CountGroupsCommand(optionString); }
else if(commandName == "clear.memory") { command = new ClearMemoryCommand(optionString); }
else if(commandName == "summary.tax") { command = new SummaryTaxCommand(optionString); }
else if(commandName == "get.otulabels") { command = new GetOtuLabelsCommand(optionString); }
else if(commandName == "remove.otulabels") { command = new RemoveOtuLabelsCommand(optionString); }
else if(commandName == "make.contigs") { command = new MakeContigsCommand(optionString); }
+ else if(commandName == "load.logfile") { command = new LoadLogfileCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
else { optionString += "inputdir=" + inputDir; }
}
- if(commandName == "read.dist") { pipecommand = new ReadDistCommand(optionString); }
- else if(commandName == "read.otu") { pipecommand = new ReadOtuCommand(optionString); }
- else if(commandName == "read.tree") { pipecommand = new ReadTreeCommand(optionString); }
- else if(commandName == "cluster") { pipecommand = new ClusterCommand(optionString); }
+ if(commandName == "cluster") { pipecommand = new ClusterCommand(optionString); }
else if(commandName == "unique.seqs") { pipecommand = new DeconvoluteCommand(optionString); }
else if(commandName == "parsimony") { pipecommand = new ParsimonyCommand(optionString); }
else if(commandName == "help") { pipecommand = new HelpCommand(optionString); }
else if(commandName == "make.shared") { pipecommand = new SharedCommand(optionString); }
else if(commandName == "get.commandinfo") { pipecommand = new GetCommandInfoCommand(optionString); }
else if(commandName == "deunique.tree") { pipecommand = new DeuniqueTreeCommand(optionString); }
- else if(commandName == "count.seqs") { pipecommand = new CountSeqsCommand(optionString); }
+ else if((commandName == "count.seqs") || (commandName == "make.table")) { pipecommand = new CountSeqsCommand(optionString); }
else if(commandName == "count.groups") { pipecommand = new CountGroupsCommand(optionString); }
else if(commandName == "clear.memory") { pipecommand = new ClearMemoryCommand(optionString); }
else if(commandName == "summary.tax") { pipecommand = new SummaryTaxCommand(optionString); }
else if(commandName == "get.otulabels") { pipecommand = new GetOtuLabelsCommand(optionString); }
else if(commandName == "remove.otulabels") { pipecommand = new RemoveOtuLabelsCommand(optionString); }
else if(commandName == "make.contigs") { pipecommand = new MakeContigsCommand(optionString); }
+ else if(commandName == "load.logfile") { pipecommand = new LoadLogfileCommand(optionString); }
else { pipecommand = new NoCommand(optionString); }
return pipecommand;
try {
delete shellcommand; //delete the old command
- if(commandName == "read.dist") { shellcommand = new ReadDistCommand(); }
- else if(commandName == "read.otu") { shellcommand = new ReadOtuCommand(); }
- else if(commandName == "read.tree") { shellcommand = new ReadTreeCommand(); }
- else if(commandName == "cluster") { shellcommand = new ClusterCommand(); }
+ if(commandName == "cluster") { shellcommand = new ClusterCommand(); }
else if(commandName == "unique.seqs") { shellcommand = new DeconvoluteCommand(); }
else if(commandName == "parsimony") { shellcommand = new ParsimonyCommand(); }
else if(commandName == "help") { shellcommand = new HelpCommand(); }
else if(commandName == "make.shared") { shellcommand = new SharedCommand(); }
else if(commandName == "get.commandinfo") { shellcommand = new GetCommandInfoCommand(); }
else if(commandName == "deunique.tree") { shellcommand = new DeuniqueTreeCommand(); }
- else if(commandName == "count.seqs") { shellcommand = new CountSeqsCommand(); }
+ else if((commandName == "count.seqs") || (commandName == "make.table")) { shellcommand = new CountSeqsCommand(); }
else if(commandName == "count.groups") { shellcommand = new CountGroupsCommand(); }
else if(commandName == "clear.memory") { shellcommand = new ClearMemoryCommand(); }
else if(commandName == "summary.tax") { shellcommand = new SummaryTaxCommand(); }
else if(commandName == "get.otulabels") { shellcommand = new GetOtuLabelsCommand(); }
else if(commandName == "remove.otulabels") { shellcommand = new RemoveOtuLabelsCommand(); }
else if(commandName == "make.contigs") { shellcommand = new MakeContigsCommand(); }
+ else if(commandName == "load.logfile") { shellcommand = new LoadLogfileCommand(); }
else { shellcommand = new NoCommand(); }
return shellcommand;
/***********************************************************************/
-CompleteLinkage::CompleteLinkage(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c, string s) :
+CompleteLinkage::CompleteLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s) :
Cluster(rav, lv, dm, c, s)
{}
/***********************************************************************/
//This function updates the distance based on the furthest neighbor method.
-bool CompleteLinkage::updateDistance(MatData& colCell, MatData& rowCell) {
+bool CompleteLinkage::updateDistance(PDistCell& colCell, PDistCell& rowCell) {
try {
bool changed = false;
- if (colCell->dist < rowCell->dist) {
- colCell->dist = rowCell->dist;
+ if (colCell.dist < rowCell.dist) {
+ colCell.dist = rowCell.dist;
changed = true;
}
return(changed);
exit(1);
}
}
+//**********************************************************************************************************************
+string ConsensusSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		//a type this command never registered is reported and gets an empty tag
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		//translate the registered output type into its file name ending
+		string tag = "";
+		if (type == "fasta")        { tag = "cons.fasta";   }
+		else if (type == "name")    { tag = "cons.names";   }
+		else if (type == "summary") { tag = "cons.summary"; }
+		else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
+		return tag;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "ConsensusSeqsCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
//**********************************************************************************************************************
ConsensusSeqsCommand::ConsensusSeqsCommand(){
if (listfile == "") {
ofstream outSummary;
- string outputSummaryFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "cons.summary";
+ string outputSummaryFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("summary");
m->openOutputFile(outputSummaryFile, outSummary);
outSummary.setf(ios::fixed, ios::floatfield); outSummary.setf(ios::showpoint);
outputNames.push_back(outputSummaryFile); outputTypes["summary"].push_back(outputSummaryFile);
outSummary << "PositioninAlignment\tA\tT\tG\tC\tGap\tNumberofSeqs\tConsensusBase" << endl;
ofstream outFasta;
- string outputFastaFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "cons.fasta";
+ string outputFastaFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta");
m->openOutputFile(outputFastaFile, outFasta);
outputNames.push_back(outputFastaFile); outputTypes["fasta"].push_back(outputFastaFile);
try{
ofstream outSummary;
- string outputSummaryFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + ".cons.summary";
+ string outputSummaryFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + "." + getOutputFileNameTag("summary"); //tag has no leading '.', so keep the separator the old name had after the label
m->openOutputFile(outputSummaryFile, outSummary);
outSummary.setf(ios::fixed, ios::floatfield); outSummary.setf(ios::showpoint);
outputNames.push_back(outputSummaryFile); outputTypes["summary"].push_back(outputSummaryFile);
ofstream outName;
- string outputNameFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + ".cons.names";
+ string outputNameFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + "." + getOutputFileNameTag("name"); //tag has no leading '.', so keep the separator the old name had after the label
m->openOutputFile(outputNameFile, outName);
outputNames.push_back(outputNameFile); outputTypes["name"].push_back(outputNameFile);
ofstream outFasta;
- string outputFastaFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + ".cons.fasta";
+ string outputFastaFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + "." + getOutputFileNameTag("fasta"); //tag has no leading '.', so keep the separator the old name had after the label
m->openOutputFile(outputFastaFile, outFasta);
outputNames.push_back(outputFastaFile); outputTypes["fasta"].push_back(outputFastaFile);
int ConsensusSeqsCommand::readNames(){
try{
-
- ifstream in;
- m->openInputFile(namefile, in);
-
- string thisname, repnames;
- map<string, string>::iterator it;
-
- bool error = false;
-
- while(!in.eof()){
-
- if (m->control_pressed) { break; }
-
- in >> thisname; m->gobble(in); //read from first column
- in >> repnames; //read from second column
-
- it = nameMap.find(thisname);
+ map<string, string> temp;
+ map<string, string>::iterator it;
+ bool error = false;
+
+ m->readNames(namefile, temp); //use central buffered read
+
+ for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end(); itTemp++) {
+ string thisname, repnames;
+ thisname = itTemp->first;
+ repnames = itTemp->second;
+
+ it = nameMap.find(thisname);
if (it != nameMap.end()) { //then this sequence was in the fastafile
-
+ nameFileMap[thisname] = repnames; //for later when outputting the new namesFile if the list file is unique
+
vector<string> splitRepNames;
m->splitAtComma(repnames, splitRepNames);
- nameFileMap[thisname] = repnames; //for later when outputting the new namesFile if the list file is unique
for (int i = 0; i < splitRepNames.size(); i++) { nameMap[splitRepNames[i]] = thisname; }
}else{ m->mothurOut("[ERROR]: " + thisname + " is not in the fasta file, please correct."); m->mothurOutEndLine(); error = true; }
-
- m->gobble(in);
- }
-
- in.close();
-
+ }
+
if (error) { m->control_pressed = true; }
return 0;
vector<string> setParameters();
string getCommandName() { return "consensus.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Consensus.seqs"; }
string getDescription() { return "create a consensus sequence for each OTU or for a fasta file"; }
}
}
//**********************************************************************************************************************
+string CooccurrenceCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		//a type this command never registered is reported and gets an empty tag
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		//translate the registered output type into its file name ending
+		string tag = "";
+		if (type == "summary") { tag = "cooccurence.summary"; }
+		else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
+		return tag;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "CooccurrenceCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
CooccurrenceCommand::CooccurrenceCommand(){
try {
abort = true; calledHelp = true;
set<string> userLabels = labels;
ofstream out;
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "cooccurence.summary";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + getOutputFileNameTag("summary");
m->openOutputFile(outputFileName, out);
outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
vector<string> setParameters();
string getCommandName() { return "Cooccurrence"; }
string getCommandCategory() { return "Hypothesis Testing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Cooccurrence"; }
string getDescription() { return "calculates four metrics and tests their significance to assess whether presence-absence patterns are different than what one would expect by chance."; }
}
}
//**********************************************************************************************************************
+string CorrAxesCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		//a type this command never registered is reported and gets an empty tag
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		//translate the registered output type into its file name ending
+		string tag = "";
+		if (type == "corraxes") { tag = "corr.axes"; }
+		else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
+		return tag;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "CorrAxesCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
CorrAxesCommand::CorrAxesCommand(){
try {
abort = true; calledHelp = true;
setParameters();
vector<string> tempOutNames;
- outputTypes["corr.axes"] = tempOutNames;
+ outputTypes["corraxes"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");
}
vector<string> tempOutNames;
- outputTypes["corr.axes"] = tempOutNames;
+ outputTypes["corraxes"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
// calc the r values //
/************************************************************************************/
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + method + ".corr.axes";
- outputNames.push_back(outputFileName); outputTypes["corr.axes"].push_back(outputFileName);
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + method + "." + getOutputFileNameTag("corraxes");
+ outputNames.push_back(outputFileName); outputTypes["corraxes"].push_back(outputFileName);
ofstream out;
m->openOutputFile(outputFileName, out);
out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
vector<string> setParameters();
string getCommandName() { return "corr.axes"; }
string getCommandCategory() { return "Hypothesis Testing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "McCune B, Grace JB, Urban DL (2002). Analysis of ecological communities. MjM Software Design: Gleneden Beach, OR. \nLegendre P, Legendre L (1998). Numerical Ecology. Elsevier: New York. \nhttp://www.mothur.org/wiki/Corr.axes"; }
string getDescription() { return "calculate the correlation coefficient for each column in a shared/relabund file to the axes displayed in a pcoa file"; }
if (abort == true) { if (calledHelp) { return 0; } return 2; }
//get groups you want to remove
- if (accnosfile != "") { readAccnos(); }
+ if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups); }
if (groupfile != "") {
GroupMap groupMap(groupfile);
}
}
//**********************************************************************************************************************
-void CountGroupsCommand::readAccnos(){
- try {
- Groups.clear();
-
- ifstream in;
- m->openInputFile(accnosfile, in);
- string name;
-
- while(!in.eof()){
- in >> name;
-
- Groups.push_back(name);
-
- m->gobble(in);
- }
- in.close();
-
- m->setGroups(Groups);
-
- }
- catch(exception& e) {
- m->errorOut(e, "CountGroupsCommand", "readAccnos");
- exit(1);
- }
-}
-//**********************************************************************************************************************
vector<string> setParameters();
string getCommandName() { return "count.groups"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string) { return ""; }
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Count.groups"; }
string getDescription() { return "counts the number of sequences in each group"; }
string sharedfile, groupfile, outputDir, groups, accnosfile;
bool abort;
vector<string> Groups;
-
- void readAccnos();
};
#endif
try {
CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname);
CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+ CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
string CountSeqsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The count.seqs command reads a name file and outputs a .seq.count file. You may also provide a group file to get the counts broken down by group.\n";
+ helpString += "The count.seqs aka. make.table command reads a name file and outputs a .count.table file. You may also provide a group file to get the counts broken down by group.\n";
helpString += "The groups parameter allows you to indicate which groups you want to include in the counts, by default all groups in your groupfile are used.\n";
+ helpString += "The large parameter indicates the name and group files are too large to fit in RAM.\n";
helpString += "When you use the groups parameter and a sequence does not represent any sequences from the groups you specify it is not included in the .count.summary file.\n";
helpString += "The count.seqs command should be in the following format: count.seqs(name=yourNameFile).\n";
- helpString += "Example count.seqs(name=amazon.names).\n";
+ helpString += "Example count.seqs(name=amazon.names) or make.table(name=amazon.names).\n";
helpString += "Note: No spaces between parameter labels (i.e. name), '=' and parameters (i.e.yourNameFile).\n";
return helpString;
}
exit(1);
}
}
-
+//**********************************************************************************************************************
+string CountSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		//a type this command never registered is reported and gets an empty tag
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		//translate the registered output type into its file name ending
+		string tag = "";
+		if (type == "counttable") { tag = "count.table"; }
+		else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
+		return tag;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "CountSeqsCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
//**********************************************************************************************************************
CountSeqsCommand::CountSeqsCommand(){
try {
abort = true; calledHelp = true;
setParameters();
vector<string> tempOutNames;
- outputTypes["summary"] = tempOutNames;
+ outputTypes["counttable"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "CountSeqsCommand", "CountSeqsCommand");
//initialize outputTypes
vector<string> tempOutNames;
- outputTypes["summary"] = tempOutNames;
+ outputTypes["counttable"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
groups = validParameter.validFile(parameters, "groups", false);
if (groups == "not found") { groups = "all"; }
m->splitAtDash(groups, Groups);
+
+ string temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "F"; }
+ large = m->isTrue(temp);
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(namefile); }
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- ofstream out;
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(namefile)) + "seq.count";
- m->openOutputFile(outputFileName, out); outputTypes["summary"].push_back(outputFileName);
- out << "Representative_Sequence\ttotal\t";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("counttable");
- GroupMap* groupMap;
+ int total = 0;
+ if (!large) { total = processSmall(outputFileName); }
+ else { total = processLarge(outputFileName); }
+
+ if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
+
+ //set count table file as new current count table file
+ itTypes = outputTypes.find("counttable");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { string current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
+
+ m->mothurOutEndLine();
+ m->mothurOut("Total number of sequences: " + toString(total)); m->mothurOutEndLine();
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Name: "); m->mothurOutEndLine();
+ m->mothurOut(outputFileName); m->mothurOutEndLine();
+ m->mothurOutEndLine();
+
+ return 0;
+ }
+
+ catch(exception& e) {
+ m->errorOut(e, "CountSeqsCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+//Writes the count table to outputFileName and returns the total number of sequences counted
+//(the sum of the number of comma separated names in the second column of each name file row).
+//The output file is registered exactly once in outputNames and outputTypes["counttable"];
+//it was previously pushed onto outputTypes["counttable"] twice, unlike processLarge.
+//Part of this function's body lies outside the lines shown in this hunk.
+int CountSeqsCommand::processSmall(string outputFileName){
+ try {
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ outputNames.push_back(outputFileName); outputTypes["counttable"].push_back(outputFileName);
+ out << "Representative_Sequence\ttotal\t";
+
+ GroupMap* groupMap;
if (groupfile != "") {
groupMap = new GroupMap(groupfile); groupMap->readMap();
if (m->control_pressed) { break; }
string firstCol, secondCol;
- in >> firstCol >> secondCol; m->gobble(in);
+ in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
vector<string> names;
m->splitAtChar(secondCol, names, ',');
total += names.size();
}
in.close();
+ out.close();
if (groupfile != "") { delete groupMap; }
+
+ return total;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountSeqsCommand", "processSmall");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+//Builds the count table without holding the name and group files in memory.
+//With no group file, each name file row is streamed straight to the output as name<TAB>count.
+//With a group file, both files are first rewritten as (sequence, index) rows by processNameFile and
+//getGroupNames, sorted with the operating system's sort so they can be read in step, and tallied into
+//a [unique sequence][group] matrix that is written out once both files have been consumed.
+//Returns the number of sequences counted.
+int CountSeqsCommand::processLarge(string outputFileName){
+    try {
+        set<string> namesOfGroups;
+
+        ofstream out;
+        m->openOutputFile(outputFileName, out);
+        outputNames.push_back(outputFileName); outputTypes["counttable"].push_back(outputFileName);
+        out << "Representative_Sequence\ttotal\t";
+        if (groupfile == "") { out << endl; }
+
+        string outfile = m->getRootName(groupfile) + "sorted.groups.temp";
+        string outName = m->getRootName(namefile) + "sorted.name.temp";
+        map<int, string> indexToName;
+        map<int, string> indexToGroup;
+        if (groupfile != "") {
+            time_t estart = time(NULL);
+            //convert name file to redundant -> unique. set unique name equal to index so we can use vectors, save name for later.
+            string newNameFile = m->getRootName(namefile) + ".name.temp";
+            string newGroupFile = m->getRootName(groupfile) + ".group.temp";
+            indexToName = processNameFile(newNameFile);
+            indexToGroup = getGroupNames(newGroupFile, namesOfGroups);
+
+            //sort file by first column so the names of sequences will be easier to find
+            //NOTE(review): file names are pasted into the shell command unquoted, so paths containing spaces or
+            //shell metacharacters will break it, and the exit status of system() is not checked.
+            //use the unix sort
+            #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                string command = "sort -n " + newGroupFile + " -o " + outfile;
+                system(command.c_str());
+                command = "sort -n " + newNameFile + " -o " + outName;
+                system(command.c_str());
+            #else //sort using windows sort
+                string command = "sort " + newGroupFile + " /O " + outfile;
+                system(command.c_str());
+                command = "sort " + newNameFile + " /O " + outName;
+                system(command.c_str());
+            #endif
+            m->mothurRemove(newNameFile);
+            m->mothurRemove(newGroupFile);
+
+            m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to sort and index the group and name files. "); m->mothurOutEndLine();
+        }else { outName = namefile; }
+
+        time_t estart = time(NULL);
+        //open input file
+        ifstream in;
+        m->openInputFile(outName, in);
+
+        //open input file
+        ifstream in2;
- if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
+        int total = 0;
+        vector< vector<int> > nameMapCount;
+        if (groupfile != "") {
+            m->openInputFile(outfile, in2);
+            nameMapCount.resize(indexToName.size());
+            for (int i = 0; i < nameMapCount.size(); i++) {
+                nameMapCount[i].resize(indexToGroup.size(), 0);
+            }
+        }
+
+        while (!in.eof()) {
+            if (m->control_pressed) { break; }
+
+            string firstCol;
+            in >> firstCol; m->gobble(in);
+
+            if (groupfile != "") {
+                //start from -1 so a read that fails or runs out of data is detectable below
+                int uniqueIndex = -1;
+                in >> uniqueIndex; m->gobble(in);
+
+                string name; int groupIndex = -1;
+                in2 >> name >> groupIndex; m->gobble(in2);
+
+                if (name != firstCol) { m->mothurOut("[ERROR]: found " + name + " in your groupfile, but " + firstCol + " was in your namefile, please correct.\n"); m->control_pressed = true; }
+
+                //never index outside the matrix: happens when one file is shorter than the other or a row is malformed
+                bool validName = (uniqueIndex >= 0) && (uniqueIndex < static_cast<int>(nameMapCount.size()));
+                bool validGroup = validName && (groupIndex >= 0) && (groupIndex < static_cast<int>(nameMapCount[uniqueIndex].size()));
+                if (!validGroup) {
+                    m->mothurOut("[ERROR]: unable to match " + firstCol + " between your name and group files, please correct.\n"); m->control_pressed = true;
+                    break;
+                }
+
+                nameMapCount[uniqueIndex][groupIndex]++;
+                total++;
+            }else {
+                string secondCol;
+                in >> secondCol; m->gobble(in);
+                int num = m->getNumNames(secondCol);
+                out << firstCol << '\t' << num << endl;
+                total += num;
+            }
+        }
+        in.close();
+
+        if (groupfile != "") {
+            //close the sorted group file before deleting it; removing a file that is still open is not portable
+            in2.close();
+            m->mothurRemove(outfile);
+            m->mothurRemove(outName);
+            for (map<int, string>::iterator it = indexToGroup.begin(); it != indexToGroup.end(); it++) { out << it->second << '\t'; }
+            out << endl;
+            for (int i = 0; i < nameMapCount.size(); i++) {
+                string totalsLine = "";
+                int seqTotal = 0;
+                for (int j = 0; j < nameMapCount[i].size(); j++) {
+                    seqTotal += nameMapCount[i][j];
+                    totalsLine += toString(nameMapCount[i][j]) + '\t';
+                }
+                out << indexToName[i] << '\t' << seqTotal << '\t' << totalsLine << endl;
+            }
+        }
+
+        out.close();
+
+        m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to create the count table file. "); m->mothurOutEndLine();
+
+        return total;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CountSeqsCommand", "processLarge");
+        exit(1);
+    }
+}
+/**************************************************************************************************
+ * Rewrites the name file (member namefile) into the file called name, one sequence per line:
+ *     sequenceName <TAB> index
+ * where index counts the name file rows in the order they are read, starting at 0, and
+ * sequenceName is each comma separated entry of that row's second column.
+ * Returns a map from index to the first column of the corresponding row.
+ * The input is read in 4096 byte chunks and split on white space.
+ **************************************************************************************************/
+map<int, string> CountSeqsCommand::processNameFile(string name) {
+ try {
+ map<int, string> indexToNames;
+
+ ofstream out;
+ m->openOutputFile(name, out);
+
+ //open input file
+ ifstream in;
+ m->openInputFile(namefile, in);
+
+ string rest = ""; //NOTE(review): presumably holds a token cut off at the end of a chunk until the next read; if so, a token still in rest when the loop ends is never processed - confirm against splitWhiteSpace
+ char buffer[4096];
+ bool pairDone = false;
+ bool columnOne = true; //true while the next piece belongs to the first column
+ string firstCol, secondCol;
+ int count = 0; //index given to the next completed row
+
+ while (!in.eof()) {
+ if (m->control_pressed) { break; }
+
+ in.read(buffer, 4096);
+ vector<string> pieces = m->splitWhiteSpace(rest, buffer, in.gcount());
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ //parse names into vector
+ vector<string> theseNames;
+ m->splitAtComma(secondCol, theseNames);
+ for (int i = 0; i < theseNames.size(); i++) { out << theseNames[i] << '\t' << count << endl; } //this i shadows the outer loop index
+ indexToNames[count] = firstCol;
+ pairDone = false;
+ count++;
+ }
+ }
+ }
+ in.close();
+ out.close();
- m->mothurOutEndLine();
- m->mothurOut("Total number of sequences: " + toString(total)); m->mothurOutEndLine();
- m->mothurOutEndLine();
- m->mothurOut("Output File Name: "); m->mothurOutEndLine();
- m->mothurOut(outputFileName); m->mothurOutEndLine();
- m->mothurOutEndLine();
+ return indexToNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountSeqsCommand", "processNameFile");
+ exit(1);
+ }
+}
+/**************************************************************************************************
+ * Rewrites the group file (member groupfile) into filename, one sequence per line:
+ *     sequenceName <TAB> groupIndex
+ * Each distinct group name (second column) receives an index in order of first appearance,
+ * starting at 0. Every group name seen is also inserted into namesOfGroups.
+ * Returns a map from groupIndex to group name.
+ * The input is read in 4096 byte chunks and split on white space.
+ **************************************************************************************************/
+map<int, string> CountSeqsCommand::getGroupNames(string filename, set<string>& namesOfGroups) {
+ try {
+ map<int, string> indexToGroups;
+ map<string, int> groupIndex; //group name -> index assigned to it
+ map<string, int>::iterator it;
+
+ ofstream out;
+ m->openOutputFile(filename, out);
+
+ //open input file
+ ifstream in;
+ m->openInputFile(groupfile, in);
+
+ string rest = ""; //NOTE(review): presumably holds a token cut off at the end of a chunk until the next read; if so, a token still in rest when the loop ends is never processed - confirm against splitWhiteSpace
+ char buffer[4096];
+ bool pairDone = false;
+ bool columnOne = true; //true while the next piece belongs to the first column
+ string firstCol, secondCol;
+ int count = 0; //index given to the next new group
+
+ while (!in.eof()) {
+ if (m->control_pressed) { break; }
+
+ in.read(buffer, 4096);
+ vector<string> pieces = m->splitWhiteSpace(rest, buffer, in.gcount());
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ it = groupIndex.find(secondCol);
+ if (it == groupIndex.end()) { //add group, assigning the group and number so we can use vectors above
+ groupIndex[secondCol] = count;
+ count++;
+ }
+ out << firstCol << '\t' << groupIndex[secondCol] << endl;
+ namesOfGroups.insert(secondCol);
+ pairDone = false;
+ }
+ }
+ }
+ in.close();
+ out.close();
- return 0;
+ for (it = groupIndex.begin(); it != groupIndex.end(); it++) { indexToGroups[it->second] = it->first; } //invert name -> index into index -> name
+
+ return indexToGroups;
}
-
catch(exception& e) {
- m->errorOut(e, "CountSeqsCommand", "execute");
+ m->errorOut(e, "CountSeqsCommand", "getGroupNames");
exit(1);
}
}
//**********************************************************************************************************************
+
+
+
vector<string> setParameters();
string getCommandName() { return "count.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Count.seqs"; }
string getDescription() { return "counts the number of sequences represented by each unique sequence in a namesfile"; }
private:
string namefile, groupfile, outputDir, groups;
- bool abort;
- vector<string> Groups;
+ bool abort, large;
+ vector<string> Groups, outputNames;
+
+ int processSmall(string);
+ int processLarge(string);
+ map<int, string> processNameFile(string);
+ map<int, string> getGroupNames(string, set<string>&);
+
};
#endif
--- /dev/null
+//
+// counttable.cpp
+// Mothur
+//
+// Created by Sarah Westcott on 6/26/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "counttable.h"
+
+
+/************************************************************/
+//Reads the count table in file, replacing anything this object held before.
+//The first row is the header: two fixed columns followed by zero or more group names.
+//Every other row is: sequence name, total, then one count per group in header order.
+//Group columns are stored in sorted group-name order, not in file order.
+//Duplicate sequence names are reported and set m->control_pressed. Always returns 0.
+int CountTable::readTable(string file) {
+    try {
+        filename = file;
+        ifstream in;
+        m->openInputFile(filename, in);
+
+        string headers = m->getline(in); m->gobble(in);
+        vector<string> columnHeaders = m->splitWhiteSpace(headers);
+
+        //reset every piece of state so a second read cannot inherit rows or group info from the first
+        int numGroups = 0;
+        hasGroups = false;
+        groups.clear();
+        totalGroups.clear();
+        indexGroupMap.clear();
+        indexNameMap.clear();
+        counts.clear();
+        totals.clear();
+        map<int, string> originalGroupIndexes;
+        if (columnHeaders.size() > 2) { hasGroups = true; numGroups = columnHeaders.size() - 2; }
+        for (int i = 2; i < columnHeaders.size(); i++) {
+            groups.push_back(columnHeaders[i]);
+            originalGroupIndexes[i-2] = columnHeaders[i];
+            totalGroups.push_back(0);
+        }
+        //sort groups to keep consistent with how we store the groups in groupmap
+        sort(groups.begin(), groups.end());
+        for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
+        m->setAllGroups(groups);
+
+        bool error = false;
+        string name;
+        int thisTotal;
+        uniques = 0;
+        total = 0;
+        while (!in.eof()) {
+
+            if (m->control_pressed) { break; }
+
+            in >> name; m->gobble(in); in >> thisTotal; m->gobble(in);
+            if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(thisTotal) + "\n"); }
+
+            //if group info, then read it
+            //file column i is stored at the sorted position of its group name
+            vector<int> groupCounts; groupCounts.resize(numGroups, 0);
+            for (int i = 0; i < numGroups; i++) {
+                int thisIndex = indexGroupMap[originalGroupIndexes[i]];
+                in >> groupCounts[thisIndex]; m->gobble(in);
+                totalGroups[thisIndex] += groupCounts[thisIndex];
+            }
+
+            map<string, int>::iterator it = indexNameMap.find(name);
+            if (it == indexNameMap.end()) {
+                if (hasGroups) { counts.push_back(groupCounts); }
+                indexNameMap[name] = uniques;
+                totals.push_back(thisTotal);
+                total += thisTotal;
+                uniques++;
+            }else {
+                error = true;
+                m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + name + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();
+            }
+        }
+        in.close();
+
+        if (error) { m->control_pressed = true; }
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CountTable", "readTable");
+        exit(1);
+    }
+}
+/************************************************************/
+//per-group counts stored for seqName.
+//returns an empty vector and sets control_pressed when the table has no group info or seqName is unknown.
+vector<int> CountTable::getGroupCounts(string seqName) {
+    try {
+        vector<int> seqCounts;
+
+        if (!hasGroups) {
+            m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true;
+            return seqCounts;
+        }
+
+        map<string, int>::iterator pos = indexNameMap.find(seqName);
+        if (pos != indexNameMap.end()) {
+            seqCounts = counts[pos->second];
+        }else {
+            m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+        }
+
+        return seqCounts;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CountTable", "getGroupCounts");
+        exit(1);
+    }
+}
+/************************************************************/
+//total number of sequences assigned to groupName.
+//returns 0 and sets control_pressed when the table has no group info or groupName is unknown.
+int CountTable::getGroupCount(string groupName) {
+    try {
+        if (!hasGroups) {
+            m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true;
+            return 0;
+        }
+
+        map<string, int>::iterator pos = indexGroupMap.find(groupName);
+        if (pos != indexGroupMap.end()) { return totalGroups[pos->second]; }
+
+        m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CountTable", "getGroupCount");
+        exit(1);
+    }
+}
+/************************************************************/
+//number of sequences seqName represents within groupName.
+//returns 0 and sets control_pressed when the table has no group info or either name is unknown;
+//the group name is checked before the sequence name.
+int CountTable::getGroupCount(string seqName, string groupName) {
+    try {
+        if (!hasGroups) {
+            m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true;
+            return 0;
+        }
+
+        map<string, int>::iterator groupPos = indexGroupMap.find(groupName);
+        if (groupPos == indexGroupMap.end()) {
+            m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+            return 0;
+        }
+
+        map<string, int>::iterator seqPos = indexNameMap.find(seqName);
+        if (seqPos == indexNameMap.end()) {
+            m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+            return 0;
+        }
+
+        return counts[seqPos->second][groupPos->second];
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CountTable", "getGroupCount");
+        exit(1);
+    }
+}
+/************************************************************/
+//total number of sequences represented by seqName.
+//returns 0 and sets control_pressed when seqName is unknown.
+int CountTable::getNumSeqs(string seqName) {
+    try {
+        map<string, int>::iterator pos = indexNameMap.find(seqName);
+        if (pos != indexNameMap.end()) { return totals[pos->second]; }
+
+        m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CountTable", "getNumSeqs");
+        exit(1);
+    }
+}
+/************************************************************/
+//index assigned to seqName when the table was read, like get in NameAssignment.
+//returns -1 and sets control_pressed when seqName is unknown.
+int CountTable::get(string seqName) {
+    try {
+        map<string, int>::iterator pos = indexNameMap.find(seqName);
+        if (pos == indexNameMap.end()) {
+            m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+            return -1;
+        }
+
+        return pos->second;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CountTable", "get");
+        exit(1);
+    }
+}
+/************************************************************/
+//builds a ListVector sized to the number of stored names, with each name placed at its stored index.
+//stops filling early if control_pressed is set.
+ListVector CountTable::getListVector() {
+    try {
+        ListVector list(indexNameMap.size());
+
+        map<string, int>::iterator pos = indexNameMap.begin();
+        while (pos != indexNameMap.end()) {
+            if (m->control_pressed) { break; }
+            list.set(pos->second, pos->first);
+            pos++;
+        }
+
+        return list;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CountTable", "getListVector");
+        exit(1);
+    }
+}
+
+/************************************************************/
+//names of every sequence currently stored, in the map's (sorted) key order
+vector<string> CountTable::getNamesOfSeqs() {
+    try {
+        vector<string> seqNames;
+
+        map<string, int>::iterator pos = indexNameMap.begin();
+        while (pos != indexNameMap.end()) {
+            seqNames.push_back(pos->first);
+            pos++;
+        }
+
+        return seqNames;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CountTable", "getNamesOfSeqs");
+        exit(1);
+    }
+}
+/************************************************************/
+//adds the group counts and total of seq2 to seq1, zeroes seq2's stored counts and removes seq2's name
+//from the table, reducing the number of uniques by one.
+//reports an error and sets control_pressed when either name is unknown. Always returns 0.
+int CountTable::mergeCounts(string seq1, string seq2) {
+    try {
+        map<string, int>::iterator it = indexNameMap.find(seq1);
+        if (it == indexNameMap.end()) {
+            m->mothurOut("[ERROR]: " + seq1 + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+            return 0;
+        }
+
+        map<string, int>::iterator it2 = indexNameMap.find(seq2);
+        if (it2 == indexNameMap.end()) {
+            m->mothurOut("[ERROR]: " + seq2 + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+            return 0;
+        }
+
+        //merging a sequence with itself must change nothing; without this guard its counts
+        //would be zeroed and its name erased from the table
+        if (it == it2) { return 0; }
+
+        //merge data
+        for (int i = 0; i < groups.size(); i++) {
+            counts[it->second][i] += counts[it2->second][i];
+            counts[it2->second][i] = 0;
+        }
+        totals[it->second] += totals[it2->second];
+        totals[it2->second] = 0;
+        uniques--;
+        indexNameMap.erase(it2);
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CountTable", "mergeCounts");
+        exit(1);
+    }
+}
+
+/************************************************************/
+
+
--- /dev/null
+#ifndef Mothur_counttable_h
+#define Mothur_counttable_h
+
+
+//
+// counttable.h
+// Mothur
+//
+// Created by Sarah Westcott on 6/26/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+//This class is designed to read a count table file and store its data.
+//count table files look like:
+
+/*
+ Representative_Sequence total F003D000 F003D002 F003D004 F003D006 F003D008 F003D142 F003D144 F003D146 F003D148 F003D150 MOCK.GQY1XT001
+ GQY1XT001C296C 6051 409 985 923 937 342 707 458 439 387 464 0
+ GQY1XT001A3TJI 4801 396 170 413 442 306 769 581 576 497 651 0
+ GQY1XT001CS2B8 3018 263 226 328 460 361 336 248 290 187 319 0
+ GQY1XT001CD9IB 2736 239 177 256 405 306 286 263 248 164 392 0
+
+ or if no group info was used to create it
+
+ Representative_Sequence total
+ GQY1XT001C296C 6051
+ GQY1XT001A3TJI 4801
+ GQY1XT001CS2B8 3018
+ GQY1XT001CD9IB 2736
+ GQY1XT001ARCB1 2183
+ GQY1XT001CNF2P 2796
+ GQY1XT001CJMDA 1667
+ GQY1XT001CBVJB 3758
+
+
+ */
+
+
+#include "mothurout.h"
+#include "listvector.hpp"
+
+//Stores the contents of a count table file: a total per sequence and, when the file has
+//group columns, a count per sequence per group. Fill it with readTable before querying.
+class CountTable {
+
+    public:
+
+        //uniques is initialised too, so getNumUniqueSeqs() is well defined before any table is read
+        CountTable() { m = MothurOut::getInstance(); hasGroups = false; total = 0; uniques = 0; }
+        ~CountTable() {}
+
+        int readTable(string);
+
+        bool hasGroupInfo() { return hasGroups; }
+        int getNumGroups() { return groups.size(); }
+        vector<string> getNamesOfGroups() { return groups; } //returns group names, if no group info vector is blank.
+
+        vector<int> getGroupCounts(string); //returns group counts for a seq passed in, if no group info is in file vector is blank. Order is the same as the groups returned by getNamesOfGroups function.
+        int getGroupCount(string, string); //returns number of seqs for that group for that seq
+        int getGroupCount(string); // returns total seqs for that group
+        int getNumSeqs(string); //returns total seqs for that seq
+        int getNumSeqs() { return total; } //return total number of seqs
+        int getNumUniqueSeqs() { return uniques; } //return number of unique/representative seqs
+        int getGroupIndex(string); //returns index in getGroupCounts vector of specific group. NOTE(review): no definition is visible in counttable.cpp as reviewed - confirm it exists before calling.
+        vector<string> getNamesOfSeqs();
+        int mergeCounts(string, string); //combines counts for 2 seqs, saving under the first name passed in.
+        int get(string); //returns unique sequence index for reading distance matrices like NameAssignment
+        ListVector getListVector();
+        int size() { return indexNameMap.size(); }
+
+    private:
+        string filename;
+        MothurOut* m;
+        bool hasGroups;
+        int total, uniques;
+        vector<string> groups; //group names in sorted order
+        vector< vector<int> > counts; //counts[sequence index][group index]
+        vector<int> totals; //totals[sequence index]
+        vector<int> totalGroups; //totalGroups[group index]
+        map<string, int> indexNameMap; //sequence name -> sequence index
+        map<string, int> indexGroupMap; //group name -> group index
+
+};
+
+#endif
CommandParameter pfasta("repfasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
CommandParameter pname("repname", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname);
CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pcontaxonomy);
- CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
+ CommandParameter plist("list", "InputTypes", "", "", "ListShared", "ListShared", "none",false,false); parameters.push_back(plist);
+ CommandParameter pshared("shared", "InputTypes", "", "", "ListShared", "ListShared", "none",false,false); parameters.push_back(pshared);
CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
string CreateDatabaseCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The create.database command reads a listfile, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, and creates a database file.\n";
- helpString += "The create.database command parameters are repfasta, list, repname, contaxonomy, group and label. List, repfasta, repnames, and contaxonomy are required.\n";
+ helpString += "The create.database command reads a list file or a shared file, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, and creates a database file.\n";
+ helpString += "The create.database command parameters are repfasta, list, shared, repname, contaxonomy, group and label. List, repfasta, repnames, and contaxonomy are required.\n";
helpString += "The repfasta file is fasta file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
helpString += "The repname file is the name file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile).\n";
exit(1);
}
}
+//**********************************************************************************************************************
+//Maps an output type to the file name tag this command appends to its output file.
+//Returns "database" for type "database" and "" (after logging an error) for anything else.
+//inputName is accepted for interface compatibility and is not used here.
+string CreateDatabaseCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        //a type this command never registered: report it and hand back an empty tag
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "database") { return "database"; }
+
+        //registered type without a tag definition: flag the run as aborted
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CreateDatabaseCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+
//**********************************************************************************************************************
CreateDatabaseCommand::CreateDatabaseCommand(){
try {
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["group"] = inputDir + it->second; }
}
+
+ it = parameters.find("shared");
+ //user has given a shared file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["shared"] = inputDir + it->second; }
+ }
}
//check for required parameters
listfile = validParameter.validFile(parameters, "list", true);
- if (listfile == "not found") {
- //if there is a current list file, use it
+ if (listfile == "not found") { listfile = ""; }
+ else if (listfile == "not open") { listfile = ""; abort = true; }
+ else { m->setListFile(listfile); }
+
+ sharedfile = validParameter.validFile(parameters, "shared", true);
+ if (sharedfile == "not found") { sharedfile = ""; }
+ else if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+ else { m->setSharedFile(sharedfile); }
+
+ if ((sharedfile == "") && (listfile == "")) {
+ //is there a current file available for either of these?
+ //give priority to list, then shared
listfile = m->getListFile();
if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
- else { m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
+ else {
+ sharedfile = m->getSharedFile();
+ if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("No valid current files. You must provide a shared or list file before you can use the create.database command."); m->mothurOutEndLine();
+ abort = true;
+ }
+ }
}
- else if (listfile == "not open") { abort = true; }
- else { m->setListFile(listfile); }
+ else if ((sharedfile != "") && (listfile != "")) { m->mothurOut("When executing a create.database command you must enter ONLY ONE of the following: shared or list."); m->mothurOutEndLine(); abort = true; }
+
+ if (sharedfile != "") { if (outputDir == "") { outputDir = m->hasPath(sharedfile); } }
+ else { if (outputDir == "") { outputDir = m->hasPath(listfile); } }
contaxonomyfile = validParameter.validFile(parameters, "contaxonomy", true);
if (contaxonomyfile == "not found") { //if there is a current list file, use it
//taxonomies holds the taxonomy info for each Otu
//classifyOtuSizes holds the size info of each Otu to help with error checking
vector<string> taxonomies;
- vector<int> classifyOtuSizes = readTax(taxonomies);
+ vector<string> otuLabels;
+ vector<int> classifyOtuSizes = readTax(taxonomies, otuLabels);
if (m->control_pressed) { return 0; }
//names redundants to uniques. backwards to how we normally do it, but each bin is the list file will be a key entry in the map.
map<string, string> repNames;
- int numUniqueNamesFile = readNames(repNames);
+ int numUniqueNamesFile = m->readNames(repnamesfile, repNames, 1);
//are there the same number of otus in the fasta and name files
if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file. These should match.\n"); m->control_pressed = true; }
if (m->control_pressed) { return 0; }
- //at this point we are fairly sure the repfasta, repnames and contaxonomy files match so lets proceed with the listfile
- ListVector* list = getList();
-
- if (m->control_pressed) { delete list; return 0; }
-
- GroupMap* groupmap = NULL;
- if (groupfile != "") {
- groupmap = new GroupMap(groupfile);
- groupmap->readMap();
- }
-
- if (m->control_pressed) { delete list; if (groupfile != "") { delete groupmap; } return 0; }
- if (outputDir == "") { outputDir += m->hasPath(listfile); }
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + "database";
+ string outputFileName = "";
+ if (listfile != "") { outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("database"); }
+ else { outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + getOutputFileNameTag("database"); }
outputNames.push_back(outputFileName); outputTypes["database"].push_back(outputFileName);
ofstream out;
m->openOutputFile(outputFileName, out);
string header = "OTUNumber\tAbundance\t";
- if (groupfile != "") {
- header = "OTUNumber\t";
- for (int i = 0; i < groupmap->getNamesOfGroups().size(); i++) { header += (groupmap->getNamesOfGroups())[i] + '\t'; }
- }
- header += "repSeqName\trepSeq\tOTUConTaxonomy";
- out << header << endl;
+
- for (int i = 0; i < list->getNumBins(); i++) {
+ if (listfile != "") {
+ //at this point we are fairly sure the repfasta, repnames and contaxonomy files match so lets proceed with the listfile
+ ListVector* list = getList();
- if (m->control_pressed) { break; }
-
- out << (i+1) << '\t';
+ if (otuLabels.size() != list->getNumBins()) {
+ m->mothurOut("[ERROR]: you have " + toString(otuLabels.size()) + " otus in your contaxonomy file, but your list file has " + toString(list->getNumBins()) + " otus. These should match. Make sure you are using files for the same distance.\n"); m->control_pressed = true; }
- vector<string> binNames;
- string bin = list->get(i);
+ if (m->control_pressed) { delete list; return 0; }
- map<string, string>::iterator it = repNames.find(bin);
- if (it == repNames.end()) {
- m->mothurOut("[ERROR: OTU " + toString(i+1) + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
+ GroupMap* groupmap = NULL;
+ if (groupfile != "") {
+ groupmap = new GroupMap(groupfile);
+ groupmap->readMap();
}
- m->splitAtComma(bin, binNames);
+ if (m->control_pressed) { delete list; if (groupfile != "") { delete groupmap; } return 0; }
- //sanity check
- if (binNames.size() != classifyOtuSizes[i]) {
- m->mothurOut("[ERROR: OTU " + toString(i+1) + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
+ if (groupfile != "") {
+ header = "OTUNumber\t";
+ for (int i = 0; i < groupmap->getNamesOfGroups().size(); i++) { header += (groupmap->getNamesOfGroups())[i] + '\t'; }
}
+ header += "repSeqName\trepSeq\tOTUConTaxonomy";
+ out << header << endl;
- //output abundances
- if (groupfile != "") {
- string groupAbunds = "";
- map<string, int> counts;
- //initialize counts to 0
- for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { counts[(groupmap->getNamesOfGroups())[j]] = 0; }
+ for (int i = 0; i < list->getNumBins(); i++) {
- //find abundances by group
- bool error = false;
- for (int j = 0; j < binNames.size(); j++) {
- string group = groupmap->getGroup(binNames[j]);
- if (group == "not found") {
- m->mothurOut("[ERROR]: " + binNames[j] + " is not in your groupfile, please correct.\n");
- error = true;
- }else { counts[group]++; }
+ if (m->control_pressed) { break; }
+
+ out << otuLabels[i] << '\t';
+
+ vector<string> binNames;
+ string bin = list->get(i);
+
+ map<string, string>::iterator it = repNames.find(bin);
+ if (it == repNames.end()) {
+ m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
+ }
+
+ m->splitAtComma(bin, binNames);
+
+ //sanity check
+ if (binNames.size() != classifyOtuSizes[i]) {
+ m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
}
- //output counts
- for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { out << counts[(groupmap->getNamesOfGroups())[j]] << '\t'; }
+ //output abundances
+ if (groupfile != "") {
+ string groupAbunds = "";
+ map<string, int> counts;
+ //initialize counts to 0
+ for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { counts[(groupmap->getNamesOfGroups())[j]] = 0; }
+
+ //find abundances by group
+ bool error = false;
+ for (int j = 0; j < binNames.size(); j++) {
+ string group = groupmap->getGroup(binNames[j]);
+ if (group == "not found") {
+ m->mothurOut("[ERROR]: " + binNames[j] + " is not in your groupfile, please correct.\n");
+ error = true;
+ }else { counts[group]++; }
+ }
+
+ //output counts
+ for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { out << counts[(groupmap->getNamesOfGroups())[j]] << '\t'; }
+
+ if (error) { m->control_pressed = true; }
+ }else { out << binNames.size() << '\t'; }
- if (error) { m->control_pressed = true; }
- }else { out << binNames.size() << '\t'; }
+ //output repSeq
+ out << it->second << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl;
+ }
+
- //output repSeq
- out << it->second << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl;
+ delete list;
+ if (groupfile != "") { delete groupmap; }
+
+ }else {
+ vector<SharedRAbundVector*> lookup = getShared();
+
+ header = "OTUNumber\t";
+ for (int i = 0; i < lookup.size(); i++) { header += lookup[i]->getGroup() + '\t'; }
+ header += "repSeqName\trepSeq\tOTUConTaxonomy";
+ out << header << endl;
+
+ for (int h = 0; h < lookup[0]->getNumBins(); h++) {
+
+ if (m->control_pressed) { break; }
+
+ int index = findIndex(otuLabels, m->currentBinLabels[h]);
+ if (index == -1) { m->mothurOut("[ERROR]: " + m->currentBinLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; }
+
+ if (m->control_pressed) { break; }
+
+ out << otuLabels[index] << '\t';
+
+ int totalAbund = 0;
+ for (int i = 0; i < lookup.size(); i++) {
+ int abund = lookup[i]->getAbundance(h);
+ totalAbund += abund;
+ out << abund << '\t';
+ }
+
+ //sanity check
+ if (totalAbund != classifyOtuSizes[index]) {
+ m->mothurOut("[WARNING]: OTU " + m->currentBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); //m->control_pressed = true; break;
+ }
+
+ //output repSeq
+ out << seqs[index].getName() << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl;
+ }
}
out.close();
-
- delete list;
- if (groupfile != "") { delete groupmap; }
-
if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
m->mothurOutEndLine();
}
}
//**********************************************************************************************************************
-vector<int> CreateDatabaseCommand::readTax(vector<string>& taxonomies){
+int CreateDatabaseCommand::findIndex(vector<string>& otuLabels, string label){
+    // Linear scan of otuLabels for the first entry equal to label.
+    // Returns that entry's zero-based position, or -1 when nothing matches.
+    try {
+        int position = 0;
+        while (position < otuLabels.size()) {
+            if (otuLabels[position] == label) { return position; }
+            position++;
+        }
+        //ran off the end without a match
+        return -1;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CreateDatabaseCommand", "findIndex");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+vector<int> CreateDatabaseCommand::readTax(vector<string>& taxonomies, vector<string>& otuLabels){
try {
vector<int> sizes;
sizes.push_back(size);
taxonomies.push_back(tax);
+ otuLabels.push_back(otu);
}
in.close();
exit(1);
}
}
-/**********************************************************************************************************************/
-int CreateDatabaseCommand::readNames(map<string, string>& nameMap) {
- try {
-
- //open input file
- ifstream in;
- m->openInputFile(repnamesfile, in);
-
- while (!in.eof()) {
- if (m->control_pressed) { break; }
-
- string firstCol, secondCol;
- in >> firstCol >> secondCol; m->gobble(in);
-
- nameMap[secondCol] = firstCol;
- }
- in.close();
-
- return nameMap.size();
-
- }
- catch(exception& e) {
- m->errorOut(e, "CreateDatabaseCommand", "readNames");
- exit(1);
- }
-}
//**********************************************************************************************************************
ListVector* CreateDatabaseCommand::getList(){
try {
}
}
//**********************************************************************************************************************
+vector<SharedRAbundVector*> CreateDatabaseCommand::getShared(){
+    // Reads sharedfile and returns the set of SharedRAbundVectors for the requested label.
+    //  - If label is empty, the first set in the file is returned and label is set to its label.
+    //  - Otherwise sets are read until one carries the requested label; if the requested label
+    //    is not present, the set for the previously read label is re-read and returned instead.
+    // The returned pointers are not freed here; the caller receives them as-is.
+    try {
+        InputData input(sharedfile, "sharedfile");
+        vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+        string lastLabel = lookup[0]->getLabel();
+
+        //no label requested: use the first one in the file
+        if (label == "") { label = lastLabel; return lookup; }
+
+        //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+        set<string> labels; labels.insert(label);
+        set<string> processedLabels;
+        set<string> userLabels = labels;
+
+        //as long as you are not at the end of the file or done with the lines you want
+        while((lookup[0] != NULL) && (userLabels.size() != 0)) {
+            if (m->control_pressed) { return lookup; }
+
+            //exact match on the requested label
+            if(labels.count(lookup[0]->getLabel()) == 1){
+                processedLabels.insert(lookup[0]->getLabel());
+                userLabels.erase(lookup[0]->getLabel());
+                break;
+            }
+
+            //requested label was skipped over: fall back to the previous label's data
+            if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                string saveLabel = lookup[0]->getLabel();
+
+                for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+                lookup = input.getSharedRAbundVectors(lastLabel);
+
+                processedLabels.insert(lookup[0]->getLabel());
+                userLabels.erase(lookup[0]->getLabel());
+
+                //restore real lastlabel to save below
+                //NOTE(review): this relabels the returned lastLabel data with the later label, and only on lookup[0] - confirm this is intended
+                lookup[0]->setLabel(saveLabel);
+                break;
+            }
+
+            lastLabel = lookup[0]->getLabel();
+
+            //get next line to process
+            //prevent memory leak
+            for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+            lookup = input.getSharedRAbundVectors();
+        }
+
+
+        if (m->control_pressed) { return lookup; }
+
+        //output error messages about any remaining user labels
+        set<string>::iterator it;
+        bool needToRun = false;
+        for (it = userLabels.begin(); it != userLabels.end(); it++) {
+            m->mothurOut("Your file does not include the label " + *it);
+            if (processedLabels.count(lastLabel) != 1) {
+                m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                needToRun = true;
+            }else {
+                m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+            }
+        }
+
+        //run last label if you need to
+        if (needToRun == true) {
+            for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+            lookup = input.getSharedRAbundVectors(lastLabel);
+        }
+
+        return lookup;
+    }
+    catch(exception& e) {
+        //report under this function's own name (previously mislabelled as "getList")
+        m->errorOut(e, "CreateDatabaseCommand", "getShared");
+        exit(1);
+    }
+}
+
+//**********************************************************************************************************************
vector<string> setParameters();
string getCommandName() { return "create.database"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Create.database"; }
string getDescription() { return "creates database file that includes, abundances across groups, representative sequences, and taxonomy for each OTU"; }
private:
bool abort;
- string listfile, groupfile, repfastafile, repnamesfile, contaxonomyfile, label, outputDir;
+ string sharedfile, listfile, groupfile, repfastafile, repnamesfile, contaxonomyfile, label, outputDir;
vector<string> outputNames;
vector<int> readFasta(vector<Sequence>&);
- vector<int> readTax(vector<string>&);
- int readNames(map<string, string>&);
+ vector<int> readTax(vector<string>&, vector<string>&);
ListVector* getList();
+ vector<SharedRAbundVector*> getShared();
+ int findIndex(vector<string>&, string);
};
}
}
//**********************************************************************************************************************
+string DeconvoluteCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Returns the output-file name tag for the given output type:
+    //   "fasta" -> "unique" followed by the extension of inputName, "name" -> "names".
+    // Logs an error and returns "" for a type missing from outputTypes; a type that is
+    // present but has no tag defined additionally sets m->control_pressed.
+    try {
+        //is this a type this command creates
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "fasta") { return "unique" + m->getExtension(inputName); }
+        if (type == "name") { return "names"; }
+
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "DeconvoluteCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
DeconvoluteCommand::DeconvoluteCommand(){
try {
abort = true; calledHelp = true;
if (abort == true) { if (calledHelp) { return 0; } return 2; }
//prepare filenames and open files
- string outNameFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + "names";
- string outFastaFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + "unique" + m->getExtension(inFastaName);
+ string outNameFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + getOutputFileNameTag("name");
+ string outFastaFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + getOutputFileNameTag("fasta", inFastaName);
map<string, string> nameMap;
map<string, string>::iterator itNames;
if (oldNameMapFName != "") {
m->readNames(oldNameMapFName, nameMap);
- if (oldNameMapFName == outNameFile){ outNameFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + "unique.names"; }
+ if (oldNameMapFName == outNameFile){ outNameFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + "unique." + getOutputFileNameTag("name"); }
}
if (m->control_pressed) { return 0; }
vector<string> setParameters();
string getCommandName() { return "unique.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Unique.seqs"; }
string getDescription() { return "creates a fasta containing the unique sequences as well as a namesfile with the names each sequence represents"; }
}
}
//**********************************************************************************************************************
+string DegapSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Returns the output-file name tag for the given output type: "fasta" -> "ng.fasta".
+    // Logs an error and returns "" for a type missing from outputTypes; a type that is
+    // present but has no tag defined additionally sets m->control_pressed.
+    // inputName is not used by this command.
+    try {
+        //is this a type this command creates
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "fasta") { return "ng.fasta"; }
+
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "DegapSeqsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
DegapSeqsCommand::DegapSeqsCommand(){
try {
abort = true; calledHelp = true;
ofstream outFASTA;
string tempOutputDir = outputDir;
if (outputDir == "") { tempOutputDir = m->hasPath(fastaFileNames[s]); }
- string degapFile = tempOutputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "ng.fasta";
+ string degapFile = tempOutputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("fasta");
m->openOutputFile(degapFile, outFASTA);
while(!inFASTA.eof()){
vector<string> setParameters();
string getCommandName() { return "degap.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Degap.seqs"; }
string getDescription() { return "removes gap characters from sequences"; }
}
}
//**********************************************************************************************************************
+string DeUniqueSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Returns the output-file name tag for the given output type: "fasta" -> "redundant.fasta".
+    // Logs an error and returns "" for a type missing from outputTypes; a type that is
+    // present but has no tag defined additionally sets m->control_pressed.
+    // inputName is not used by this command.
+    try {
+        //is this a type this command creates
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "fasta") { return "redundant.fasta"; }
+
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "DeUniqueSeqsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
DeUniqueSeqsCommand::DeUniqueSeqsCommand(){
try {
abort = true; calledHelp = true;
string outFastaFile = m->getRootName(m->getSimpleName(fastaFile));
int pos = outFastaFile.find("unique");
if (pos != string::npos) {
- outFastaFile = outputDir + outFastaFile.substr(0, pos) + "redundant" + m->getExtension(fastaFile);
+ outFastaFile = outputDir + outFastaFile.substr(0, pos) + getOutputFileNameTag("fasta");
}else{
- outFastaFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "redundant" + m->getExtension(fastaFile);
+ outFastaFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("fasta");
}
m->openOutputFile(outFastaFile, out);
vector<string> setParameters();
string getCommandName() { return "deunique.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Deunique.seqs"; }
string getDescription() { return "reverse of the unique.seqs command, and creates a fasta file from a fasta and name file"; }
}
}
//**********************************************************************************************************************
+string DeuniqueTreeCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Returns the output-file name tag for the given output type: "tree" -> "deunique.tre".
+    // Logs an error and returns "" for a type missing from outputTypes; a type that is
+    // present but has no tag defined additionally sets m->control_pressed.
+    // inputName is not used by this command.
+    try {
+        //is this a type this command creates
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "tree") { return "deunique.tre"; }
+
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "DeuniqueTreeCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
DeuniqueTreeCommand::DeuniqueTreeCommand(){
try {
abort = true; calledHelp = true;
delete reader;
//print new Tree
- string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + "deunique.tre";
+ string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("tree");
outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
ofstream out;
m->openOutputFile(outputFile, out);
vector<string> setParameters();
string getCommandName() { return "deunique.tree"; }
string getCommandCategory() { return "Hypothesis Testing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Deunique.tree"; }
string getDescription() { return "add the redundant sequence names back into a tree of unique sequences"; }
CommandParameter pcolumn("column", "InputTypes", "", "", "none", "none", "OldFastaColumn",false,false); parameters.push_back(pcolumn);
CommandParameter poldfasta("oldfasta", "InputTypes", "", "", "none", "none", "OldFastaColumn",false,false); parameters.push_back(poldfasta);
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter poutput("output", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(poutput);
+ CommandParameter poutput("output", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(poutput);
CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "",false,false); parameters.push_back(pcalc);
CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pcountends);
CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcompress);
}
}
//**********************************************************************************************************************
+string DistanceCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Returns the output-file name tag for the given output type:
+    //   "phylip" -> "dist", "column" -> "dist".
+    // Logs an error and returns "" for a type missing from outputTypes; a type that is
+    // present but has no tag defined additionally sets m->control_pressed.
+    // inputName is not used by this command.
+    try {
+        //is this a type this command creates
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        //both distance formats share the same tag
+        if ((type == "phylip") || (type == "column")) { return "dist"; }
+
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "DistanceCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
DistanceCommand::DistanceCommand(){
try {
abort = true; calledHelp = true;
convert(temp, compress);
output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "column"; }
+ if (output == "phylip") { output = "lt"; }
if (((column != "") && (oldfastafile == "")) || ((column == "") && (oldfastafile != ""))) { m->mothurOut("If you provide column or oldfasta, you must provide both."); m->mothurOutEndLine(); abort=true; }
string outputFile;
if (output == "lt") { //does the user want lower triangle phylip formatted file
- outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "phylip.dist";
+ outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "phylip." + getOutputFileNameTag("phylip");
m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile);
//output numSeqs to phylip formatted dist file
}else if (output == "column") { //user wants column format
- outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
+ outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("column");
outputTypes["column"].push_back(outputFile);
//so we don't accidentally overwrite
m->mothurRemove(outputFile);
}else { //assume square
- outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "square.dist";
+ outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "square." + getOutputFileNameTag("phylip");
m->mothurRemove(outputFile);
outputTypes["phylip"].push_back(outputFile);
}
vector<string> setParameters();
string getCommandName() { return "dist.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Schloss PD (2010). The effects of alignment quality, distance calculation method, sequence filtering, and region on the analysis of 16S rRNA gene-based studies. PLoS Comput Biol 6: e1000844. \nhttp://www.mothur.org/wiki/Dist.seqs"; }
string getDescription() { return "calculate the pairwaise distances between aligned sequences"; }
}
}
//**********************************************************************************************************************
+string FilterSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Returns the output-file name tag for the given output type:
+    //   "fasta" -> "filter.fasta", "filter" -> "filter".
+    // Logs an error and returns "" for a type missing from outputTypes; a type that is
+    // present but has no tag defined additionally sets m->control_pressed.
+    // inputName is not used by this command.
+    try {
+        //is this a type this command creates
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "fasta") { return "filter.fasta"; }
+        if (type == "filter") { return "filter"; }
+
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "FilterSeqsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
FilterSeqsCommand::FilterSeqsCommand(){
try {
abort = true; calledHelp = true;
//prevent giantic file name
string filterFile;
- if (fastafileNames.size() > 3) { filterFile = outputDir + "merge.filter"; }
- else { filterFile = outputDir + filterFileName + ".filter"; }
+ if (fastafileNames.size() > 3) { filterFile = outputDir + "merge." + getOutputFileNameTag("filter"); }
+ else { filterFile = outputDir + filterFileName + "." + getOutputFileNameTag("filter"); }
m->openOutputFile(filterFile, outFilter);
outFilter << filter << endl;
for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- string filteredFasta = outputDir + m->getRootName(m->getSimpleName(fastafileNames[s])) + "filter.fasta";
+ string filteredFasta = outputDir + m->getRootName(m->getSimpleName(fastafileNames[s])) + getOutputFileNameTag("fasta");
#ifdef USE_MPI
int pid, numSeqsPerProcessor, num;
int tag = 2001;
vector<string> setParameters();\r
string getCommandName() { return "filter.seqs"; }\r
string getCommandCategory() { return "Sequence Processing"; }\r
+ string getOutputFileNameTag(string, string);\r
string getHelpString(); \r
string getCitation() { return "http://www.mothur.org/wiki/Filter.seqs"; }\r
string getDescription() { return "removes columns from alignments based on a criteria defined by the user"; }\r
vector<string> setParameters();
string getCommandName() { return "get.commandinfo"; }
string getCommandCategory() { return "Hidden"; }
+ string getOutputFileNameTag(string, string) { return ""; }
string getHelpString();
string getCitation() { return "no citation"; }
string getDescription() { return "get.commandinfo"; }
exit(1);
}
}
+//**********************************************************************************************************************
+string GetCoreMicroBiomeCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Returns the output-file name tag for the given output type:
+    //   "coremicrobiome" -> "core.microbiome".
+    // Logs an error and returns "" for a type missing from outputTypes; a type that is
+    // present but has no tag defined additionally sets m->control_pressed.
+    // inputName is not used by this command.
+    try {
+        //is this a type this command creates
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "coremicrobiome") { return "core.microbiome"; }
+
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "GetCoreMicroBiomeCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+
//**********************************************************************************************************************
GetCoreMicroBiomeCommand::GetCoreMicroBiomeCommand(){
try {
int GetCoreMicroBiomeCommand::createTable(vector<SharedRAbundFloatVector*>& lookup){
try {
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + ".core.microbiome";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + "." + getOutputFileNameTag("coremicrobiome");
outputNames.push_back(outputFileName); outputTypes["coremicrobiome"].push_back(outputFileName);
ofstream out;
m->openOutputFile(outputFileName, out);
string getCommandName() { return "get.coremicrobiome"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
//commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Get.coremicrobiome"; }
string getDescription() { return "determines the fraction of OTUs that are found in varying numbers of samples for different minimum relative abundances"; }
m->setFlowFile("");
}else if (types[i] == "biom") {
m->setBiomFile("");
+ }else if (types[i] == "count") {
+ m->setCountTableFile("");
}else if (types[i] == "processors") {
m->setProcessors("1");
}else if (types[i] == "all") {
vector<string> setParameters();
string getCommandName() { return "get.current"; }
string getCommandCategory() { return "General"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string) { return ""; }
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Get.current"; }
string getDescription() { return "get current files saved by mothur"; }
try {
string helpString = "";
helpString += "The get.group command parameter is shared and it's required if you have no valid current file.\n";
- //m->mothurOut("The get.group command outputs a .bootGroups file to you can use in addition to the tree file generated by the bootstrap.shared command to run the consensus command.\n");
helpString += "You may not use any parameters with the get.group command.\n";
helpString += "The get.group command should be in the following format: \n";
helpString += "get.group()\n";
exit(1);
}
}
+
//**********************************************************************************************************************
GetgroupCommand::GetgroupCommand(){
try {
abort = true; calledHelp = true;
setParameters();
- vector<string> tempOutNames;
- outputTypes["bootgroup"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "GetgroupCommand", "GetgroupCommand");
if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
}
- //initialize outputTypes
- vector<string> tempOutNames;
- outputTypes["bootgroup"] = tempOutNames;
-
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
if (inputDir == "not found"){ inputDir = ""; }
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
-
- //open output file
- outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "bootGroups";
- m->openOutputFile(outputFile, out);
-
+
InputData input(sharedfile, "sharedfile");
vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
for (int i = 0; i < lookup.size(); i++) {
- out << lookup[i]->getGroup() << '\t' << lookup[i]->getGroup() << endl;
m->mothurOut(lookup[i]->getGroup()); m->mothurOutEndLine();
delete lookup[i];
}
-
- out.close();
-
- if (m->control_pressed) { m->mothurRemove(outputFile); return 0; }
-
+
m->mothurOutEndLine();
m->mothurOut("Output File Name: "); m->mothurOutEndLine();
- m->mothurOut(outputFile); m->mothurOutEndLine(); outputNames.push_back(outputFile); outputTypes["bootgroup"].push_back(outputFile);
m->mothurOutEndLine();
return 0;
exit(1);
}
}
+//**********************************************************************************************************************
vector<string> setParameters();
string getCommandName() { return "get.group"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string) { return ""; }
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Get.group"; }
string getDescription() { return "outputs group names"; }
exit(1);
}
}
+//**********************************************************************************************************************
+string GetGroupsCommand::getOutputFileNameTag(string type, string inputName=""){ // filename tag for one output type; the "" default is given on this definition, not on the in-class declaration
+ try {
+ string outputFileName = "";
+ map<string, vector<string> >::iterator it;
+ // inputName is the input file whose extension (via m->getExtension) is reused in the tag
+ //only types registered in outputTypes are valid; anything else is reported and the empty tag is returned
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+ else {
+ if (type == "fasta") { outputFileName = "pick" + m->getExtension(inputName); } // every supported type gets the same "pick" + extension tag
+ else if (type == "taxonomy") { outputFileName = "pick" + m->getExtension(inputName); }
+ else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); }
+ else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
+ else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
+ else if (type == "shared") { outputFileName = "pick" + m->getExtension(inputName); }
+ else if (type == "design") { outputFileName = "pick" + m->getExtension(inputName); }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type is in outputTypes but has no tag here; control_pressed presumably makes the command abort - confirm
+ }
+ return outputFileName; // still "" if either error branch ran
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetGroupsCommand", "getOutputFileNameTag");
+ exit(1); // the process ends here once m->errorOut has been handed the exception
+ }
+}
+
//**********************************************************************************************************************
GetGroupsCommand::GetGroupsCommand(){
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
//get groups you want to remove
- if (accnosfile != "") { readAccnos(); }
+ if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups); }
if (groupfile != "") {
groupMap = new GroupMap(groupfile);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" + m->getExtension(fastafile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
ofstream out;
m->openOutputFile(outputFileName, out);
while(lookup[0] != NULL) {
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + lookup[0]->getLabel() + ".pick" + m->getExtension(sharedfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + lookup[0]->getLabel() + "." + getOutputFileNameTag("shared", sharedfile);
ofstream out;
m->openOutputFile(outputFileName, out);
outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" + m->getExtension(listfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" + m->getExtension(namefile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(designfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(designfile)) + "pick" + m->getExtension(designfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(designfile)) + getOutputFileNameTag("design", designfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
ofstream out;
m->openOutputFile(outputFileName, out);
}
}
//**********************************************************************************************************************
-void GetGroupsCommand::readAccnos(){
- try {
- Groups.clear();
-
- ifstream in;
- m->openInputFile(accnosfile, in);
- string name;
-
- while(!in.eof()){
- in >> name;
-
- Groups.push_back(name);
-
- m->gobble(in);
- }
- in.close();
-
- m->setGroups(Groups);
-
- }
- catch(exception& e) {
- m->errorOut(e, "GetGroupsCommand", "readAccnos");
- exit(1);
- }
-}
-//**********************************************************************************************************************
int GetGroupsCommand::fillNames(){
try {
vector<string> seqs = groupMap->getNamesSeqs();
vector<string> setParameters();
string getCommandName() { return "get.groups"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Get.groups"; }
string getDescription() { return "gets sequences from a list, fasta, name, group, shared, design or taxonomy file from a given group or set of groups"; }
int readFasta();
int readName();
int readGroup();
- void readAccnos();
int readList();
int readTax();
int fillNames();
vector<string> setParameters();
string getCommandName() { return "get.label"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string) { return ""; }
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Get.label"; }
string getDescription() { return "outputs labels"; }
exit(1);
}
}
-
+//**********************************************************************************************************************
+string GetLineageCommand::getOutputFileNameTag(string type, string inputName=""){ // filename tag for one output type; the "" default is given on this definition, not on the in-class declaration
+ try {
+ string outputFileName = "";
+ map<string, vector<string> >::iterator it;
+ // inputName is the input file whose extension (via m->getExtension) is reused in the tag
+ //only types registered in outputTypes are valid; anything else is reported and the empty tag is returned
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+ else {
+ if (type == "fasta") { outputFileName = "pick" + m->getExtension(inputName); }
+ else if (type == "taxonomy") { outputFileName = "pick" + m->getExtension(inputName); }
+ else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); }
+ else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
+ else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
+ else if (type == "alignreport") { outputFileName = "pick.align.report"; } // fixed tag: inputName is not read for this type
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type is in outputTypes but has no tag here; control_pressed presumably makes the command abort - confirm
+ }
+ return outputFileName; // still "" if either error branch ran
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetLineageCommand", "getOutputFileNameTag");
+ exit(1); // the process ends here once m->errorOut has been handed the exception
+ }
+}
//**********************************************************************************************************************
GetLineageCommand::GetLineageCommand(){
try {
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" + m->getExtension(fastafile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" + m->getExtension(listfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" + m->getExtension(namefile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(alignfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + "pick.align.report";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + getOutputFileNameTag("alignreport");
ofstream out;
m->openOutputFile(outputFileName, out);
vector<string> setParameters();
string getCommandName() { return "get.lineage"; }
string getCommandCategory() { return "Phylotype Analysis"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Get.lineage"; }
string getDescription() { return "gets sequences from a list, fasta, name, group, alignreport or taxonomy file from a given taxonomy or set of taxonomies"; }
}
}
//**********************************************************************************************************************
+string GetListCountCommand::getOutputFileNameTag(string type, string inputName=""){ // filename tag for one output type; the "" default is given on this definition, not on the in-class declaration
+ try {
+ string outputFileName = "";
+ map<string, vector<string> >::iterator it;
+ // inputName is accepted but never read in this function
+ //only types registered in outputTypes are valid; anything else is reported and the empty tag is returned
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+ else {
+ if (type == "otu") { outputFileName = "otu"; } // tag carries no leading "."
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type is in outputTypes but has no tag here; control_pressed presumably makes the command abort - confirm
+ }
+ return outputFileName; // still "" if either error branch ran
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetListCountCommand", "getOutputFileNameTag");
+ exit(1); // the process ends here once m->errorOut has been handed the exception
+ }
+}
+//**********************************************************************************************************************
GetListCountCommand::GetListCountCommand(){
try {
abort = true; calledHelp = true;
try {
string binnames;
if (outputDir == "") { outputDir += m->hasPath(listfile); }
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + list->getLabel() + ".otu";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + list->getLabel() + "." +getOutputFileNameTag("otu");
m->openOutputFile(outputFileName, out);
outputNames.push_back(outputFileName); outputTypes["otu"].push_back(outputFileName);
string getCitation() { return "http://www.mothur.org/wiki/Get.otulist"; }
string getDescription() { return "lists each OTU number and the sequence contained in that OTU"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
int execute();
string helpString = "";
helpString += "The get.otulabels command can be used to select specific otus with the output from classify.otu, otu.association, or corr.axes.\n";
helpString += "The get.otulabels parameters are: constaxonomy, otucorr, corraxes, and accnos.\n";
- helpString += "The constaxonomy parameter is input the results of the classify.otu command.\n";
- helpString += "The otucorr parameter is input the results of the otu.association command.\n";
- helpString += "The corraxes parameter is input the results of the corr.axes command.\n";
+ helpString += "The constaxonomy parameter is used to input the results of the classify.otu command.\n";
+ helpString += "The otucorr parameter is used to input the results of the otu.association command.\n";
+ helpString += "The corraxes parameter is used to input the results of the corr.axes command.\n";
helpString += "The get.otulabels commmand should be in the following format: \n";
helpString += "get.otulabels(accnos=yourListOfOTULabels, corraxes=yourCorrAxesFile)\n";
return helpString;
}
}
//**********************************************************************************************************************
+string GetOtuLabelsCommand::getOutputFileNameTag(string type, string inputName=""){ // filename tag for one output type; the "" default is given on this definition, not on the in-class declaration
+ try {
+ string outputFileName = "";
+ map<string, vector<string> >::iterator it;
+ // inputName is accepted but never read in this function: all three tags are fixed strings
+ //only types registered in outputTypes are valid; anything else is reported and the empty tag is returned
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+ else {
+ if (type == "constaxonomy") { outputFileName = "pick.taxonomy"; }
+ else if (type == "otucorr") { outputFileName = "pick.corr"; }
+ else if (type == "corraxes") { outputFileName = "pick.axes"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type is in outputTypes but has no tag here; control_pressed presumably makes the command abort - confirm
+ }
+ return outputFileName; // still "" if either error branch ran
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetOtuLabelsCommand", "getOutputFileNameTag");
+ exit(1); // the process ends here once m->errorOut has been handed the exception
+ }
+}
+//**********************************************************************************************************************
GetOtuLabelsCommand::GetOtuLabelsCommand(){
try {
abort = true; calledHelp = true;
setParameters();
vector<string> tempOutNames;
- outputTypes["contaxonomy"] = tempOutNames;
- outputTypes["otu.corr"] = tempOutNames;
- outputTypes["corr.axes"] = tempOutNames;
+ outputTypes["constaxonomy"] = tempOutNames;
+ outputTypes["otucorr"] = tempOutNames;
+ outputTypes["corraxes"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "GetOtuLabelsCommand", "GetOtuLabelsCommand");
}
vector<string> tempOutNames;
- outputTypes["contaxonomy"] = tempOutNames;
- outputTypes["otu.corr"] = tempOutNames;
- outputTypes["corr.axes"] = tempOutNames;
+ outputTypes["constaxonomy"] = tempOutNames;
+ outputTypes["otucorr"] = tempOutNames;
+ outputTypes["corraxes"] = tempOutNames;
//check for parameters
accnosfile = validParameter.validFile(parameters, "accnos", true);
if (abort == true) { if (calledHelp) { return 0; } return 2; }
//get labels you want to keep
- readAccnos();
+ labels = m->readAccnos(accnosfile);
if (m->control_pressed) { return 0; }
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(constaxonomyfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(constaxonomyfile)) + "pick.taxonomy";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(constaxonomyfile)) + getOutputFileNameTag("constaxonomy");
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(otucorrfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(otucorrfile)) + "pick.corr";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(otucorrfile)) + getOutputFileNameTag("otucorr");
ofstream out;
m->openOutputFile(outputFileName, out);
out.close();
if (wroteSomething == false) { m->mothurOut("Your file does not contain any labels from the .accnos file."); m->mothurOutEndLine(); }
- outputNames.push_back(outputFileName); outputTypes["otu.corr"].push_back(outputFileName);
+ outputNames.push_back(outputFileName); outputTypes["otucorr"].push_back(outputFileName);
m->mothurOut("Selected " + toString(selectedCount) + " lines from your otu.corr file."); m->mothurOutEndLine();
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(corraxesfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(corraxesfile)) + "pick.axes";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(corraxesfile)) + getOutputFileNameTag("corraxes");
ofstream out;
m->openOutputFile(outputFileName, out);
out.close();
if (wroteSomething == false) { m->mothurOut("Your file does not contain any labels from the .accnos file."); m->mothurOutEndLine(); }
- outputNames.push_back(outputFileName); outputTypes["corr.axes"].push_back(outputFileName);
+ outputNames.push_back(outputFileName); outputTypes["corraxes"].push_back(outputFileName);
m->mothurOut("Selected " + toString(selectedCount) + " lines from your corr.axes file."); m->mothurOutEndLine();
exit(1);
}
}
-
-//**********************************************************************************************************************
-int GetOtuLabelsCommand::readAccnos(){
- try {
-
- ifstream in;
- m->openInputFile(accnosfile, in);
- string name;
-
- while(!in.eof()){
- in >> name;
-
- labels.insert(name);
-
- m->gobble(in);
- }
- in.close();
-
- return 0;
-
- }
- catch(exception& e) {
- m->errorOut(e, "GetOtuLabelsCommand", "readAccnos");
- exit(1);
- }
-}
//**********************************************************************************************************************
vector<string> setParameters();
string getCommandName() { return "get.otulabels"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Get.otulabels"; }
string getDescription() { return "Can be used with output from classify.otu, otu.association, or corr.axes to select specific otus."; }
int readClassifyOtu();
int readOtuAssociation();
int readCorrAxes();
- int readAccnos();
-
};
/**************************************************************************************************/
}
}
//**********************************************************************************************************************
+string GetOTURepCommand::getOutputFileNameTag(string type, string inputName=""){ // filename tag for one output type; the "" default is given on this definition, not on the in-class declaration
+ try {
+ string outputFileName = "";
+ map<string, vector<string> >::iterator it;
+ // inputName is accepted but never read in this function: both tags are fixed strings
+ //only types registered in outputTypes are valid; anything else is reported and the empty tag is returned
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+ else {
+ if (type == "fasta") { outputFileName = "rep.fasta"; } // tags carry no leading "."
+ else if (type == "name") { outputFileName = "rep.names"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type is in outputTypes but has no tag here; control_pressed presumably makes the command abort - confirm
+ }
+ return outputFileName; // still "" if either error branch ran
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetOTURepCommand", "getOutputFileNameTag");
+ exit(1); // the process ends here once m->errorOut has been handed the exception
+ }
+}
+//**********************************************************************************************************************
GetOTURepCommand::GetOTURepCommand(){
try {
abort = true; calledHelp = true;
list = readMatrix->getListVector();
- SparseMatrix* matrix = readMatrix->getMatrix();
+ SparseDistanceMatrix* matrix = readMatrix->getDMatrix();
// Create a data structure to quickly access the distance information.
// It consists of a vector of distance maps, where each map contains
// all distances of a certain sequence. Vector and maps are accessed
// via the index of a sequence in the distance matrix
seqVec = vector<SeqMap>(list->size());
- for (MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++) {
- if (m->control_pressed) { delete readMatrix; return 0; }
- seqVec[currentCell->row][currentCell->column] = currentCell->dist;
+ for (int i = 0; i < matrix->seqVec.size(); i++) {
+ for (int j = 0; j < matrix->seqVec[i].size(); j++) {
+ if (m->control_pressed) { delete readMatrix; return 0; }
+ //already added everyone else in row
+ if (i < matrix->seqVec[i][j].index) { seqVec[i][matrix->seqVec[i][j].index] = matrix->seqVec[i][j].dist; }
+ }
}
//add dummy map for unweighted calc
SeqMap dummy;
map<string, ofstream*> filehandles;
if (Groups.size() == 0) { //you don't want to use groups
- outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".rep.names";
+ outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + getOutputFileNameTag("name");
m->openOutputFile(outputNamesFile, newNamesOutput);
outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile);
outputNameFiles[outputNamesFile] = processList->getLabel();
for (int i=0; i<Groups.size(); i++) {
temp = new ofstream;
filehandles[Groups[i]] = temp;
- outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + ".rep.names";
+ outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + "." + getOutputFileNameTag("name");
m->openOutputFile(outputNamesFile, *(temp));
outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile);
//create output file
if (outputDir == "") { outputDir += m->hasPath(listfile); }
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + label + ".rep.fasta";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + label + "." + getOutputFileNameTag("fasta");
m->openOutputFile(outputFileName, out);
vector<repStruct> reps;
outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
#include "readmatrix.hpp"
#include "formatmatrix.h"
-typedef list<PCell>::iterator MatData;
typedef map<int, float> SeqMap;
struct repStruct {
vector<string> setParameters();
string getCommandName() { return "get.oturep"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Get.oturep"; }
string getDescription() { return "gets a representative sequence for each OTU"; }
exit(1);
}
}
-
+//**********************************************************************************************************************
+string GetOtusCommand::getOutputFileNameTag(string type, string inputName=""){ // filename tag for one output type; the "" default is given on this definition, not on the in-class declaration
+ try {
+ string outputFileName = "";
+ map<string, vector<string> >::iterator it;
+ // inputName is the input file whose extension (via m->getExtension) is reused in the tag
+ //only types registered in outputTypes are valid; anything else is reported and the empty tag is returned
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+ else {
+ if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); } // both supported types get the same "pick" + extension tag
+ else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type is in outputTypes but has no tag here; control_pressed presumably makes the command abort - confirm
+ }
+ return outputFileName; // still "" if either error branch ran
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetOtusCommand", "getOutputFileNameTag");
+ exit(1); // the process ends here once m->errorOut has been handed the exception
+ }
+}
//**********************************************************************************************************************
GetOtusCommand::GetOtusCommand(){
try {
groupMap->readMap();
//get groups you want to get
- if (accnosfile != "") { readAccnos(); }
+ if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups); }
//make sure groups are valid
//takes care of user setting groupNames that are invalid or setting groups=all
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick." + label + m->getExtension(listfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + label + "." + getOutputFileNameTag("list", listfile);
ofstream out;
m->openOutputFile(outputFileName, out);
string GroupOutputDir = outputDir;
if (outputDir == "") { GroupOutputDir += m->hasPath(groupfile); }
- string outputGroupFileName = GroupOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick." + label + m->getExtension(groupfile);
+ string outputGroupFileName = GroupOutputDir + m->getRootName(m->getSimpleName(groupfile)) + label + "." + getOutputFileNameTag("group", groupfile);
ofstream outGroup;
m->openOutputFile(outputGroupFileName, outGroup);
}
}
//**********************************************************************************************************************
-void GetOtusCommand::readAccnos(){
- try {
- Groups.clear();
-
- ifstream in;
- m->openInputFile(accnosfile, in);
- string name;
-
- while(!in.eof()){
- in >> name;
-
- Groups.push_back(name);
-
- m->gobble(in);
- }
- in.close();
-
- }
- catch(exception& e) {
- m->errorOut(e, "GetOtusCommand", "readAccnos");
- exit(1);
- }
-}
-//**********************************************************************************************************************
vector<string> setParameters();
string getCommandName() { return "get.otus"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Get.otus"; }
string getDescription() { return "outputs a new list file containing the otus containing sequences from the groups specified"; }
vector<string> outputNames, Groups;
GroupMap* groupMap;
- void readAccnos();
int readListGroup();
int processList(ListVector*&, GroupMap*&, ofstream&, ofstream&, bool&);
exit(1);
}
}
-
+//**********************************************************************************************************************
+string GetRAbundCommand::getOutputFileNameTag(string type, string inputName=""){ // filename tag for one output type; the "" default is given on this definition, not on the in-class declaration
+ try {
+ string outputFileName = "";
+ map<string, vector<string> >::iterator it;
+ // inputName is accepted but never read in this function
+ //only types registered in outputTypes are valid; anything else is reported and the empty tag is returned
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+ else {
+ if (type == "rabund") { outputFileName = "rabund"; } // tag carries no leading "."
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type is in outputTypes but has no tag here; control_pressed presumably makes the command abort - confirm
+ }
+ return outputFileName; // still "" if either error branch ran
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetRAbundCommand", "getOutputFileNameTag");
+ exit(1); // the process ends here once m->errorOut has been handed the exception
+ }
+}
//**********************************************************************************************************************
GetRAbundCommand::GetRAbundCommand(){
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- filename = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "rabund";
+ filename = outputDir + m->getRootName(m->getSimpleName(inputfile)) + getOutputFileNameTag("rabund");
m->openOutputFile(filename, out);
input = new InputData(inputfile, format);
vector<string> setParameters();
string getCommandName() { return "get.rabund"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Get.rabund"; }
string getDescription() { return "creates a rabund file"; }
}
}
//**********************************************************************************************************************
+string GetRelAbundCommand::getOutputFileNameTag(string type, string inputName=""){ // filename tag for one output type; the "" default is given on this definition, not on the in-class declaration
+ try {
+ string outputFileName = "";
+ map<string, vector<string> >::iterator it;
+ // inputName is accepted but never read in this function
+ //only types registered in outputTypes are valid; anything else is reported and the empty tag is returned
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+ else {
+ if (type == "relabund") { outputFileName = "relabund" ; } // tag carries no leading "."
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type is in outputTypes but has no tag here; control_pressed presumably makes the command abort - confirm
+ }
+ return outputFileName; // still "" if either error branch ran
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetRelAbundCommand", "getOutputFileNameTag");
+ exit(1); // the process ends here once m->errorOut has been handed the exception
+ }
+}
+//**********************************************************************************************************************
GetRelAbundCommand::GetRelAbundCommand(){
try {
abort = true; calledHelp = true;
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "relabund";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + getOutputFileNameTag("relabund");
ofstream out;
m->openOutputFile(outputFileName, out);
out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
vector<string> setParameters();
string getCommandName() { return "get.relabund"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Get.relabund"; }
string getDescription() { return "calculates the relative abundance of each OTU in a sample"; }
exit(1);
}
}
-
+//**********************************************************************************************************************
+string GetSAbundCommand::getOutputFileNameTag(string type, string inputName=""){ // filename tag for one output type; the "" default is given on this definition, not on the in-class declaration
+ try {
+ string outputFileName = "";
+ map<string, vector<string> >::iterator it;
+ // inputName is accepted but never read in this function
+ //only types registered in outputTypes are valid; anything else is reported and the empty tag is returned
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+ else {
+ if (type == "sabund") { outputFileName = "sabund"; } // tag carries no leading "."
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type is in outputTypes but has no tag here; control_pressed presumably makes the command abort - confirm
+ }
+ return outputFileName; // still "" if either error branch ran
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetSAbundCommand", "getOutputFileNameTag");
+ exit(1); // the process ends here once m->errorOut has been handed the exception
+ }
+}
//**********************************************************************************************************************
GetSAbundCommand::GetSAbundCommand(){
try {
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- filename = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "sabund";
+ filename = outputDir + m->getRootName(m->getSimpleName(inputfile)) + getOutputFileNameTag("sabund");
m->openOutputFile(filename, out);
input = new InputData(inputfile, format);
vector<string> setParameters();
string getCommandName() { return "get.sabund"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Get.sabund"; }
string getDescription() { return "creates a sabund file"; }
}
}
//**********************************************************************************************************************
+// Returns the filename tag this command appends for the given output type.
+// For the file kinds that are filtered in place the tag is "pick" followed by
+// m->getExtension(inputName); the two report types have fixed tags.
+// An empty string is returned (after logging an error) when the type is not
+// registered in outputTypes or has no tag defined.
+string GetSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Guard: the type must be one this command registered in outputTypes.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        // All of these share one naming rule, so they are tested together.
+        const bool usesPickTag = (type == "fasta") || (type == "taxonomy") || (type == "name")
+                              || (type == "group") || (type == "list") || (type == "qfile");
+        if (usesPickTag) { return "pick" + m->getExtension(inputName); }
+
+        if (type == "accnosreport") { return "accnos.report"; }
+        if (type == "alignreport") { return "pick.align.report"; }
+
+        // Registered type without a tag: report it and raise the control_pressed flag.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "GetSeqsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
GetSeqsCommand::GetSeqsCommand(string option) {
try {
abort = false; calledHelp = false;
if (abort == true) { if (calledHelp) { return 0; } return 2; }
//get names you want to keep
- readAccnos();
+ names = m->readAccnos(accnosfile);
if (m->control_pressed) { return 0; }
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" + m->getExtension(fastafile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(qualfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "pick" + m->getExtension(qualfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + getOutputFileNameTag("qfile", qualfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" + m->getExtension(listfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" + m->getExtension(namefile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(alignfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + "pick.align.report";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + getOutputFileNameTag("alignreport");
ofstream out;
m->openOutputFile(outputFileName, out);
}
}
//**********************************************************************************************************************
-
-int GetSeqsCommand::readAccnos(){
- try {
-
- ifstream in;
- m->openInputFile(accnosfile, in);
- string name;
-
- while(!in.eof()){
- in >> name;
-
- names.insert(name);
-
- m->gobble(in);
- }
- in.close();
-
- return 0;
-
- }
- catch(exception& e) {
- m->errorOut(e, "GetSeqsCommand", "readAccnos");
- exit(1);
- }
-}
-//**********************************************************************************************************************
//just looking at common mistakes.
int GetSeqsCommand::runSanityCheck(){
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(accnosfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(accnosfile)) + "accnos.report";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(accnosfile)) + getOutputFileNameTag("accnosreport");
ofstream out;
m->openOutputFile(outputFileName, out);
}
catch(exception& e) {
- m->errorOut(e, "GetSeqsCommand", "readAccnos");
+ m->errorOut(e, "GetSeqsCommand", "compareAccnos");
exit(1);
}
}
vector<string> setParameters();
string getCommandName() { return "get.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Get.seqs"; }
string getDescription() { return "gets sequences from a list, fasta, name, group, alignreport, quality or taxonomy file"; }
int readName();
int readGroup();
int readAlign();
- int readAccnos();
int readList();
int readTax();
int readQual();
exit(1);
}
}
+//**********************************************************************************************************************
+// Returns the filename tag this command appends for the given output type.
+// An empty string is returned (after logging an error) when the type is not
+// registered in outputTypes or has no tag defined; inputName is not used here.
+string GetSharedOTUCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Guard: the type must be one this command registered in outputTypes.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "fasta") { return "shared.fasta"; }
+        if (type == "accnos") { return "accnos"; }
+        if (type == "sharedseqs") { return "shared.seqs"; }
+
+        // Registered type without a tag: report it and raise the control_pressed flag.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "GetSharedOTUCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+
+
//**********************************************************************************************************************
GetSharedOTUCommand::GetSharedOTUCommand(){
try {
if (outputDir == "") { outputDir += m->hasPath(listfile); }
if (output != "accnos") {
- outputFileNames = outputDir + m->getRootName(m->getSimpleName(listfile)) + shared->getLabel() + userGroups + ".shared.seqs";
+ outputFileNames = outputDir + m->getRootName(m->getSimpleName(listfile)) + shared->getLabel() + userGroups + "." + getOutputFileNameTag("sharedseqs");
}else {
- outputFileNames = outputDir + m->getRootName(m->getSimpleName(listfile)) + shared->getLabel() + userGroups + ".accnos";
+ outputFileNames = outputDir + m->getRootName(m->getSimpleName(listfile)) + shared->getLabel() + userGroups + "." + getOutputFileNameTag("accnos");
}
m->openOutputFile(outputFileNames, outNames);
vector<string> namesOfSeqsInThisBin;
- string names = shared->get(i);
- while ((names.find_first_of(',') != -1)) {
- string name = names.substr(0,names.find_first_of(','));
- names = names.substr(names.find_first_of(',')+1, names.length());
+ string names = shared->get(i);
+ vector<string> binNames;
+ m->splitAtComma(names, binNames);
+ for(int j = 0; j < binNames.size(); j++) {
+ string name = binNames[j];
//find group
string seqGroup = groupMap->getGroup(name);
else { atLeastOne[seqGroup]++; }
}
- //get last name
- string seqGroup = groupMap->getGroup(names);
- if (output != "accnos") {
- namesOfSeqsInThisBin.push_back((names + "|" + seqGroup + "|" + toString(i+1)));
- }else { namesOfSeqsInThisBin.push_back(names); }
-
- if (seqGroup == "not found") { m->mothurOut(names + " is not in your groupfile. Please correct."); m->mothurOutEndLine(); exit(1); }
-
- //is this seq in one of hte groups we care about
- it = groupFinder.find(seqGroup);
- if (it == groupFinder.end()) { uniqueOTU = false; } //you have a sequence from a group you don't want
- else { atLeastOne[seqGroup]++; }
-
-
//make sure you have at least one seq from each group you want
bool sharedByAll = true;
map<string, int>::iterator it2;
//if fasta file provided output new fasta file
if ((fastafile != "") && wroteSomething) {
if (outputDir == "") { outputDir += m->hasPath(fastafile); }
- string outputFileFasta = outputDir + m->getRootName(m->getSimpleName(fastafile)) + shared->getLabel() + userGroups + ".shared.fasta";
+ string outputFileFasta = outputDir + m->getRootName(m->getSimpleName(fastafile)) + shared->getLabel() + userGroups + "." + getOutputFileNameTag("fasta");
ofstream outFasta;
m->openOutputFile(outputFileFasta, outFasta);
outputNames.push_back(outputFileFasta); outputTypes["fasta"].push_back(outputFileFasta);
string getCommandName() { return "get.sharedseqs"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
string getRequiredCommand() { return "none"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Get.sharedseqs"; }
string getDescription() { return "identifies sequences that are either unique or shared by specific groups"; }
/************************************************************/
int GroupMap::readMap() {
- string seqName, seqGroup;
+    // Reads fileHandle in 4096-byte chunks, treating the whitespace-separated
+    // tokens as alternating (sequence name, group) columns, and records each
+    // pair in groupmap / seqsPerGroup.
+    // Returns 1 if a duplicate name is found or m->control_pressed is set, else 0.
+    try {
+        string seqName, seqGroup;
int error = 0;
-
- while(fileHandle){
- fileHandle >> seqName; m->gobble(fileHandle); //read from first column
- fileHandle >> seqGroup; //read from second column
-
- if (m->control_pressed) { fileHandle.close(); return 1; }
-
- setNamesOfGroups(seqGroup);
-
- it = groupmap.find(seqName);
-
- if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
- else {
- groupmap[seqName] = seqGroup; //store data in map
- seqsPerGroup[seqGroup]++; //increment number of seqs in that group
- }
- m->gobble(fileHandle);
- }
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+
+        // Test the whole stream state, not just eof(): when the stream is already
+        // failed (e.g. the file never opened) read() extracts nothing and never sets
+        // eofbit, so an eof()-only condition would spin forever. On a healthy stream
+        // the final read() sets eofbit and failbit together, so the number of
+        // iterations is unchanged.
+        while (fileHandle) {
+            if (m->control_pressed) { fileHandle.close(); return 1; }
+
+            fileHandle.read(buffer, sizeof(buffer));
+            // gcount() is the number of bytes the read above actually delivered.
+            vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
+
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) { seqName = pieces[i]; columnOne=false; }
+                else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+
+                if (pairDone) {
+                    setNamesOfGroups(seqGroup);
+
+                    it = groupmap.find(seqName);
+
+                    if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
+                    else {
+                        groupmap[seqName] = seqGroup; //store data in map
+                        seqsPerGroup[seqGroup]++; //increment number of seqs in that group
+                    }
+                    pairDone = false;
+                }
+            }
+        }
+        // NOTE(review): presumably splitWhiteSpace leaves an unterminated trailing
+        // token in `rest`; if so, a file that does not end in whitespace loses its
+        // last entry here - TODO confirm against MothurOut::splitWhiteSpace.
fileHandle.close();
+
m->setAllGroups(namesOfGroups);
return error;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "GroupMap", "readMap");
+        exit(1);
+    }
}
/************************************************************/
int GroupMap::readDesignMap() {
- string seqName, seqGroup;
+    // Reads fileHandle in 4096-byte chunks, treating the whitespace-separated
+    // tokens as alternating (first column, second column) pairs of a design file,
+    // and records each pair in groupmap / seqsPerGroup.
+    // Returns 1 if a duplicate first-column name is found or m->control_pressed
+    // is set, else 0.
+    try {
+        string seqName, seqGroup;
int error = 0;
-
- while(fileHandle){
- fileHandle >> seqName; m->gobble(fileHandle); //read from first column
- fileHandle >> seqGroup; //read from second column
-
- if (m->control_pressed) { fileHandle.close(); return 1; }
-
- setNamesOfGroups(seqGroup);
-
- it = groupmap.find(seqName);
-
- if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 group named " + seqName + ", group names must be unique. Please correct."); m->mothurOutEndLine(); }
- else {
- groupmap[seqName] = seqGroup; //store data in map
- seqsPerGroup[seqGroup]++; //increment number of seqs in that group
- }
- m->gobble(fileHandle);
- }
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+
+        // Test the whole stream state, not just eof(): when the stream is already
+        // failed (e.g. the file never opened) read() extracts nothing and never sets
+        // eofbit, so an eof()-only condition would spin forever. On a healthy stream
+        // the final read() sets eofbit and failbit together, so the number of
+        // iterations is unchanged.
+        while (fileHandle) {
+            if (m->control_pressed) { fileHandle.close(); return 1; }
+
+            fileHandle.read(buffer, sizeof(buffer));
+            // gcount() is the number of bytes the read above actually delivered.
+            vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
+
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) { seqName = pieces[i]; columnOne=false; }
+                else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+
+                if (pairDone) {
+                    setNamesOfGroups(seqGroup);
+
+                    it = groupmap.find(seqName);
+
+                    // Keep the design-file wording ("group") that the replaced code used.
+                    if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 group named " + seqName + ", group names must be unique. Please correct."); m->mothurOutEndLine(); }
+                    else {
+                        groupmap[seqName] = seqGroup; //store data in map
+                        seqsPerGroup[seqGroup]++; //increment number of seqs in that group
+                    }
+                    pairDone = false;
+                }
+            }
+        }
+        // NOTE(review): presumably splitWhiteSpace leaves an unterminated trailing
+        // token in `rest`; if so, a file that does not end in whitespace loses its
+        // last entry here - TODO confirm against MothurOut::splitWhiteSpace.
fileHandle.close();
+
m->setAllGroups(namesOfGroups);
return error;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "GroupMap", "readDesignMap");
+        exit(1);
+    }
}
/************************************************************/
int GroupMap::readDesignMap(string filename) {
- groupFileName = filename;
- m->openInputFile(filename, fileHandle);
- index = 0;
- string seqName, seqGroup;
- int error = 0;
-
- while(fileHandle){
- fileHandle >> seqName; m->gobble(fileHandle); //read from first column
- fileHandle >> seqGroup; //read from second column
-
- if (m->control_pressed) { fileHandle.close(); return 1; }
-
- setNamesOfGroups(seqGroup);
-
- it = groupmap.find(seqName);
+    // Opens `filename` on fileHandle, resets index to 0, then reads the file in
+    // 4096-byte chunks, treating the whitespace-separated tokens as alternating
+    // (first column, second column) pairs recorded in groupmap / seqsPerGroup.
+    // Returns 1 if a duplicate first-column name is found or m->control_pressed
+    // is set, else 0.
+    try {
+        groupFileName = filename;
+        m->openInputFile(filename, fileHandle);
+        index = 0;
+        string seqName, seqGroup;
+        int error = 0;
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
- if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 group named " + seqName + ", group names must be unique. Please correct."); m->mothurOutEndLine(); }
- else {
- groupmap[seqName] = seqGroup; //store data in map
- seqsPerGroup[seqGroup]++; //increment number of seqs in that group
+        // Test the whole stream state, not just eof(): if the open above failed,
+        // read() extracts nothing and never sets eofbit, so an eof()-only condition
+        // would spin forever. On a healthy stream the final read() sets eofbit and
+        // failbit together, so the number of iterations is unchanged.
+        while (fileHandle) {
+            if (m->control_pressed) { fileHandle.close(); return 1; }
+
+            fileHandle.read(buffer, sizeof(buffer));
+            // gcount() is the number of bytes the read above actually delivered.
+            vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
+
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) { seqName = pieces[i]; columnOne=false; }
+                else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+
+                if (pairDone) {
+                    setNamesOfGroups(seqGroup);
+
+                    it = groupmap.find(seqName);
+
+                    // Keep the design-file wording ("group") that the replaced code used.
+                    if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 group named " + seqName + ", group names must be unique. Please correct."); m->mothurOutEndLine(); }
+                    else {
+                        groupmap[seqName] = seqGroup; //store data in map
+                        seqsPerGroup[seqGroup]++; //increment number of seqs in that group
+                    }
+                    pairDone = false;
+                }
+            }
}
- m->gobble(fileHandle);
+        // NOTE(review): presumably splitWhiteSpace leaves an unterminated trailing
+        // token in `rest`; if so, a file that does not end in whitespace loses its
+        // last entry here - TODO confirm against MothurOut::splitWhiteSpace.
+        fileHandle.close();
+
+        m->setAllGroups(namesOfGroups);
+        return error;
}
- fileHandle.close();
- m->setAllGroups(namesOfGroups);
- return error;
+    catch(exception& e) {
+        m->errorOut(e, "GroupMap", "readDesignMap");
+        exit(1);
+    }
}
/************************************************************/
int GroupMap::getNumGroups() { return namesOfGroups.size(); }
}
}
//**********************************************************************************************************************
+// Returns the filename tag this command appends for the given output type.
+// An empty string is returned (after logging an error) when the type is not
+// registered in outputTypes or has no tag defined; inputName is not used here.
+string HClusterCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Guard: the type must be one this command registered in outputTypes.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        // For these three types the tag is the type name itself.
+        if (type == "list") { return "list"; }
+        if (type == "rabund") { return "rabund"; }
+        if (type == "sabund") { return "sabund"; }
+
+        // Registered type without a tag: report it and raise the control_pressed flag.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "HClusterCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
HClusterCommand::HClusterCommand(){
try {
abort = true; calledHelp = true;
else if (method == "weighted") { tag = "wn"; }
else { tag = "an"; }
- m->openOutputFile(fileroot+ tag + ".sabund", sabundFile);
- m->openOutputFile(fileroot+ tag + ".rabund", rabundFile);
- m->openOutputFile(fileroot+ tag + ".list", listFile);
-
- outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
- outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
- outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
+ string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
+ string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
+ string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
+
+ m->openOutputFile(sabundFileName, sabundFile);
+ m->openOutputFile(rabundFileName, rabundFile);
+ m->openOutputFile(listFileName, listFile);
+
+ outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
+ outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
+ outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
}
}
}
vector<string> setParameters();
string getCommandName() { return "hcluster"; }
string getCommandCategory() { return "Clustering"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Sun Y, Cai Y, Liu L, Yu F, Farrell ML, Mckendree W, Farmerie W (2009). ESPRIT: estimating species richness using large collections of 16S rRNA pyrosequences. Nucleic Acids Res 37: e76. \nhttp://www.mothur.org/wiki/Hcluster"; }
string getDescription() { return "cluster your sequences into OTUs using a distance matrix"; }
}
}
+//**********************************************************************************************************************
+
+// Returns the filename tag this command appends for the given output type.
+// An empty string is returned (after logging an error) when the type is not
+// registered in outputTypes or has no tag defined; inputName is not used here.
+string HeatMapCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Guard: the type must be one this command registered in outputTypes.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "svg") { return "svg"; }
+
+        // Registered type without a tag: report it and raise the control_pressed flag.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "HeatMapCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+
//**********************************************************************************************************************
HeatMapCommand::HeatMapCommand(){
try {
vector<string> setParameters();
string getCommandName() { return "heatmap.bin"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Heatmap.bin"; }
string getDescription() { return "generate a heatmap where the color represents the relative abundanceof an OTU"; }
exit(1);
}
}
+//**********************************************************************************************************************
+
+// Returns the filename tag this command appends for the given output type.
+// An empty string is returned (after logging an error) when the type is not
+// registered in outputTypes or has no tag defined; inputName is not used here.
+string HeatMapSimCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Guard: the type must be one this command registered in outputTypes.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "svg") { return "svg"; }
+
+        // Registered type without a tag: report it and raise the control_pressed flag.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "HeatMapSimCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+
//**********************************************************************************************************************
HeatMapSimCommand::HeatMapSimCommand(){
try {
vector<string> setParameters();
string getCommandName() { return "heatmap.sim"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Heatmap.sim"; }
string getDescription() { return "generate a heatmap indicating the pairwise distance between multiple samples using a variety of calculators"; }
string getCommandName() { return "help"; }
string getCommandCategory() { return "Hidden"; }
string getHelpString() { return "For more information about a specific command type 'commandName(help)' i.e. 'cluster(help)'"; }
+ string getOutputFileNameTag(string, string) { return ""; }
string getCitation() { return "no citation"; }
string getDescription() { return "help"; }
exit(1);
}
}
-
//**********************************************************************************************************************
-
+// Returns the filename tag this command appends for the given output type.
+// An empty string is returned (after logging an error) when the type is not
+// registered in outputTypes or has no tag defined; inputName is not used here.
+string HomovaCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Guard: the type must be one this command registered in outputTypes.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "homova") { return "homova"; }
+
+        // Registered type without a tag: report it and raise the control_pressed flag.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "HomovaCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
HomovaCommand::HomovaCommand(){
try {
abort = true; calledHelp = true;
//create a new filename
ofstream HOMOVAFile;
- string HOMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + "homova";
+ string HOMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + getOutputFileNameTag("homova");
m->openOutputFile(HOMOVAFileName, HOMOVAFile);
outputNames.push_back(HOMOVAFileName); outputTypes["homova"].push_back(HOMOVAFileName);
vector<string> setParameters();
string getCommandName() { return "homova"; }
string getCommandCategory() { return "Hypothesis Testing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Stewart CN, Excoffier L (1996). Assessing population genetic structure and variability with RAPD data: Application to Vaccinium macrocarpon (American Cranberry). J Evol Biol 9: 153-71. \nhttp://www.mothur.org/wiki/Homova"; }
string getDescription() { return "homova"; }
exit(1);
}
}
-
+//**********************************************************************************************************************
+// Returns the filename tag this command appends for the given output type.
+// An empty string is returned (after logging an error) when the type is not
+// registered in outputTypes or has no tag defined; inputName is not used here.
+string IndicatorCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Guard: the type must be one this command registered in outputTypes.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "tree") { return "indicator.tre"; }
+        if (type == "summary") { return "indicator.summary"; }
+
+        // Registered type without a tag: report it and raise the control_pressed flag.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "IndicatorCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
IndicatorCommand::IndicatorCommand(){
try {
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(inputFileName); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + "indicator.summary";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + getOutputFileNameTag("summary");
outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
ofstream out;
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(inputFileName); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + "indicator.summary";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + getOutputFileNameTag("summary");
outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
ofstream out;
string treeOutputDir = outputDir;
if (outputDir == "") { treeOutputDir += m->hasPath(treefile); }
- string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + "indicator.tre";
+ string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("tree");
//create a map from tree node index to names of descendants, save time later to know which sharedRabund you need
vector<string> setParameters();
string getCommandName() { return "indicator"; }
string getCommandCategory() { return "Hypothesis Testing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Dufrene M, Legendre P (1997). Species assemblages and indicator species: The need for a flexible asymmetrical approach. Ecol Monogr 67: 345-66.\n McCune B, Grace JB, Urban DL (2002). Analysis of ecological communities. MjM Software Design: Gleneden Beach, OR. \nLegendre P, Legendre L (1998). Numerical Ecology. Elsevier: New York. \nhttp://www.mothur.org/wiki/Indicator"; }
string getDescription() { return "calculate the indicator value for each OTU"; }
--- /dev/null
+/*
+ * File: kruskalwalliscommand.cpp
+ * Author: kiverson
+ *
+ * Created on June 26, 2012, 11:06 AM
+ */
+
+#include "kruskalwalliscommand.h"
+
+//**********************************************************************************************************************
+vector<string> KruskalWallisCommand::setParameters(){
+	try {
+		// Register every option this command accepts, in the same order as before:
+		// inputdir, outputdir, groups, shared.
+		parameters.push_back(CommandParameter("inputdir", "String", "", "", "", "", "",false,false));
+		parameters.push_back(CommandParameter("outputdir", "String", "", "", "", "", "",false,false));
+		parameters.push_back(CommandParameter("groups", "String", "", "", "", "", "",false,false));
+		parameters.push_back(CommandParameter("shared", "InputTypes", "", "", "none", "none", "none",false,true));
+
+		// Return only the names of everything currently held in `parameters`.
+		vector<string> names;
+		int idx = 0;
+		while (idx < parameters.size()) {
+			names.push_back(parameters[idx].name);
+			idx++;
+		}
+		return names;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "KruskalWallisCommand", "setParameters");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+string KruskalWallisCommand::getHelpString(){
+	try {
+		// Help text shown by help(). The previous text announced "parameter options are"
+		// without listing any, lacked a trailing newline, and embedded a non-ASCII dash.
+		// The options named here are the user-facing ones registered in setParameters().
+		string helpString = "";
+		helpString += "The kruskalwallis command parameter options are shared and groups.\n";
+		helpString += "Kruskal-Wallis one-way analysis of variance is a non-parametric method for testing whether samples originate from the same distribution.\n";
+		return helpString;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "KruskalWallisCommand", "getHelpString");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+string KruskalWallisCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		// Returns the file-name suffix used for output files of the given type.
+		// An empty string is returned when the type is not one this command registers
+		// in outputTypes, or when no suffix is defined for it (which also sets
+		// m->control_pressed). inputName is not used by this command.
+		string outputFileName = "";
+		map<string, vector<string> >::iterator it;
+
+		//is this a type this command creates
+		it = outputTypes.find(type);
+		if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+		else {
+			// Was "cooccurence.summary", copied from the co-occurrence command; that would
+			// label this command's summary with another command's suffix.
+			if (type == "summary") {  outputFileName =  "kruskalwallis.summary"; }
+			else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
+		}
+		return outputFileName;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "KruskalWallisCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+KruskalWallisCommand::KruskalWallisCommand(){
+	try {
+		// Option-less construction: the instance is flagged as aborted/help-only,
+		// but still registers its parameters and its single output type.
+		calledHelp = true;
+		abort = true;
+
+		setParameters();
+
+		// Register "summary" with an empty list of produced files.
+		outputTypes["summary"] = vector<string>();
+	}
+	catch(exception& e) {
+		m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+KruskalWallisCommand::KruskalWallisCommand(string option) {
+	try {
+		// Parses the option string for the kruskalwallis command.
+		// "help" and "citation" print their text and mark the command as aborted.
+		// Otherwise the shared, groups, inputdir and outputdir options are read and
+		// `abort` is set if any option is invalid or the shared file is unavailable.
+		abort = false; calledHelp = false;
+
+		//allow user to run help
+		if(option == "help") { help(); abort = true; calledHelp = true; }
+		else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+		else {
+			vector<string> myArray = setParameters();
+
+			OptionParser parser(option);
+			map<string,string> parameters = parser.getParameters();
+			map<string,string>::iterator it;
+
+			ValidParameters validParameter;
+
+			//check to make sure all parameters are valid for command
+			for (it = parameters.begin(); it != parameters.end(); it++) {
+				if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+			}
+
+			//if the user changes the input directory command factory will send this info to us in the output parameter
+			// This has to happen BEFORE the shared file is validated: it rewrites
+			// parameters["shared"], and doing it afterwards (as before) had no effect.
+			string inputDir = validParameter.validFile(parameters, "inputdir", false);
+			if (inputDir == "not found"){	inputDir = "";		}
+			else {
+				string path;
+				it = parameters.find("shared");
+				//user has given a shared file
+				if(it != parameters.end()){
+					path = m->hasPath(it->second);
+					//if the user has not given a path then, add inputdir. else leave path alone.
+					if (path == "") {	parameters["shared"] = inputDir + it->second;		}
+				}
+			}
+
+			//get shared file
+			sharedfile = validParameter.validFile(parameters, "shared", true);
+			if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+			else if (sharedfile == "not found") {
+				//if there is a current shared file, use it
+				sharedfile = m->getSharedFile();
+				if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+				else { 	m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
+			}else { m->setSharedFile(sharedfile); }
+
+			//if the user changes the output directory command factory will send this info to us in the output parameter
+			outputDir = validParameter.validFile(parameters, "outputdir", false);		if (outputDir == "not found"){	outputDir = m->hasPath(sharedfile);		}
+
+			// Optional dash-separated list of groups; the parsed list is handed to m.
+			groups = validParameter.validFile(parameters, "groups", false);
+			if (groups == "not found") { groups = "";   }
+			else {
+				m->splitAtDash(groups, Groups);
+			}
+			m->setGroups(Groups);
+
+			vector<string> tempOutNames;
+			outputTypes["summary"] = tempOutNames;
+
+
+		}
+
+	}
+	catch(exception& e) {
+		m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+int KruskalWallisCommand::execute(){
+	try {
+		// Runs the command: opens the summary output file, ranks the pooled
+		// observations and accumulates the Kruskal-Wallis H statistic.
+		// Returns 0 on success or after help/citation, 2 when aborted for another reason.
+		// The merge step, tie correction and p-value are still unimplemented, so `vec`
+		// is currently always empty and only the header line is written.
+		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
+
+		InputData* input = new InputData(sharedfile, "sharedfile");
+		vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
+		// NOTE(review): the SharedRAbundVector pointers in `lookup` are never released
+		// here; confirm who owns them and free them once the merge step uses them.
+
+		ofstream out;
+		string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + getOutputFileNameTag("summary");
+		m->openOutputFile(outputFileName, out);
+		outputNames.push_back(outputFileName);  outputTypes["summary"].push_back(outputFileName);
+		out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+		out << "H\tpvalue\n";
+
+		//math goes here
+
+		int N = m->getNumGroups();
+		vector<groupRank> vec;
+		vector<string> groups = m->getGroups();
+		// Never index past the group names actually available.
+		if (N > static_cast<int>(groups.size())) { N = static_cast<int>(groups.size()); }
+
+		//merge all groups into a vector
+
+
+
+		//rank function here
+		assignRank(vec);
+
+		//populate counts and rankSums vectors
+		// Size them first: they start out empty, so writing counts[i] was out of bounds.
+		counts.assign(N, 0);
+		rankSums.assign(N, 0.0);
+		for (int i=0;i<N;i++) {
+			int count = 0;
+			double sum = 0.0;
+			const string& group = groups[i];
+			// the index was previously left uninitialised
+			for (size_t j = 0; j < vec.size(); j++) {
+				if (vec[j].group == group) {
+					count++;
+					sum = sum + vec[j].rank;
+				}
+			}
+			counts[i] = count;
+			rankSums[i] = sum;
+		}
+
+		//test statistic: H = 12 / (n(n+1)) * sum(R_i^2 / n_i) - 3(n+1)
+		// n is the TOTAL number of ranked observations, not the number of groups.
+		double tmp = 0.0;
+		for (int i=0;i<N;i++) {
+			// groups without observations contribute nothing (and would divide by zero)
+			if (counts[i] > 0) { tmp = tmp + ((rankSums[i] * rankSums[i]) / counts[i]); }
+		}
+
+		double numObs = static_cast<double>(vec.size());
+		double H = 0.0;
+		// 12.0 keeps the arithmetic floating point; the old "12 / (N*(N+1))" was an
+		// integer division that evaluated to 0 for N >= 3.
+		if (numObs > 0) { H = (12.0 / (numObs * (numObs + 1.0))) * tmp - (3.0 * (numObs + 1.0)); }
+
+		//ss = tmp - pow(accumulate(rankSums.begin(), rankSums.end(), 0), 2);
+
+		//H = ss / ( (N * (N + 1))/12 );
+
+		//correction for ties?
+
+		//p-value calculation
+		// TODO: H is computed but not yet written; output waits on the p-value.
+
+		delete input;
+
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "KruskalWallisCommand", "execute");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+void KruskalWallisCommand::assignRank(vector<groupRank> &vec) {
+	try {
+		// Sorts vec by ascending value and fills in each element's 1-based rank.
+		// Elements with equal values (ties) all receive the mean of the ranks they
+		// jointly occupy. An empty vector is left untouched.
+		//
+		// The previous version dereferenced *(it+1) at the last element, detected ties
+		// with equalrank before any rank had been assigned, and averaged tied ranks
+		// incorrectly.
+		sort (vec.begin(), vec.end(), comparevalue);
+
+		size_t start = 0;
+		while (start < vec.size()) {
+			// [start, stop) is the run of elements whose value equals vec[start].value
+			size_t stop = start + 1;
+			while ((stop < vec.size()) && equalvalue(vec[start], vec[stop])) { stop++; }
+
+			// The run occupies ranks start+1 .. stop, whose mean is (start + 1 + stop) / 2.
+			// For a run of length one this is simply start+1.
+			double avgRank = (static_cast<double>(start + 1) + static_cast<double>(stop)) / 2.0;
+			for (size_t k = start; k < stop; k++) { vec[k].rank = avgRank; }
+
+			start = stop;
+		}
+	}
+	catch(exception& e) {
+		m->errorOut(e, "KruskalWallisCommand", "assignRank");
+		exit(1);
+	}
+
+}
+//**********************************************************************************************************************
+void KruskalWallisCommand::assignValue(vector<groupRank> &vec) {
+	// Placeholder: the body is empty, so vec is left unchanged.
+}
+//**********************************************************************************************************************
+//**********************************************************************************************************************
+//**********************************************************************************************************************
\ No newline at end of file
--- /dev/null
+/*
+ * File: kruskalwalliscommand.h
+ * Author: kiverson
+ *
+ * Created on June 26, 2012, 11:07 AM
+ */
+
+#ifndef KRUSKALWALLISCOMMAND_H
+#define KRUSKALWALLISCOMMAND_H
+
+#include "command.hpp"
+#include "inputdata.h"
+#include "sharedrabundvector.h"
+
+
+class KruskalWallisCommand : public Command {
+	// Command class behind "kruskalwallis" (see getCommandName / getDescription below).
+public:
+	// The string constructor parses an option string; the default one marks the command as help-only.
+ KruskalWallisCommand(string);
+ KruskalWallisCommand();
+ ~KruskalWallisCommand(){}
+	// Command metadata and option registration.
+ vector<string> setParameters();
+ string getCommandName() { return "kruskalwallis"; }
+ string getCommandCategory() { return "Hypothesis Testing"; }
+ string getOutputFileNameTag(string, string); // (output type, input name) -> file-name suffix
+ string getHelpString();
+ string getCitation() { return "http://www.mothur.org/wiki/kruskalwallis"; }
+ string getDescription() { return "Non-parametric method for testing whether samples originate from the same distribution."; }
+	// One observation: the group it belongs to, its value, and the rank written by assignRank().
+ struct groupRank {
+ string group;
+ double value;
+ double rank;
+ };
+	// execute() runs the command; assignRank() sorts by value and fills in ranks.
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+ void assignRank(vector<groupRank>&);
+ void assignValue(vector<groupRank>&); // currently an empty stub
+
+
+private:
+ string outputDir, sharedfile, groups; // values parsed from the option string
+ bool abort; // true when execute() must not run
+ set<string> labels;
+ vector<string> outputNames, Groups; // files produced; group names split from `groups`
+ vector<int> counts; // written per group index in execute()
+ vector<double> rankSums; // written per group index in execute()
+ vector<double> rankMeans;
+
+
+	// Comparison helpers over groupRank; comparevalue is the sort predicate used by assignRank().
+ static bool comparevalue(const groupRank &a, const groupRank &b) { return a.value < b.value; }
+ static bool equalvalue(const groupRank &a, const groupRank &b) { return a.value == b.value; }
+ static bool comparerank(const groupRank &a, const groupRank &b) { return a.rank < b.rank; }
+ static bool equalrank(const groupRank &a, const groupRank &b) { return a.rank == b.rank; }
+ static bool equalgroup(const groupRank &a, const groupRank &b) { return a.group == b.group; }
+
+};
+
+#endif /* KRUSKALWALLISCOMMAND_H */
+
}
}
//**********************************************************************************************************************
+string LibShuffCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		// Maps an output type to the file-name suffix libshuff uses for it.
+		// A type that is not registered in outputTypes is reported and yields "".
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		string tag = "";
+		if (type == "libshuffsummary") { tag = "libshuff.summary"; }
+		else if (type == "coverage") { tag = "libshuff.coverage"; }
+		else {
+			// registered type with no suffix defined: report it and raise control_pressed
+			m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+			m->control_pressed = true;
+		}
+		return tag;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "LibShuffCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
LibShuffCommand::LibShuffCommand(){
try {
abort = true; calledHelp = true;
try {
ofstream outCov;
- summaryFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "libshuff.coverage";
+ summaryFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + getOutputFileNameTag("coverage");
m->openOutputFile(summaryFile, outCov);
outputNames.push_back(summaryFile); outputTypes["coverage"].push_back(summaryFile);
outCov.setf(ios::fixed, ios::floatfield); outCov.setf(ios::showpoint);
try {
ofstream outSum;
- summaryFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "libshuff.summary";
+ summaryFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + getOutputFileNameTag("libshuffsummary");
m->openOutputFile(summaryFile, outSum);
outputNames.push_back(summaryFile); outputTypes["libshuffsummary"].push_back(summaryFile);
vector<string> setParameters();
string getCommandName() { return "libshuff"; }
string getCommandCategory() { return "Hypothesis Testing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Singleton DR, Furlong MA, Rathbun SL, Whitman WB (2001). Quantitative comparisons of 16S rRNA gene sequence libraries from environmental samples. Appl Environ Microbiol 67: 4374-6. \nSchloss PD, Larget BR, Handelsman J (2004). Integration of microbial ecology and statistics: a test to compare gene libraries. Appl Environ Microbiol 70: 5485-92. \nhttp://www.mothur.org/wiki/Libshuff"; }
string getDescription() { return "a generic test that describes whether two or more communities have the same structure using the Cramer-von Mises test statistic"; }
exit(1);
}
}
+//**********************************************************************************************************************
+string ListOtuLabelsCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		// Maps an output type to the file-name suffix list.otulabels uses for it.
+		// A type that is not registered in outputTypes is reported and yields "".
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		string tag = "";
+		if (type != "otulabels") {
+			// registered type with no suffix defined: report it and raise control_pressed
+			m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+			m->control_pressed = true;
+		}
+		else { tag = "otulabels"; }
+		return tag;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "ListOtuLabelsCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
+
//**********************************************************************************************************************
ListOtuLabelsCommand::ListOtuLabelsCommand(){
try {
int ListOtuLabelsCommand::createList(vector<SharedRAbundVector*>& lookup){
try {
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + ".otu.labels";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + "." + getOutputFileNameTag("otulabels");
outputNames.push_back(outputFileName); outputTypes["otulabels"].push_back(outputFileName);
ofstream out;
m->openOutputFile(outputFileName, out);
int ListOtuLabelsCommand::createList(vector<SharedRAbundFloatVector*>& lookup){
try {
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + ".otu.labels";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + "." + getOutputFileNameTag("otulabels");
outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
ofstream out;
m->openOutputFile(outputFileName, out);
string getCommandName() { return "list.otulabels"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
//commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/List.otulabels"; }
string getDescription() { return "lists otu labels from shared or relabund file. Can be used by get.otulabels with output from classify.otu, otu.association, or corr.axes to select specific otus."; }
exit(1);
}
}
-
+//**********************************************************************************************************************
+string ListSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		// Maps an output type to the file-name suffix list.seqs uses for it.
+		// A type that is not registered in outputTypes is reported and yields "".
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		string tag = "";
+		if (type != "accnos") {
+			// registered type with no suffix defined: report it and raise control_pressed
+			m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+			m->control_pressed = true;
+		}
+		else { tag = "accnos"; }
+		return tag;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "ListSeqsCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
//**********************************************************************************************************************
ListSeqsCommand::ListSeqsCommand(){
try {
if (outputDir == "") { outputDir += m->hasPath(inputFileName); }
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + "accnos";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + getOutputFileNameTag("accnos");
ofstream out;
m->openOutputFile(outputFileName, out);
vector<string> setParameters();
string getCommandName() { return "list.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/List.seqs"; }
string getDescription() { return "lists sequences from a list, fasta, name, group, alignreport or taxonomy file"; }
--- /dev/null
+//
+// loadlogfilecommand.cpp
+// Mothur
+//
+// Created by Sarah Westcott on 6/13/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "loadlogfilecommand.h"
+#include "commandoptionparser.hpp"
+#include "commandfactory.hpp"
+#include "setcurrentcommand.h"
+
+//**********************************************************************************************************************
+vector<string> LoadLogfileCommand::setParameters(){
+	try {
+		// Register every option this command accepts, in the same order as before:
+		// logfile, inputdir, outputdir.
+		parameters.push_back(CommandParameter("logfile", "InputTypes", "", "", "none", "none", "none",false,true));
+		parameters.push_back(CommandParameter("inputdir", "String", "", "", "", "", "",false,false));
+		parameters.push_back(CommandParameter("outputdir", "String", "", "", "", "", "",false,false));
+
+		// Return only the names of everything currently held in `parameters`.
+		vector<string> names;
+		int idx = 0;
+		while (idx < parameters.size()) {
+			names.push_back(parameters[idx].name);
+			idx++;
+		}
+		return names;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "LoadLogfileCommand", "setParameters");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+string LoadLogfileCommand::getHelpString(){
+	try {
+		// Help text for load.logfile, assembled from adjacent string literals
+		// (one per output line).
+		string text =
+			"The load.logfile command extracts the current file names from a logfile.\n"
+			"The load.logfile parameter is logfile, and it is required.\n"
+			"The load.logfile command should be in the following format: \n"
+			"load.logfile(logfile=yourLogFile)\n";
+		return text;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "LoadLogfileCommand", "getHelpString");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+LoadLogfileCommand::LoadLogfileCommand(){
+	try {
+		// Option-less construction: flag the instance as aborted/help-only,
+		// then register the command's parameters.
+		calledHelp = true;
+		abort = true;
+
+		setParameters();
+	}
+	catch(exception& e) {
+		m->errorOut(e, "LoadLogfileCommand", "LoadLogfileCommand");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+LoadLogfileCommand::LoadLogfileCommand(string option) {
+	try {
+		// Parses the option string for the load.logfile command.
+		// "help" and "citation" print their text and mark the command as aborted.
+		// Otherwise logfile (required), inputdir and outputdir are read and `abort`
+		// is set if any option is invalid or the logfile is missing/unopenable.
+		abort = false; calledHelp = false;
+
+		//allow user to run help
+		if(option == "help") { help(); abort = true; calledHelp = true; }
+		else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+		else {
+			//valid parameters for this command
+			vector<string> myArray = setParameters();
+
+			OptionParser parser(option);
+			map<string,string> parameters = parser.getParameters();
+
+			ValidParameters validParameter;
+			map<string,string>::iterator it;
+			//check to make sure all parameters are valid for command
+			for (it = parameters.begin(); it != parameters.end(); it++) {
+				if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+			}
+
+			//if the user changes the input directory command factory will send this info to us in the output parameter
+			string inputDir = validParameter.validFile(parameters, "inputdir", false);
+			if (inputDir == "not found"){	inputDir = "";		}
+			else {
+
+				string path;
+				it = parameters.find("logfile");
+				//user has given a logfile
+				if(it != parameters.end()){
+					path = m->hasPath(it->second);
+					//if the user has not given a path then, add inputdir. else leave path alone.
+					if (path == "") {	parameters["logfile"] = inputDir + it->second;		}
+				}
+
+			}
+
+			//get logfile, it is required
+			logfile = validParameter.validFile(parameters, "logfile", true);
+			if (logfile == "not open") { logfile = ""; abort = true; }
+			else if (logfile == "not found") { m->mothurOut("The logfile parameter is required."); m->mothurOutEndLine();abort = true;  }
+
+			//if the user changes the output directory command factory will send this info to us in the output parameter
+			outputDir = validParameter.validFile(parameters, "outputdir", false);		if (outputDir == "not found"){
+				outputDir = m->hasPath(logfile); //if user entered a file with a path then preserve it
+			}
+		}
+
+	}
+	catch(exception& e) {
+		// Report under this class's own name (was the template placeholder "NewCommand").
+		m->errorOut(e, "LoadLogfileCommand", "LoadLogfileCommand");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+
+int LoadLogfileCommand::execute(){
+	try {
+		// Scans the logfile for "mothur > " command lines and "Output File " sections,
+		// tracks the most recent file name seen for each type listed by
+		// m->getCurrentTypes(), and finally runs set.current(...) with the result.
+		// Returns 0 on success, after help/citation or when interrupted; 2 when aborted.
+
+		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
+
+		m->mothurOutEndLine();
+		m->mothurOut("Extracting current files names..."); m->mothurOutEndLine();
+		m->mothurOutEndLine();
+
+		CommandFactory* cFactory;
+		cFactory = CommandFactory::getInstance();
+
+		ifstream in;
+		m->openInputFile(logfile, in);
+
+		set<string> currentTypes = m->getCurrentTypes();
+		map<string, string> currentFiles;
+		string commandName = "";
+		bool skip = false;   // true when `line` already holds an unprocessed "mothur > " line
+		string line = "";
+
+		while (!in.eof()) {
+			if (m->control_pressed) { break; }
+
+			if (!skip) {  line = m->getline(in); m->gobble(in); }
+			m->gobble(in);
+
+			//look for "mothur >"
+			// find() returns string::size_type; keep that type so the npos tests are exact
+			string::size_type pos = line.find("mothur > "); //command line
+			string::size_type pos2 = line.find("Output File "); //indicates command completed and we can update the current file
+			string::size_type pos3 = line.find("*****************");
+
+			//skipping over parts where a command runs another command
+			if (pos3 != string::npos) {
+				while (!in.eof()) {
+					if (m->control_pressed) { break; }
+					line = m->getline(in); m->gobble(in);
+					string::size_type posTemp = line.find("*****************");
+					if (posTemp != string::npos) { break; }
+				}
+			}
+
+			if (pos != string::npos) {
+				skip=false;
+				//extract command name and option string
+				string input = line.substr(pos+9);
+				CommandOptionParser parser(input);
+				commandName = parser.getCommandString();
+				string options = parser.getOptionString();
+
+				//parse out parameters in option string
+				map<string,string> parameters;
+				OptionParser optionParser(options, parameters);
+
+				for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
+					if (currentTypes.count((it->first)) != 0) { //if this is a type we save
+						if (it->second != "current") { currentFiles[it->first] = it->second; }//save the input file name as current
+					}
+				}
+			}else if (pos2 != string::npos) {
+				//read file output file names
+				vector<string> theseOutputNames;
+				while (!in.eof()) {
+					if (m->control_pressed) { break; }
+					line = m->getline(in); m->gobble(in);
+					string::size_type nextCommandPos = line.find("mothur > ");
+					if (nextCommandPos != string::npos) { skip = true; break;  }
+					else {  theseOutputNames.push_back(line);  }
+				}
+				//ask command for the output names for each type based on inputs
+				Command* command = cFactory->getCommand(commandName);
+				map<string, vector<string> > thisOutputTypes = command->getOutputFiles();
+
+				for (map<string, vector<string> >::iterator it = thisOutputTypes.begin(); it != thisOutputTypes.end(); it++) {
+					if (currentTypes.count((it->first)) != 0) {  //do we save this type
+						//if yes whats its tag
+						map<string, string>::iterator itCurrentFiles = currentFiles.find(it->first);
+						string thisTypesCurrentFile = "";
+						if (itCurrentFiles != currentFiles.end()) { thisTypesCurrentFile = itCurrentFiles->second;  }
+
+						string tag = command->getOutputFileNameTag(it->first, thisTypesCurrentFile); //pass it "fasta" and the current fasta file name.  some commands use the current name to get the extension, the others wont care.
+						// An empty tag would match every file name, so it cannot identify anything.
+						if (tag == "") { continue; }
+						//search for the tag in the list of output files
+						for (int h = 0; h < theseOutputNames.size(); h++) {
+							const string& candidate = theseOutputNames[h];
+							// Names shorter than the tag cannot end with it. Without this check
+							// length()-tag.length() wraps around and substr() throws out_of_range.
+							if (candidate.length() < tag.length()) { continue; }
+							if (candidate.compare(candidate.length()-tag.length(), tag.length(), tag) == 0) { //if it's there and this is a type we save a current version of, save it
+								if ((it->first == "column") || (it->first == "phylip")) { //check for format
+									// RippedName = the text between the last two dots of the file name
+									string RippedName = "";
+									bool foundDot = false;
+									for (int i = theseOutputNames[h].length()-1; i >= 0; i--) {
+										if (foundDot && (theseOutputNames[h][i] != '.')) {  RippedName = theseOutputNames[h][i] + RippedName; }
+										else if (foundDot && (theseOutputNames[h][i] == '.')) {  break; }
+										else if (!foundDot && (theseOutputNames[h][i] == '.')) {  foundDot = true; }
+									}
+									if ((RippedName == "phylip") || (RippedName == "lt") || (RippedName == "square"))  {  currentFiles["phylip"] = theseOutputNames[h];  }
+									else {  currentFiles["column"] = theseOutputNames[h]; }
+								}else {  currentFiles[it->first] = theseOutputNames[h];  }
+								break;
+							}
+						}
+					}
+				}
+			}
+		}
+		in.close();
+
+		if (m->control_pressed) { return 0; }
+
+		//output results
+		string inputString = "";
+		for (map<string, string>::iterator it = currentFiles.begin(); it != currentFiles.end(); it++) { inputString += it->first + "=" + it->second + ","; }
+
+		if (inputString != "") {
+			inputString = inputString.substr(0, inputString.length()-1); // drop the trailing comma
+			m->mothurOutEndLine();
+			m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+			m->mothurOut("Running command: set.current(" + inputString + ")"); m->mothurOutEndLine();
+			m->mothurCalling = true;
+
+			Command* currentCommand = new SetCurrentCommand(inputString);
+			currentCommand->execute();
+
+			delete currentCommand;
+			m->mothurCalling = false;
+			m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+
+
+		}
+
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "LoadLogfileCommand", "execute");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+
+
--- /dev/null
+#ifndef Mothur_loadlogfilecommand_h
+#define Mothur_loadlogfilecommand_h
+
+//
+// loadlogfilecommand.h
+// Mothur
+//
+// Created by Sarah Westcott on 6/13/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+
+#include "command.hpp"
+
+/**************************************************************************************************/
+
+class LoadLogfileCommand : public Command { // "load.logfile": extracts current files from a logfile (see getDescription)
+public:
+ LoadLogfileCommand(string); // parses an option string
+ LoadLogfileCommand(); // marks the command as help-only
+ ~LoadLogfileCommand(){}
+	// Command metadata and option registration.
+ vector<string> setParameters();
+ string getCommandName() { return "load.logfile"; }
+ string getCommandCategory() { return "General"; }
+ string getOutputFileNameTag(string, string) { return ""; } // always returns an empty tag
+ string getHelpString();
+ string getCitation() { return "http://www.mothur.org/wiki/Load.logfile"; }
+ string getDescription() { return "extracts current files from a logfile"; }
+	// execute() scans the logfile; help() prints getHelpString().
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+	// State filled in by the string constructor.
+private:
+ bool abort; // true when execute() must not run
+ string outputDir, logfile; // output directory and the logfile to scan
+ vector<string> outputNames;
+};
+
+/**************************************************************************************************/
+
+
+
+
+#endif
exit(1);
}
}
+//**********************************************************************************************************************
+string MakeBiomCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		// Maps an output type to the file-name suffix make.biom uses for it.
+		// A type that is not registered in outputTypes is reported and yields "".
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		string tag = "";
+		if (type != "biom") {
+			// registered type with no suffix defined: report it and raise control_pressed
+			m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+			m->control_pressed = true;
+		}
+		else { tag = "biom"; }
+		return tag;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "MakeBiomCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
+
//**********************************************************************************************************************
MakeBiomCommand::MakeBiomCommand(){
try {
int MakeBiomCommand::getBiom(vector<SharedRAbundVector*>& lookup){
try {
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + lookup[0]->getLabel() + ".biom";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + lookup[0]->getLabel() + "." + getOutputFileNameTag("biom");
ofstream out;
m->openOutputFile(outputFileName, out);
outputNames.push_back(outputFileName); outputTypes["biom"].push_back(outputFileName);
vector<string> setParameters();
string getCommandName() { return "make.biom"; }
string getCommandCategory() { return "General"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://biom-format.org/documentation/biom_format.html, http://www.mothur.org/wiki/Make.biom"; }
string getDescription() { return "creates a biom file"; }
string helpString = "";
helpString += "The make.contigs command reads a forward fastq file and a reverse fastq file and outputs new fasta and quality files.\n";
helpString += "The make.contigs command parameters are ffastq, rfastq, align, match, mismatch, gapopen, gapextend and processors.\n";
- helpString += "The ffastq and rfastq parameter is required.\n";
- helpString += "The align parameter allows you to specify the alignment method to use. Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n";
+ helpString += "The ffastq and rfastq parameters are required.\n";
+ helpString += "The align parameter allows you to specify the alignment method to use. Your options are: gotoh and needleman. The default is needleman.\n";
helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
helpString += "The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n";
helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
exit(1);
}
}
-
+//**********************************************************************************************************************
+string MakeContigsCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Returns the file-name tag for the requested output type (fasta, qfile or mismatch).
+    // An empty string is returned (after an error is logged) when no tag applies.
+    // inputName is not used by this command.
+    try {
+        // The type has to be one of the keys present in outputTypes.
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "fasta")    { return "contigs.fasta"; }
+        if (type == "qfile")    { return "contigs.qual"; }
+        if (type == "mismatch") { return "contigs.mismatch"; }
+
+        // Known output type without a tag: log it and set m->control_pressed.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MakeContigsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
MakeContigsCommand::MakeContigsCommand(){
try {
if (m->control_pressed) { return 0; }
- string outFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.fasta";
- string outQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.qual";
- string outMisMatchFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.mismatches";
+ string outFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("fasta");
+ string outQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("qfile");
+ string outMisMatchFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("mismatch");
outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
outputNames.push_back(outQualFile); outputTypes["qfile"].push_back(outQualFile);
outputNames.push_back(outMisMatchFile); outputTypes["mismatch"].push_back(outMisMatchFile);
string getCommandName() { return "make.contigs"; }
string getCommandCategory() { return "Sequence Processing"; }
//commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Make.contigs"; }
string getDescription() { return "description"; }
contigScores.push_back(scores1[ABaseMap[i]]);
if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; }
}else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below threshold. In that case eliminate base
- if (scores2[BBaseMap[i]] >= pDataArray->threshold)) {
+ if (scores2[BBaseMap[i]] >= pDataArray->threshold) {
contig += seq2[i];
contigScores.push_back(scores2[BBaseMap[i]]);
}
}
}
//**********************************************************************************************************************
+string MakeFastQCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Returns the file-name tag for the requested output type.
+    // An empty string is returned (after an error is logged) when no tag applies.
+    // inputName is not used by this command.
+    try {
+        // The type has to be one of the keys present in outputTypes.
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "fastq") { return "fastq"; }
+
+        // Known output type without a tag: log it and set m->control_pressed.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MakeFastQCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
MakeFastQCommand::MakeFastQCommand(){
try {
abort = true; calledHelp = true;
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- string outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "fastq";
+ string outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fastq");
outputNames.push_back(outputFile); outputTypes["fastq"].push_back(outputFile);
ofstream out;
vector<string> setParameters();
string getCommandName() { return "make.fastq"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Make.fastq"; }
string getDescription() { return "creates a fastq file from a fasta and quality file"; }
CYGWIN_BUILD ?= no
USECOMPRESSION ?= no
MOTHUR_FILES="\"Enter_your_default_path_here\""
-RELEASE_DATE = "\"5/14/2012\""
-VERSION = "\"1.25.1\""
+RELEASE_DATE = "\"7/9/2012\""
+VERSION = "\"1.26.0\""
FORTAN_COMPILER = gfortran
FORTRAN_FLAGS =
}
}
//**********************************************************************************************************************
+string MakeGroupCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Returns the file-name tag for the requested output type.
+    // An empty string is returned (after an error is logged) when no tag applies.
+    // inputName is not used by this command.
+    try {
+        // The type has to be one of the keys present in outputTypes.
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        // Note the tag is plural ("groups") while the type key is "group".
+        if (type == "group") { return "groups"; }
+
+        // Known output type without a tag: log it and set m->control_pressed.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MakeGroupCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
MakeGroupCommand::MakeGroupCommand(){
try {
abort = true; calledHelp = true;
}
//prevent giantic file name
- if (fastaFileNames.size() > 3) { filename = outputDir + "merge.groups"; }
- else { filename += "groups"; }
+ if (fastaFileNames.size() > 3) { filename = outputDir + "merge." + getOutputFileNameTag("group"); }
+ else { filename += getOutputFileNameTag("group"); }
//make sure there is at least one valid file left
if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
vector<string> setParameters();
string getCommandName() { return "make.group"; }
string getCommandCategory() { return "General"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Make.group"; }
string getDescription() { return "creates a group file"; }
}
}
//**********************************************************************************************************************
+string MantelCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Returns the file-name tag for the requested output type.
+    // An empty string is returned (after an error is logged) when no tag applies.
+    // inputName is not used by this command.
+    try {
+        // The type has to be one of the keys present in outputTypes.
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "mantel") { return "mantel"; }
+
+        // Known output type without a tag: log it and set m->control_pressed.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MantelCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
MantelCommand::MantelCommand(){
try {
abort = true; calledHelp = true;
if (m->control_pressed) { return 0; }
- string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile1)) + "mantel";
+ string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile1)) + getOutputFileNameTag("mantel");
outputNames.push_back(outputFile); outputTypes["mantel"].push_back(outputFile);
ofstream out;
vector<string> setParameters();
string getCommandName() { return "mantel"; }
string getCommandCategory() { return "Hypothesis Testing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "McCune B, Grace JB, Urban DL (2002). Analysis of ecological communities. MjM Software Design: Gleneden Beach, OR. \nLegendre P, Legendre L (1998). Numerical Ecology. Elsevier: New York. \nhttp://www.mothur.org/wiki/Mantel"; }
string getDescription() { return "Mantel’s test for correlation between matrices"; }
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson", "jclass-thetayc", "", "", "",true,false); parameters.push_back(pcalc);
CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput);
+ CommandParameter pmode("mode", "Multiple", "average-median", "average", "", "", "",false,false); parameters.push_back(pmode);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
try {
string helpString = "";
ValidCalculators validCalculator;
- helpString += "The dist.shared command parameters are shared, groups, calc, output, processors, subsample, iters and label. shared is a required, unless you have a valid current file.\n";
+ helpString += "The dist.shared command parameters are shared, groups, calc, output, processors, subsample, iters, mode, and label. shared is a required, unless you have a valid current file.\n";
helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n";
helpString += "The group names are separated by dashes. The label parameter allows you to select what distance levels you would like distance matrices created for, and is also separated by dashes.\n";
helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
helpString += "The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels).\n";
helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
+ helpString += "The mode parameter allows you to specify if you want the average or the median values reported when subsampling. Options are average, and median. The default is average.\n";
helpString += "Example dist.shared(groups=A-B-C, calc=jabund-sorabund).\n";
helpString += "The default value for groups is all the groups in your groupfile.\n";
helpString += "The default value for calc is jclass and thetayc.\n";
}
}
//**********************************************************************************************************************
+string MatrixOutputCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Returns the file-name tag for the requested output type.
+    // An empty string is returned (after an error is logged) when no tag applies.
+    // inputName is not used by this command.
+    try {
+        // The type has to be one of the keys present in outputTypes.
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        // "phylip" outputs carry the "dist" tag.
+        if (type == "phylip") { return "dist"; }
+
+        // Known output type without a tag: log it and set m->control_pressed.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MatrixOutputCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
MatrixOutputCommand::MatrixOutputCommand(){
try {
abort = true; calledHelp = true;
output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "lt"; }
if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
+
+            // Optional "mode" parameter: "average" (default) or "median".
+            // An unrecognised value must reset `mode` itself; writing the fallback into `output`
+            // would clobber the lt/square matrix format and leave `mode` invalid.
+            mode = validParameter.validFile(parameters, "mode", false);        if(mode == "not found"){ mode = "average"; }
+            if ((mode != "average") && (mode != "median")) { m->mothurOut(mode + " is not a valid mode. Options are average and median. I will use average."); m->mothurOutEndLine(); mode = "average"; }
groups = validParameter.validFile(parameters, "groups", false);
if (groups == "not found") { groups = ""; }
out << endl;
}
}else{
- for (int b = 0; b < simMatrix.size(); m++) {
+ for (int b = 0; b < simMatrix.size(); b++) {
out << lookup[b]->getGroup() << '\t';
for (int n = 0; n < simMatrix[b].size(); n++) {
out << simMatrix[b][n] << '\t';
}
if (subsample && (thisIter != 0)) {
+ if((thisIter) % 100 == 0){ m->mothurOut(toString(thisIter)); m->mothurOutEndLine(); }
calcDistsTotals.push_back(calcDists);
+                    // Debug-only dump of every distance computed in this iteration.
+                    // The flag is tested once, outside the loops, so a normal run does not
+                    // walk the whole distance table just to print nothing.
+                    if (m->debug) {
+                        for (int i = 0; i < calcDists.size(); i++) {
+                            for (int j = 0; j < calcDists[i].size(); j++) {
+                                m->mothurOut("[DEBUG]: Results: iter = " + toString(thisIter) + ", " + thisLookup[calcDists[i][j].seq1]->getGroup() + " - " + thisLookup[calcDists[i][j].seq2]->getGroup() + " distance = " + toString(calcDists[i][j].dist) + ".\n");
+                            }
+                        }
+                    }
//clean up memory
for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
thisItersLookup.clear();
- for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
}else { //print results for whole dataset
for (int i = 0; i < calcDists.size(); i++) {
if (m->control_pressed) { break; }
matrix[column][row] = dist;
}
- string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".dist";
+ string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + "." + getOutputFileNameTag("phylip");
outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
ofstream outDist;
m->openOutputFile(distFileName, outDist);
outDist.close();
}
}
+ for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
}
if (iters != 1) {
calcAverages[i].resize(calcDistsTotals[0][i].size());
for (int j = 0; j < calcAverages[i].size(); j++) {
- calcAverages[i][j].seq1 = calcDists[i][j].seq1;
- calcAverages[i][j].seq2 = calcDists[i][j].seq2;
+ calcAverages[i][j].seq1 = calcDistsTotals[0][i][j].seq1;
+ calcAverages[i][j].seq2 = calcDistsTotals[0][i][j].seq2;
calcAverages[i][j].dist = 0.0;
}
}
-
- for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
- for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
+ if (mode == "average") {
+ for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
+ for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
+ for (int j = 0; j < calcAverages[i].size(); j++) {
+ calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
+ if (m->debug) { m->mothurOut("[DEBUG]: Totaling for average calc: iter = " + toString(thisIter) + ", " + thisLookup[calcDistsTotals[thisIter][i][j].seq1]->getGroup() + " - " + thisLookup[calcDistsTotals[thisIter][i][j].seq2]->getGroup() + " distance = " + toString(calcDistsTotals[thisIter][i][j].dist) + ". New total = " + toString(calcAverages[i][j].dist) + ".\n"); }
+ }
+ }
+ }
+
+ for (int i = 0; i < calcAverages.size(); i++) { //finds average.
for (int j = 0; j < calcAverages[i].size(); j++) {
- calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
+ calcAverages[i][j].dist /= (float) iters;
}
}
- }
-
- for (int i = 0; i < calcAverages.size(); i++) { //finds average.
- for (int j = 0; j < calcAverages[i].size(); j++) {
- calcAverages[i][j].dist /= (float) iters;
+ }else { //find median
+ for (int i = 0; i < calcAverages.size(); i++) { //for each calc
+ for (int j = 0; j < calcAverages[i].size(); j++) { //for each comparison
+                        // Gather this comparison's distance from every subsample and sort them.
+                        vector<double> dists;
+                        for (int thisIter = 0; thisIter < iters; thisIter++) { //for each subsample
+                            dists.push_back(calcDistsTotals[thisIter][i][j].dist);
+                        }
+                        sort(dists.begin(), dists.end());
+                        // Median: the middle value for an odd count, the mean of the two middle
+                        // values for an even count (taking dists[iters/2] alone would report the
+                        // upper-middle value whenever iters is even).
+                        int mid = iters / 2;
+                        if ((iters % 2 == 0) && (mid > 0)) { calcAverages[i][j].dist = (dists[mid-1] + dists[mid]) / 2.0; }
+                        else { calcAverages[i][j].dist = dists[mid]; }
+ }
}
}
-
//find standard deviation
vector< vector<seqDist> > stdDev; stdDev.resize(matrixCalculators.size());
for (int i = 0; i < stdDev.size(); i++) { //initialize sums to zero.
stdDev[i].resize(calcDistsTotals[0][i].size());
for (int j = 0; j < stdDev[i].size(); j++) {
- stdDev[i][j].seq1 = calcDists[i][j].seq1;
- stdDev[i][j].seq2 = calcDists[i][j].seq2;
+ stdDev[i][j].seq1 = calcDistsTotals[0][i][j].seq1;
+ stdDev[i][j].seq2 = calcDistsTotals[0][i][j].seq2;
stdDev[i][j].dist = 0.0;
}
}
stdmatrix[column][row] = stdDist;
}
- string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".ave.dist";
+ string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".ave." + getOutputFileNameTag("phylip");
outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
ofstream outAve;
m->openOutputFile(distFileName, outAve);
outAve.close();
- distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".std.dist";
+ distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".std." + getOutputFileNameTag("phylip");
outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
ofstream outSTD;
m->openOutputFile(distFileName, outSTD);
int MatrixOutputCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, vector< vector<seqDist> >& calcDists) {
try {
vector<SharedRAbundVector*> subset;
+
for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
for (int l = 0; l < k; l++) {
vector<string> setParameters();
string getCommandName() { return "dist.shared"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Dist.shared"; }
string getDescription() { return "generate a distance matrix that describes the dissimilarity among multiple groups"; }
bool abort, allLines, subsample;
set<string> labels; //holds labels to be used
- string outputFile, calc, groups, label, outputDir;
+ string outputFile, calc, groups, label, outputDir, mode;
vector<string> Estimators, Groups, outputNames; //holds estimators to be used
int process(vector<SharedRAbundVector*>, string, string);
int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
exit(1);
}
}
-
//**********************************************************************************************************************
MergeFileCommand::MergeFileCommand(){
try {
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- ofstream outputFile;
- m->openOutputFile(outputFileName, outputFile);
-
- char c;
- for(int i=0;i<numInputFiles;i++){
- ifstream inputFile; //declaration must be inside for loop of windows throws an error
-
- m->openInputFile(fileNames[i], inputFile);
-
- while(!inputFile.eof()){
- if (m->control_pressed) { outputTypes.clear(); inputFile.close(); outputFile.close(); m->mothurRemove(outputFileName); return 0; }
-
- c = inputFile.get();
- //-1 is eof char
- if (int(c) != -1) { outputFile << c; }
- }
-
- inputFile.close();
- }
-
- outputFile.close();
+ m->mothurRemove(outputFileName);
+ for(int i=0;i<numInputFiles;i++){ m->appendFiles(fileNames[i], outputFileName); }
- if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); return 0; }
+ if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
m->mothurOutEndLine();
m->mothurOut("Output File Name: "); m->mothurOutEndLine();
vector<string> setParameters();
string getCommandName() { return "merge.files"; }
string getCommandCategory() { return "General"; }
+ string getOutputFileNameTag(string, string) { return ""; }
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Merge.files"; }
string getDescription() { return "appends files creating one file"; }
}
}
//**********************************************************************************************************************
+string MergeGroupsCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Returns the file-name tag for the requested output type (shared or group).
+    // For both types the tag is "merge" followed by m->getExtension(inputName).
+    // An empty string is returned (after an error is logged) when no tag applies.
+    try {
+        // The type has to be one of the keys present in outputTypes.
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if ((type == "shared") || (type == "group")) {
+            return "merge" + m->getExtension(inputName);
+        }
+
+        // Known output type without a tag: log it and set m->control_pressed.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MergeGroupsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
MergeGroupsCommand::MergeGroupsCommand(){
try {
abort = true; calledHelp = true;
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "merge" + m->getExtension(sharedfile);
- outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + getOutputFileNameTag("shared", sharedfile);
+ outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
ofstream out;
m->openOutputFile(outputFileName, out);
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "merge" + m->getExtension(groupfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
ofstream out;
vector<string> setParameters();
string getCommandName() { return "merge.groups"; }
string getCommandCategory() { return "General"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Merge.groups"; }
string getDescription() { return "reads shared file and a design file and merges the groups in the shared file that are in the same grouping in the design file"; }
exit(1);
}
}
+//**********************************************************************************************************************
+string MetaStatsCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Returns the file-name tag for the requested output type.
+    // An empty string is returned (after an error is logged) when no tag applies.
+    // inputName is not used by this command.
+    try {
+        // The type has to be one of the keys present in outputTypes.
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "metastats") { return "metastats"; }
+
+        // Known output type without a tag: log it and set m->control_pressed.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MetaStatsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+
//**********************************************************************************************************************
MetaStatsCommand::MetaStatsCommand(){
try {
string setB = namesOfGroupCombos[c][1];
//get filename
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + "." + getOutputFileNameTag("metastats");
outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
//int nameLength = outputFileName.length();
//char * output = new char[nameLength];
vector<string> setParameters();
string getCommandName() { return "metastats"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "White JR, Nagarajan N, Pop M (2009). Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5: e1000352. \nhttp://www.mothur.org/wiki/Metastats"; }
string getDescription() { return "detects differentially abundant features in clinical metagenomic samples"; }
try {
CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcount);
CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
}
}
//**********************************************************************************************************************
+string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Returns the file-name tag for the requested output type (list, rabund or sabund).
+    // For these three types the tag is the same text as the type name.
+    // An empty string is returned (after an error is logged) when no tag applies.
+    // inputName is not used by this command.
+    try {
+        // The type has to be one of the keys present in outputTypes.
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if ((type == "list") || (type == "rabund") || (type == "sabund")) { return type; }
+
+        // Known output type without a tag: log it and set m->control_pressed.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
MGClusterCommand::MGClusterCommand(){
try {
abort = true; calledHelp = true;
if (namefile == "not open") { abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; }
if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
//check for optional parameter and set defaults
string temp;
- temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
+ temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
precisionLength = temp.length();
m->mothurConvert(temp, precision);
hclusterWanted = m->isTrue(temp);
temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
- hard = m->isTrue(temp);
+ hard = m->isTrue(temp);
}
}
//**********************************************************************************************************************
int MGClusterCommand::execute(){
try {
-
if (abort == true) { if (calledHelp) { return 0; } return 2; }
//read names file
string tag = "";
time_t start;
float previousDist = 0.00000;
- float rndPreviousDist = 0.00000;
-
+ float rndPreviousDist = 0.00000;
+
//read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
//must remember to delete those objects here since readBlast does not
read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
read->read(nameMap);
-
- list = new ListVector(nameMap->getListVector());
- RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
+
+ list = new ListVector(nameMap->getListVector());
+ RAbundVector* rabund = NULL;
+
+ if(countfile != "") {
+ //map<string, int> nameMapCounts = m->readNames(namefile);
+ ct = new CountTable();
+ ct->readTable(countfile);
+ rabund = new RAbundVector();
+ createRabund(ct, list, rabund);
+ }else {
+ rabund = new RAbundVector(list->getRAbundVector());
+ }
+
+
+ //list = new ListVector(nameMap->getListVector());
+ //rabund = new RAbundVector(list->getRAbundVector());
if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
else if (method == "nearest") { tag = "nn"; }
else { tag = "an"; }
- //open output files
- m->openOutputFile(fileroot+ tag + ".list", listFile);
- m->openOutputFile(fileroot+ tag + ".rabund", rabundFile);
- m->openOutputFile(fileroot+ tag + ".sabund", sabundFile);
+ string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
+ string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
+ string listFileName = fileroot+ tag + ".";
+ if (countfile != "") { listFileName += "unique_"; }
+ listFileName += getOutputFileNameTag("list");
+
+ if (countfile == "") {
+ m->openOutputFile(sabundFileName, sabundFile);
+ m->openOutputFile(rabundFileName, rabundFile);
+ }
+ m->openOutputFile(listFileName, listFile);
if (m->control_pressed) {
delete nameMap; delete read; delete list; delete rabund;
- listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
outputTypes.clear();
return 0;
}
if (!hclusterWanted) {
//get distmatrix and overlap
- SparseMatrix* distMatrix = read->getDistMatrix();
+ SparseDistanceMatrix* distMatrix = read->getDistMatrix();
overlapMatrix = read->getOverlapMatrix(); //already sorted by read
delete read;
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
- listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
outputTypes.clear();
return 0;
}
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
- listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
outputTypes.clear();
return 0;
}
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
- listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
outputTypes.clear();
return 0;
}
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
- listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
outputTypes.clear();
return 0;
}
if (m->control_pressed) {
delete nameMap; delete list; delete rabund;
- listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
outputTypes.clear();
return 0;
}
if (m->control_pressed) {
delete nameMap; delete list; delete rabund; delete hcluster;
- listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
outputTypes.clear();
return 0;
}
if (m->control_pressed) {
delete nameMap; delete list; delete rabund; delete hcluster;
- listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
m->mothurRemove(distFile);
m->mothurRemove(overlapFile);
outputTypes.clear();
if (m->control_pressed) {
delete nameMap; delete list; delete rabund; delete hcluster;
- listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
m->mothurRemove(distFile);
m->mothurRemove(overlapFile);
outputTypes.clear();
if (m->control_pressed) {
delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
- listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
m->mothurRemove(distFile);
m->mothurRemove(overlapFile);
outputTypes.clear();
if (m->control_pressed) {
delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
- listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
m->mothurRemove(distFile);
m->mothurRemove(overlapFile);
outputTypes.clear();
m->mothurRemove(overlapFile);
}
- delete list;
+ delete list;
delete rabund;
listFile.close();
- sabundFile.close();
- rabundFile.close();
-
+ if (countfile == "") {
+ sabundFile.close();
+ rabundFile.close();
+ }
if (m->control_pressed) {
delete nameMap;
- listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
outputTypes.clear();
return 0;
}
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
- m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
- m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
+ m->mothurOut(listFileName); m->mothurOutEndLine(); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
+ if (countfile == "") {
+ m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
+ m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
+ }
m->mothurOutEndLine();
if (saveCutoff != cutoff) {
void MGClusterCommand::printData(ListVector* mergedList){
try {
mergedList->print(listFile);
- mergedList->getRAbundVector().print(rabundFile);
-
- SAbundVector sabund = mergedList->getSAbundVector();
+ SAbundVector sabund = mergedList->getSAbundVector();
+
+ if (countfile == "") {
+ mergedList->getRAbundVector().print(rabundFile);
+ sabund.print(sabundFile);
+ }
sabund.print(cout);
- sabund.print(sabundFile);
}
catch(exception& e) {
m->errorOut(e, "MGClusterCommand", "printData");
//**********************************************************************************************************************
+//appends one abundance per bin of list to rabund: the sum of ct->getNumSeqs(name) over the names in that bin
+void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
+ try {
+  for (int binIndex = 0; binIndex < list->getNumBins(); binIndex++) {
+   //a bin is stored as a comma separated string of sequence names
+   string namesInBin = list->get(binIndex);
+   vector<string> members;
+   m->splitAtComma(namesInBin, members);
+
+   //add up the counts of every member of this bin
+   int abundance = 0;
+   for (vector<string>::iterator it = members.begin(); it != members.end(); ++it) {
+    abundance += ct->getNumSeqs(*it);
+   }
+
+   rabund->push_back(abundance);
+  }
+ }
+ catch(exception& e) {
+  m->errorOut(e, "MGClusterCommand", "createRabund");
+  exit(1);
+ }
+}
+//**********************************************************************************************************************
#include "hcluster.h"
#include "rabundvector.hpp"
#include "sabundvector.hpp"
+#include "counttable.h"
/**********************************************************************/
vector<string> setParameters();
string getCommandName() { return "mgcluster"; }
string getCommandCategory() { return "Clustering"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Schloss PD, Handelsman J (2008). A statistical toolbox for metagenomics. BMC Bioinformatics 9: 34. \nhttp://www.mothur.org/wiki/Mgcluster"; }
string getDescription() { return "cluster your sequences into OTUs using a blast file"; }
Cluster* cluster;
HCluster* hcluster;
ListVector* list;
+ CountTable* ct;
ListVector oldList;
+ RAbundVector rav;
vector<seqDist> overlapMatrix;
vector<string> outputNames;
- string blastfile, method, namefile, overlapFile, distFile, outputDir;
+ string blastfile, method, namefile, countfile, overlapFile, distFile, outputDir;
ofstream sabundFile, rabundFile, listFile;
double cutoff;
float penalty;
ListVector* mergeOPFs(map<string, int>, float);
void sortHclusterFiles(string, string);
vector<seqDist> getSeqs(ifstream&);
+ void createRabund(CountTable*&, ListVector*&, RAbundVector*&);
};
typedef unsigned long ull;
+typedef unsigned short intDist;
struct IntNode {
int lvalue;
reverseProb = rp;
}
};
-
+/***********************************************************************/
+//one cell of a distance row: an index paired with a distance
+struct PDistCell{
+ ull index;
+ float dist;
+
+ //default cell: index 0, distance 0
+ PDistCell() : index(0), dist(0) {}
+
+ //cell for index idx holding distance distance
+ PDistCell(ull idx, float distance) : index(idx), dist(distance) {}
+};
/************************************************************/
struct clusterNode {
int numSeq;
spearmanRank(string n, float s) : name(n), score(s) {}
};
+//***********************************************************************
+//true when left has the larger index, so as a sort comparator it orders cells from highest index to lowest
+inline bool compareIndexes(PDistCell left, PDistCell right){
+ return (right.index < left.index);
+}
//********************************************************************************************************************
-//sorts highest to lowest
+//sorts lowest to highest
inline bool compareSpearman(spearmanRank left, spearmanRank right){
- return (left.score > right.score);
+ return (left.score < right.score);
}
//********************************************************************************************************************
//sorts highest to lowest
return _uniqueInstance;
}
/*********************************************************************************************/
+//returns the set of tags mothur tracks as "current" settings; printCurrentFiles prints values under matching tags
+set<string> MothurOut::getCurrentTypes() {
+ try {
+  //kept as one table so adding a new type is a one word change
+  const char* const typeNames[] = {
+   "fasta", "accnos", "column", "design", "group", "list", "name", "oligos",
+   "order", "ordergroup", "phylip", "qfile", "relabund", "sabund", "rabund",
+   "sff", "shared", "taxonomy", "tree", "flow", "biom", "count", "processors"
+  };
+  const int numTypes = sizeof(typeNames) / sizeof(typeNames[0]);
+
+  set<string> types(typeNames, typeNames + numTypes);
+
+  return types;
+ }
+ catch(exception& e) {
+  errorOut(e, "MothurOut", "getCurrentTypes");
+  exit(1);
+ }
+}
+/*********************************************************************************************/
void MothurOut::printCurrentFiles() {
try {
+
+
if (accnosfile != "") { mothurOut("accnos=" + accnosfile); mothurOutEndLine(); }
if (columnfile != "") { mothurOut("column=" + columnfile); mothurOutEndLine(); }
if (designfile != "") { mothurOut("design=" + designfile); mothurOutEndLine(); }
if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); }
if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); }
if (biomfile != "") { mothurOut("biom=" + biomfile); mothurOutEndLine(); }
+ if (counttablefile != "") { mothurOut("count=" + counttablefile); mothurOutEndLine(); }
if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); }
}
if (treefile != "") { return true; }
if (flowfile != "") { return true; }
if (biomfile != "") { return true; }
+ if (counttablefile != "") { return true; }
if (processors != "1") { return true; }
return hasCurrent;
taxonomyfile = "";
flowfile = "";
biomfile = "";
+ counttablefile = "";
processors = "1";
}
catch(exception& e) {
int MothurOut::renameFile(string oldName, string newName){
try {
+
+ if (oldName == newName) { return 0; }
+
ifstream inTest;
int exist = openInputFile(newName, inTest, "");
inTest.close();
int numLines = 0;
if (ableToOpen == 0) { //you opened it
- while(!input.eof()){
- char c = input.get();
- if(input.eof()) { break; }
- else { output << c; if (c == '\n') {numLines++;} }
- }
+
+ char buffer[4096];
+ while (!input.eof()) {
+ input.read(buffer, 4096);
+ output.write(buffer, input.gcount());
+ //count number of lines
+ for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
+ }
input.close();
}
positions.push_back(0);
while(!in.eof()){
- //unsigned long long lastpos = in.tellg();
- //input = getline(in);
- //if (input.length() != 0) {
- //unsigned long long pos = in.tellg();
- //if (pos != -1) { positions.push_back(pos - input.length() - 1); }
- //else { positions.push_back(lastpos); }
- //}
- //gobble(in); //has to be here since windows line endings are 2 characters and mess up the positions
-
-
//getline counting reads
char d = in.get(); count++;
while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof())) {
exit(1);
}
}
+/***********************************************************************/
+
+//Splits the first size chars of buffer into whitespace delimited tokens.
+//rest is in/out: on entry it holds the partial token carried over from the previous chunk, on exit it holds
+//the token still open at the end of this chunk. A token is only returned once whitespace is seen after it,
+//so the caller must flush rest itself after the last chunk.
+vector<string> MothurOut::splitWhiteSpace(string& rest, char buffer[], int size){
+ try {
+  vector<string> pieces;
+
+  for (int i = 0; i < size; i++) {
+   //isspace is undefined for negative values, so go through unsigned char
+   if (!isspace(static_cast<unsigned char>(buffer[i]))) { rest += buffer[i]; }
+   else if (rest != "") { pieces.push_back(rest); rest = ""; } //whitespace closes the open token; runs of whitespace are skipped
+  }
+
+  return pieces;
+ }
+ catch(exception& e) {
+  errorOut(e, "MothurOut", "splitWhiteSpace");
+  exit(1);
+ }
+}
+/***********************************************************************/
+//Splits input into its whitespace delimited tokens. Leading, trailing and repeated whitespace produce no empty tokens.
+vector<string> MothurOut::splitWhiteSpace(string input){
+ try {
+  vector<string> pieces;
+  string rest = "";
+
+  for (string::size_type i = 0; i < input.length(); i++) {
+   //isspace is undefined for negative values, so go through unsigned char
+   if (!isspace(static_cast<unsigned char>(input[i]))) { rest += input[i]; }
+   else if (rest != "") { pieces.push_back(rest); rest = ""; } //whitespace closes the open token
+  }
+
+  //the last token is not followed by whitespace
+  if (rest != "") { pieces.push_back(rest); }
+
+  return pieces;
+ }
+ catch(exception& e) {
+  errorOut(e, "MothurOut", "splitWhiteSpace");
+  exit(1);
+ }
+}
+//**********************************************************************************************************************
+//Reads a two column, whitespace delimited file into taxMap (first column -> second column).
+//When the second column contains a '(' it is passed through removeConfidences first.
+//Returns the number of entries in taxMap.
+int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
+ try {
+
+  //open input file
+  ifstream in;
+  openInputFile(namefile, in);
+
+  string rest = "";       //token cut off by the end of the previous chunk
+  char buffer[4096];
+  bool pairDone = false;
+  bool columnOne = true;  //true when the next token belongs in the first column
+  string firstCol, secondCol;
+
+  while (!in.eof()) {
+   if (control_pressed) { break; }
+
+   in.read(buffer, 4096);
+   vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+
+   //splitWhiteSpace only completes a token when whitespace follows it, so a file
+   //without a trailing newline leaves its final token in rest - flush it at eof
+   if (in.eof() && (rest != "")) { pieces.push_back(rest); rest = ""; }
+
+   for (int i = 0; i < pieces.size(); i++) {
+    if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+    else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+    if (pairDone) {
+     //are there confidence scores, if so remove them
+     if (secondCol.find_first_of('(') != string::npos) { removeConfidences(secondCol); }
+     taxMap[firstCol] = secondCol;
+     if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
+     pairDone = false;
+    }
+   }
+  }
+  in.close();
+
+  return taxMap.size();
+
+ }
+ catch(exception& e) {
+  errorOut(e, "MothurOut", "readTax");
+  exit(1);
+ }
+}
/**********************************************************************************************************************/
-int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
+int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool redund) {
try {
//open input file
ifstream in;
openInputFile(namefile, in);
-
+
+ string rest = "";
+ char buffer[4096];
+ bool pairDone = false;
+ bool columnOne = true;
+ string firstCol, secondCol;
+
while (!in.eof()) {
if (control_pressed) { break; }
- string firstCol, secondCol;
- in >> firstCol >> secondCol; gobble(in);
-
- nameMap[firstCol] = secondCol;
+ in.read(buffer, 4096);
+ vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ //parse names into vector
+ vector<string> theseNames;
+ splitAtComma(secondCol, theseNames);
+ for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
+ pairDone = false;
+ }
+ }
}
in.close();
}
}
/**********************************************************************************************************************/
-int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap) {
+//Reads a two column, whitespace delimited file into nameMap with the columns flipped (second column -> first column).
+//The flip argument is not read by the body; it only distinguishes this overload. Returns nameMap.size().
+int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip) {
try {
//open input file
ifstream in;
openInputFile(namefile, in);
-
+
+ string rest = "";
+ char buffer[4096];
+ bool pairDone = false;
+ bool columnOne = true;
+ string firstCol, secondCol;
+
while (!in.eof()) {
if (control_pressed) { break; }
- string firstCol, secondCol;
- in >> firstCol >> secondCol; gobble(in);
+ in.read(buffer, 4096);
+ vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+
+ //splitWhiteSpace only completes a token when whitespace follows it, so a file
+ //without a trailing newline leaves its final token in rest - flush it at eof
+ if (in.eof() && (rest != "")) { pieces.push_back(rest); rest = ""; }
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ nameMap[secondCol] = firstCol;
+ pairDone = false;
+ }
+ }
+ }
+ in.close();
+
+ return nameMap.size();
+
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "readNames");
+ exit(1);
+ }
+}
+/**********************************************************************************************************************/
+//Reads a two column, whitespace delimited file whose second column is a comma separated list of names.
+//nameMap gets every name in the list -> first column; nameCount gets first column -> number of names in the list.
+//Both maps are cleared first. Returns nameMap.size().
+int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) {
+ try {
+ nameMap.clear(); nameCount.clear();
+ //open input file
+ ifstream in;
+ openInputFile(namefile, in);
+
+ string rest = "";
+ char buffer[4096];
+ bool pairDone = false;
+ bool columnOne = true;
+ string firstCol, secondCol;
+
+ while (!in.eof()) {
+ if (control_pressed) { break; }
- vector<string> temp;
- splitAtComma(secondCol, temp);
+ in.read(buffer, 4096);
+ vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+
+ //splitWhiteSpace only completes a token when whitespace follows it, so a file
+ //without a trailing newline leaves its final token in rest - flush it at eof
+ if (in.eof() && (rest != "")) { pieces.push_back(rest); rest = ""; }
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ //parse names into vector
+ vector<string> theseNames;
+ splitAtComma(secondCol, theseNames);
+ for (int j = 0; j < theseNames.size(); j++) { nameMap[theseNames[j]] = firstCol; } //j, so the outer index is not shadowed
+ nameCount[firstCol] = theseNames.size();
+ pairDone = false;
+ }
+ }
+ }
+ in.close();
+
+ return nameMap.size();
+
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "readNames");
+ exit(1);
+ }
+}
+/**********************************************************************************************************************/
+int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
+ try {
+
+ //open input file
+ ifstream in;
+ openInputFile(namefile, in);
+
+ string rest = "";
+ char buffer[4096];
+ bool pairDone = false;
+ bool columnOne = true;
+ string firstCol, secondCol;
+
+ while (!in.eof()) {
+ if (control_pressed) { break; }
- nameMap[firstCol] = temp;
+ in.read(buffer, 4096);
+ vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; }
+ }
}
in.close();
}
}
/**********************************************************************************************************************/
+//Reads a two column, whitespace delimited file into nameMap: first column -> the second column split at commas.
+//Returns nameMap.size().
+int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap) {
+ try {
+
+  //open input file
+  ifstream in;
+  openInputFile(namefile, in);
+
+  string rest = "";       //token cut off by the end of the previous chunk
+  char buffer[4096];
+  bool pairDone = false;
+  bool columnOne = true;  //true when the next token belongs in the first column
+  string firstCol, secondCol;
+
+  while (!in.eof()) {
+   if (control_pressed) { break; }
+
+   in.read(buffer, 4096);
+   vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+
+   //splitWhiteSpace only completes a token when whitespace follows it, so a file
+   //without a trailing newline leaves its final token in rest - flush it at eof
+   if (in.eof() && (rest != "")) { pieces.push_back(rest); rest = ""; }
+
+   for (int i = 0; i < pieces.size(); i++) {
+    if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+    else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+    if (pairDone) {
+     vector<string> temp;
+     splitAtComma(secondCol, temp);
+     nameMap[firstCol] = temp;
+     pairDone = false;
+    }
+   }
+  }
+  in.close();
+
+  return nameMap.size();
+ }
+ catch(exception& e) {
+  errorOut(e, "MothurOut", "readNames");
+  exit(1);
+ }
+}
+/**********************************************************************************************************************/
map<string, int> MothurOut::readNames(string namefile) {
try {
ifstream in;
openInputFile(namefile, in);
+ string rest = "";
+ char buffer[4096];
+ bool pairDone = false;
+ bool columnOne = true;
+ string firstCol, secondCol;
+
while (!in.eof()) {
if (control_pressed) { break; }
- string firstCol, secondCol;
- in >> firstCol; gobble(in);
- in >> secondCol; gobble(in);
-
- int num = getNumNames(secondCol);
-
- nameMap[firstCol] = num;
+ in.read(buffer, 4096);
+ vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ int num = getNumNames(secondCol);
+ nameMap[firstCol] = num;
+ pairDone = false;
+ }
+ }
}
- in.close();
+ in.close();
return nameMap;
ifstream in;
openInputFile(namefile, in);
+ string rest = "";
+ char buffer[4096];
+ bool pairDone = false;
+ bool columnOne = true;
+ string firstCol, secondCol;
+
while (!in.eof()) {
if (control_pressed) { break; }
- string firstCol, secondCol;
- in >> firstCol >> secondCol; gobble(in);
-
- int num = getNumNames(secondCol);
-
- map<string, string>::iterator it = fastamap.find(firstCol);
- if (it == fastamap.end()) {
- error = 1;
- mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
- }else {
- seqPriorityNode temp(num, it->second, firstCol);
- nameVector.push_back(temp);
- }
+ in.read(buffer, 4096);
+ vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ int num = getNumNames(secondCol);
+
+ map<string, string>::iterator it = fastamap.find(firstCol);
+ if (it == fastamap.end()) {
+ error = 1;
+ mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
+ }else {
+ seqPriorityNode temp(num, it->second, firstCol);
+ nameVector.push_back(temp);
+ }
+
+ pairDone = false;
+ }
+ }
}
- in.close();
-
+ in.close();
+
return error;
-
}
catch(exception& e) {
errorOut(e, "MothurOut", "readNames");
exit(1);
}
}
-
+//**********************************************************************************************************************
+//Reads every whitespace delimited token of accnosfile and returns them as a set.
+set<string> MothurOut::readAccnos(string accnosfile){
+ try {
+  set<string> names;
+  ifstream in;
+  openInputFile(accnosfile, in);
+
+  string rest = "";  //token cut off by the end of the previous chunk
+  char buffer[4096];
+
+  while (!in.eof()) {
+   if (control_pressed) { break; }
+
+   in.read(buffer, 4096);
+   vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+
+   //splitWhiteSpace only completes a token when whitespace follows it, so a file
+   //without a trailing newline leaves its final name in rest - flush it at eof
+   if (in.eof() && (rest != "")) { pieces.push_back(rest); rest = ""; }
+
+   for (int i = 0; i < pieces.size(); i++) { names.insert(pieces[i]); }
+  }
+  in.close();
+
+  return names;
+ }
+ catch(exception& e) {
+  errorOut(e, "MothurOut", "readAccnos");
+  exit(1);
+ }
+}
+//**********************************************************************************************************************
+//Clears names, then appends every whitespace delimited token of accnosfile in file order. Always returns 0.
+int MothurOut::readAccnos(string accnosfile, vector<string>& names){
+ try {
+  names.clear();
+  ifstream in;
+  openInputFile(accnosfile, in);
+
+  string rest = "";  //token cut off by the end of the previous chunk
+  char buffer[4096];
+
+  while (!in.eof()) {
+   if (control_pressed) { break; }
+
+   in.read(buffer, 4096);
+   vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+
+   //splitWhiteSpace only completes a token when whitespace follows it, so a file
+   //without a trailing newline leaves its final name in rest - flush it at eof
+   if (in.eof() && (rest != "")) { pieces.push_back(rest); rest = ""; }
+
+   for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); }
+  }
+  in.close();
+
+  return 0;
+ }
+ catch(exception& e) {
+  errorOut(e, "MothurOut", "readAccnos");
+  exit(1);
+ }
+}
/***********************************************************************/
int MothurOut::getNumNames(string names){
exit(1);
}
}
+/***********************************************************************/
+//Converts item into num. Returns true when the conversion FAILED: num is then set to 0, an error is
+//printed and commandInputsConvertError is raised. Returns false on success.
+bool MothurOut::mothurConvert(string item, intDist& num){
+ try {
+  //reject anything isNumeric1 does not accept
+  if (!isNumeric1(item)) {
+   num = 0;
+   mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
+   commandInputsConvertError = true;
+   return true;
+  }
+
+  convert(item, num);
+  return false;
+ }
+ catch(exception& e) {
+  errorOut(e, "MothurOut", "mothurConvert");
+  exit(1);
+ }
+}
+
/***********************************************************************/
bool MothurOut::isNumeric1(string stringToCheck){
try {
//map<string, string> names;
vector<string> binLabelsInFile;
vector<string> currentBinLabels;
- string saveNextLabel, argv, sharedHeaderMode;
+ string saveNextLabel, argv, sharedHeaderMode, groupMode;
bool printedHeaders, commandInputsConvertError;
//functions from mothur.h
string getline(istringstream&);
void gobble(istream&);
void gobble(istringstream&);
- map<string, int> readNames(string);
+ vector<string> splitWhiteSpace(string& rest, char[], int);
+ vector<string> splitWhiteSpace(string);
+ set<string> readAccnos(string);
+ int readAccnos(string, vector<string>&);
+ map<string, int> readNames(string);
+ int readTax(string, map<string, string>&);
+ int readNames(string, map<string, string>&, map<string, int>&);
int readNames(string, map<string, string>&);
+ int readNames(string, map<string, string>&, bool);
+ int readNames(string, map<string, string>&, int);
int readNames(string, map<string, vector<string> >&);
int readNames(string, vector<seqPriorityNode>&, map<string, string>&);
int mothurRemove(string);
bool mothurConvert(string, int&); //use for converting user inputs. Sets commandInputsConvertError to true if error occurs. Engines check this.
+ bool mothurConvert(string, intDist&); //use for converting user inputs. Sets commandInputsConvertError to true if error occurs. Engines check this.
bool mothurConvert(string, float&); //use for converting user inputs. Sets commandInputsConvertError to true if error occurs. Engines check this.
bool mothurConvert(string, double&); //use for converting user inputs. Sets commandInputsConvertError to true if error occurs. Engines check this.
int control_pressed;
bool executing, runParse, jumble, gui, mothurCalling, debug;
- //current files - if you add a new type you must edit optionParser->getParameters, get.current command and mothurOut->printCurrentFiles/clearCurrentFiles.
+ //current files - if you add a new type you must edit optionParser->getParameters, get.current command and mothurOut->printCurrentFiles/clearCurrentFiles/getCurrentTypes.
string getPhylipFile() { return phylipfile; }
string getColumnFile() { return columnfile; }
string getListFile() { return listfile; }
string getTaxonomyFile() { return taxonomyfile; }
string getFlowFile() { return flowfile; }
string getBiomFile() { return biomfile; }
+ string getCountTableFile() { return counttablefile; }
string getProcessors() { return processors; }
void setListFile(string f) { listfile = getFullPathName(f); }
void setTreeFile(string f) { treefile = getFullPathName(f); }
- void setGroupFile(string f) { groupfile = getFullPathName(f); }
+ void setGroupFile(string f) { groupfile = getFullPathName(f); groupMode = "group"; }
void setPhylipFile(string f) { phylipfile = getFullPathName(f); }
void setColumnFile(string f) { columnfile = getFullPathName(f); }
void setNameFile(string f) { namefile = getFullPathName(f); }
void setTaxonomyFile(string f) { taxonomyfile = getFullPathName(f); }
void setFlowFile(string f) { flowfile = getFullPathName(f); }
void setBiomFile(string f) { biomfile = getFullPathName(f); }
- void setProcessors(string p) { processors = p; }
+ void setCountTableFile(string f) { counttablefile = getFullPathName(f); groupMode = "count"; }
+ void setProcessors(string p) { processors = p; mothurOut("\nUsing " + toString(p) + " processors.\n"); }
void printCurrentFiles();
bool hasCurrentFiles();
void clearCurrentFiles();
+ set<string> getCurrentTypes();
private:
static MothurOut* _uniqueInstance;
processors = "1";
flowfile = "";
biomfile = "";
+ counttablefile = "";
gui = false;
printedHeaders = false;
commandInputsConvertError = false;
mothurCalling = false;
debug = false;
sharedHeaderMode = "";
+ groupMode = "group";
}
~MothurOut();
string releaseDate, version;
string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile, biomfile;
- string orderfile, treefile, sharedfile, ordergroupfile, relabundfile, fastafile, qualfile, sfffile, oligosfile, processors, flowfile;
+ string orderfile, treefile, sharedfile, ordergroupfile, relabundfile, fastafile, qualfile, sfffile, oligosfile, processors, flowfile, counttablefile;
vector<string> Groups;
vector<string> namesOfGroups;
m->openInputFile(nameMapFile, fileHandle);
}
-
+//**********************************************************************************************************************
+//Default constructor: only stores the MothurOut instance in m; unlike readMap's file-based path, nothing is opened here.
+NameAssignment::NameAssignment(){ m = MothurOut::getInstance(); }
//**********************************************************************************************************************
void NameAssignment::readMap(){
try{
string firstCol, secondCol, skip;
// int index = 0;
-
+
map<string, int>::iterator itData;
int rowIndex = 0;
while(fileHandle){
- fileHandle >> firstCol; //read from first column
+ fileHandle >> firstCol; m->gobble(fileHandle); //read from first column
fileHandle >> secondCol; //read from second column
itData = (*this).find(firstCol);
class NameAssignment : public map<string,int> {
public:
NameAssignment(string);
- NameAssignment(){};
+ NameAssignment();
void readMap();
ListVector getListVector();
int get(string);
}
}
//**********************************************************************************************************************
+//Returns the file name tag this command uses for the given output type.
+//  type      - output type key; it must be one of the keys stored in outputTypes.
+//  inputName - not used by this implementation.
+//Returns "" (after printing an error) when the type is unknown; a type that is registered
+//but has no tag below additionally sets m->control_pressed.
+string NewCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		//is this a type this command creates
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		if (type == "fileType1") { return "tag1"; }
+		if (type == "fileType2") { return "tag2"; }
+		if (type == "fileType3") { return "tag3"; }
+
+		//registered in outputTypes, but no tag has been defined for it here
+		m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;
+		return "";
+	}
+	catch(exception& e) {
+		//report the real function name so the error points at the right place
+		m->errorOut(e, "NewCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
NewCommand::NewCommand(){
try {
abort = true; calledHelp = true;
vector<string> tempOutNames;
outputTypes["fileType1"] = tempOutNames; //filetypes should be things like: shared, fasta, accnos...
outputTypes["fileType2"] = tempOutNames;
- outputTypes["FileType2"] = tempOutNames;
+		outputTypes["fileType3"] = tempOutNames; //key is case sensitive: getOutputFileNameTag looks up "fileType3"
}
catch(exception& e) {
m->errorOut(e, "NewCommand", "NewCommand");
vector<string> setParameters();
string getCommandName() { return "newCommandNameToBeSeenByUser"; }
string getCommandCategory() { return "commandCategory"; }
+ string getOutputFileNameTag(string, string);
//commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden
- string getHelpString();
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/newCommandNameToBeSeenByUser"; }
string getDescription() { return "brief description"; }
}
}
//**********************************************************************************************************************
+//Maps an nmds output type ("nmds", "stress", "iters") to the tag appended to the root file name.
+//inputName is not used. Returns "" after printing an error for unknown types.
+string NMDSCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		//only types registered in outputTypes are valid for this command
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		if (type == "nmds")   { return "nmds.axes"; }
+		if (type == "stress") { return "nmds.stress"; }
+		if (type == "iters")  { return "nmds.iters"; }
+
+		//registered type without a tag definition: report it and set control_pressed
+		m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;
+		return "";
+	}
+	catch(exception& e) {
+		m->errorOut(e, "NMDSCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
NMDSCommand::NMDSCommand(){
try {
abort = true; calledHelp = true;
vector< vector<double> > axes;
if (axesfile != "") { axes = readAxes(names); }
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds.iters";
- string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds.stress";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + getOutputFileNameTag("iters");
+ string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + getOutputFileNameTag("stress");
outputNames.push_back(outputFileName); outputTypes["iters"].push_back(outputFileName);
outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName);
out.close(); out2.close();
//output best config
- string BestFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds.axes";
+ string BestFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + getOutputFileNameTag("nmds");
outputNames.push_back(BestFileName); outputTypes["nmds"].push_back(BestFileName);
m->mothurOut("\nNumber of dimensions:\t" + toString(bestDim) + "\n");
vector<string> setParameters();
string getCommandName() { return "nmds"; }
string getCommandCategory() { return "Hypothesis Testing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Borg, Groenen (1997). Non-metric multidimensional scaling function using the majorization algorithm, in Modern Multidimensional Scaling. Ed. T.F. Cox and M.A.A. Cox. Chapman and Hall. \nhttp://www.mothur.org/wiki/Nmds"; }
string getDescription() { return "nmds"; }
NoCommand() {}
~NoCommand(){}
- vector<string> setParameters() { return outputNames; } //dummy, doesn't really do anything
- string getCommandName() { return "NoCommand"; }
- string getCommandCategory() { return "Hidden"; }
- string getHelpString() { return "No Command"; }
- string getCitation() { return "no citation"; }
- string getDescription() { return "no description"; }
+ vector<string> setParameters() { return outputNames; } //dummy, doesn't really do anything
+ string getCommandName() { return "NoCommand"; }
+ string getCommandCategory() { return "Hidden"; }
+ string getHelpString() { return "No Command"; }
+ string getOutputFileNameTag(string, string) { return ""; }
+ string getCitation() { return "no citation"; }
+ string getDescription() { return "no description"; }
int execute();
}
}
-
+//**********************************************************************************************************************
+//Maps the "shared" output type to its file name tag ("norm.shared").
+//inputName is not used. Returns "" after printing an error for any other type.
+string NormalizeSharedCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		//only types registered in outputTypes are valid for this command
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		if (type == "shared") { return "norm.shared"; }
+
+		//registered type without a tag definition: report it and set control_pressed
+		m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;
+		return "";
+	}
+	catch(exception& e) {
+		m->errorOut(e, "NormalizeSharedCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
//**********************************************************************************************************************
NormalizeSharedCommand::NormalizeSharedCommand(){
try {
if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + thisLookUp[0]->getLabel() + ".norm.shared";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + thisLookUp[0]->getLabel() + "." + getOutputFileNameTag("shared");
ofstream out;
m->openOutputFile(outputFileName, out);
outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
//save mothurOut's binLabels to restore for next label
vector<string> saveBinLabels = m->currentBinLabels;
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + thisLookUp[0]->getLabel() + ".norm.shared";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + thisLookUp[0]->getLabel() + "." + getOutputFileNameTag("shared");
ofstream out;
m->openOutputFile(outputFileName, out);
outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
vector<string> setParameters();
string getCommandName() { return "normalize.shared"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Normalize.shared"; }
string getDescription() { return "normalize samples in a shared or relabund file"; }
exit(1);
}
}
+/***********************************************************************/
+//Parses a comma separated "key=value" option string into the parameters member and
+//hands the caller a copy of the parsed map through the second argument.
+//  option - raw option string; "" leaves parameters untouched.
+//  copy   - overwritten with the contents of parameters once parsing finishes.
+//The legacy keys "candidate" and "query" are stored as "fasta", and "template" as "reference".
+OptionParser::OptionParser(string option, map<string, string>& copy) {
+	try {
+		m = MothurOut::getInstance();
+		if (option != "") {
+
+			string key, value;
+			//reads in parameters and values
+			while(option.find_first_of(',') != string::npos) {  //while there are parameters
+				m->splitAtComma(value, option);
+				m->splitAtEquals(key, value);
+				if ((key == "candidate") || (key == "query")) { key = "fasta"; }
+				if (key == "template") { key = "reference"; }
+				parameters[key] = value;
+			}
+
+			//in case there is no comma and to get last parameter after comma
+			m->splitAtEquals(key, option);
+			if ((key == "candidate") || (key == "query")) { key = "fasta"; }
+			if (key == "template") { key = "reference"; }
+			parameters[key] = option;
+		}
+
+		copy = parameters;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "OptionParser", "OptionParser");
+		exit(1);
+	}
+}
/***********************************************************************/
map<string, string> OptionParser::getParameters() {
//loop through parameters and look for "current" so you can return the appropriate file
//doing it here to avoid code duplication in each of the commands
- map<string, string>::iterator it;
- for (it = parameters.begin(); it != parameters.end();) {
-
- if (it->second == "current") {
-
- //look for file types
- if (it->first == "fasta") {
- it->second = m->getFastaFile();
- }else if (it->first == "qfile") {
- it->second = m->getQualFile();
- }else if (it->first == "phylip") {
- it->second = m->getPhylipFile();
- }else if (it->first == "column") {
- it->second = m->getColumnFile();
- }else if (it->first == "list") {
- it->second = m->getListFile();
- }else if (it->first == "rabund") {
- it->second = m->getRabundFile();
- }else if (it->first == "sabund") {
- it->second = m->getSabundFile();
- }else if (it->first == "name") {
- it->second = m->getNameFile();
- }else if (it->first == "group") {
- it->second = m->getGroupFile();
- }else if (it->first == "order") {
- it->second = m->getOrderFile();
- }else if (it->first == "ordergroup") {
- it->second = m->getOrderGroupFile();
- }else if (it->first == "tree") {
- it->second = m->getTreeFile();
- }else if (it->first == "shared") {
- it->second = m->getSharedFile();
- }else if (it->first == "relabund") {
- it->second = m->getRelAbundFile();
- }else if (it->first == "design") {
- it->second = m->getDesignFile();
- }else if (it->first == "sff") {
- it->second = m->getSFFFile();
- }else if (it->first == "oligos") {
- it->second = m->getOligosFile();
- }else if (it->first == "accnos") {
- it->second = m->getAccnosFile();
- }else if (it->first == "taxonomy") {
- it->second = m->getTaxonomyFile();
- }else if (it->first == "biom") {
+
+ map<string, string>::iterator it;
+ for (it = parameters.begin(); it != parameters.end();) {
+
+ if (it->second == "current") {
+
+ //look for file types
+ if (it->first == "fasta") {
+ it->second = m->getFastaFile();
+ }else if (it->first == "qfile") {
+ it->second = m->getQualFile();
+ }else if (it->first == "phylip") {
+ it->second = m->getPhylipFile();
+ }else if (it->first == "column") {
+ it->second = m->getColumnFile();
+ }else if (it->first == "list") {
+ it->second = m->getListFile();
+ }else if (it->first == "rabund") {
+ it->second = m->getRabundFile();
+ }else if (it->first == "sabund") {
+ it->second = m->getSabundFile();
+ }else if (it->first == "name") {
+ it->second = m->getNameFile();
+ }else if (it->first == "group") {
+ it->second = m->getGroupFile();
+ }else if (it->first == "order") {
+ it->second = m->getOrderFile();
+ }else if (it->first == "ordergroup") {
+ it->second = m->getOrderGroupFile();
+ }else if (it->first == "tree") {
+ it->second = m->getTreeFile();
+ }else if (it->first == "shared") {
+ it->second = m->getSharedFile();
+ }else if (it->first == "relabund") {
+ it->second = m->getRelAbundFile();
+ }else if (it->first == "design") {
+ it->second = m->getDesignFile();
+ }else if (it->first == "sff") {
+ it->second = m->getSFFFile();
+ }else if (it->first == "oligos") {
+ it->second = m->getOligosFile();
+ }else if (it->first == "accnos") {
+ it->second = m->getAccnosFile();
+ }else if (it->first == "taxonomy") {
+ it->second = m->getTaxonomyFile();
+ }else if (it->first == "biom") {
it->second = m->getBiomFile();
- }else {
- m->mothurOut("[ERROR]: mothur does not save a current file for " + it->first); m->mothurOutEndLine();
- }
-
- if (it->second == "") { //no file was saved for that type, warn and remove from parameters
- m->mothurOut("[WARNING]: no file was saved for " + it->first + " parameter."); m->mothurOutEndLine();
- parameters.erase(it++);
- }else {
- m->mothurOut("Using " + it->second + " as input file for the " + it->first + " parameter."); m->mothurOutEndLine();
- it++;
- }
- }else{ it++; }
- }
+ }else if (it->first == "count") {
+ it->second = m->getCountTableFile();
+ }else {
+ m->mothurOut("[ERROR]: mothur does not save a current file for " + it->first); m->mothurOutEndLine();
+ }
+
+ if (it->second == "") { //no file was saved for that type, warn and remove from parameters
+ m->mothurOut("[WARNING]: no file was saved for " + it->first + " parameter."); m->mothurOutEndLine();
+ parameters.erase(it++);
+ }else {
+ m->mothurOut("Using " + it->second + " as input file for the " + it->first + " parameter."); m->mothurOutEndLine();
+ it++;
+ }
+ }else{ it++; }
+ }
return parameters;
}
class OptionParser {
public:
OptionParser(string);
+ OptionParser(string, map<string, string>&);
~OptionParser() {}
map<string, string> getParameters();
bool getNameFile(vector<string>);
}
}
//**********************************************************************************************************************
+//Maps the "otucorr" output type to its file name tag ("otu.corr").
+//inputName is not used. Returns "" after printing an error for any other type.
+string OTUAssociationCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		//only types registered in outputTypes are valid for this command
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		if (type == "otucorr") { return "otu.corr"; }
+
+		//registered type without a tag definition: report it and set control_pressed
+		m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;
+		return "";
+	}
+	catch(exception& e) {
+		m->errorOut(e, "OTUAssociationCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
OTUAssociationCommand::OTUAssociationCommand(){
try {
abort = true; calledHelp = true;
setParameters();
vector<string> tempOutNames;
- outputTypes["otu.corr"] = tempOutNames;
+ outputTypes["otucorr"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "OTUAssociationCommand", "OTUAssociationCommand");
}
vector<string> tempOutNames;
- outputTypes["otu.corr"] = tempOutNames;
+ outputTypes["otucorr"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
int OTUAssociationCommand::process(vector<SharedRAbundVector*>& lookup){
try {
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + "." + method + ".otu.corr";
- outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + "." + method + "." + getOutputFileNameTag("otucorr");
+ outputNames.push_back(outputFileName); outputTypes["otucorr"].push_back(outputFileName);
ofstream out;
m->openOutputFile(outputFileName, out);
int OTUAssociationCommand::process(vector<SharedRAbundFloatVector*>& lookup){
try {
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + "." + method + ".otu.corr";
- outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + "." + method + "." + getOutputFileNameTag("otucorr");
+ outputNames.push_back(outputFileName); outputTypes["otucorr"].push_back(outputFileName);
ofstream out;
m->openOutputFile(outputFileName, out);
vector<string> setParameters();
string getCommandName() { return "otu.association"; }
string getCommandCategory() { return "Hypothesis Testing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Otu.association"; }
string getDescription() { return "calculate the correlation coefficient for the otus in a shared/relabund file"; }
exit(1);
}
}
-
+//**********************************************************************************************************************
+//Maps the "otuheirarchy" output type (spelling as used by the caller) to its tag ("otu.hierarchy").
+//inputName is not used. Returns "" after printing an error for any other type.
+string OtuHierarchyCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		//only types registered in outputTypes are valid for this command
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		if (type == "otuheirarchy") { return "otu.hierarchy"; }
+
+		//registered type without a tag definition: report it and set control_pressed
+		m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;
+		return "";
+	}
+	catch(exception& e) {
+		m->errorOut(e, "OtuHierarchyCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
//**********************************************************************************************************************
OtuHierarchyCommand::OtuHierarchyCommand(){
try {
}
ofstream out;
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(listFile)) + lists[0].getLabel() + "-" + lists[1].getLabel() + ".otu.hierarchy";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(listFile)) + lists[0].getLabel() + "-" + lists[1].getLabel() + "." + getOutputFileNameTag("otuheirarchy");
m->openOutputFile(outputFileName, out);
//go through each bin in "big" otu and output the bins in "little" otu which created it
vector<string> setParameters();
string getCommandName() { return "otu.hierarchy"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Otu.hierarchy"; }
string getDescription() { return "relates OTUs at different distances"; }
CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
- CommandParameter poutput("output", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(poutput);
+ CommandParameter poutput("output", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(poutput);
CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "",false,false); parameters.push_back(pcalc);
CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pcountends);
CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcompress);
exit(1);
}
}
-
+//**********************************************************************************************************************
+//Maps a pairwise.seqs output type to its file name tag; "phylip" and "column" both use "dist".
+//inputName is not used. Returns "" after printing an error for any other type.
+string PairwiseSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		//only types registered in outputTypes are valid for this command
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		//both distance formats share the same tag
+		if ((type == "phylip") || (type == "column")) { return "dist"; }
+
+		//registered type without a tag definition: report it and set control_pressed
+		m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;
+		return "";
+	}
+	catch(exception& e) {
+		m->errorOut(e, "PairwiseSeqsCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
//**********************************************************************************************************************
PairwiseSeqsCommand::PairwiseSeqsCommand(){
try {
align = validParameter.validFile(parameters, "align", false); if (align == "not found"){ align = "needleman"; }
output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "column"; }
+ if (output=="phylip") { output = "lt"; }
if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
calc = validParameter.validFile(parameters, "calc", false);
string outputFile = "";
if (output == "lt") { //does the user want lower triangle phylip formatted file
- outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "phylip.dist";
+ outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "phylip." + getOutputFileNameTag("phylip");
m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile);
}else if (output == "column") { //user wants column format
- outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "dist";
+ outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("column");
outputTypes["column"].push_back(outputFile);
m->mothurRemove(outputFile);
}else { //assume square
- outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "square.dist";
+ outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "square." + getOutputFileNameTag("phylip");
m->mothurRemove(outputFile);
outputTypes["phylip"].push_back(outputFile);
}
vector<string> setParameters();
string getCommandName() { return "pairwise.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Needleman SB, Wunsch CD (1970). A general method applicable to the search for similarities in the amino acid sequence of two proteins. J Mol Biol 48: 443-53. [ for needleman ]\nGotoh O (1982). An improved algorithm for matching biological sequences. J Mol Biol 162: 705-8. [ for gotoh ] \nhttp://www.mothur.org/wiki/Pairwise.seqs"; }
string getDescription() { return "calculates pairwise distances from an unaligned fasta file"; }
exit(1);
}
}
+//**********************************************************************************************************************
+//Maps a parse.fastq output type to its file name tag: "fasta" -> "fasta", "qfile" -> "qual".
+//inputName is not used. Returns "" after printing an error for any other type.
+string ParseFastaQCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		//only types registered in outputTypes are valid for this command
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		if (type == "fasta") { return "fasta"; }
+		if (type == "qfile") { return "qual"; }
+
+		//registered type without a tag definition: report it and set control_pressed
+		m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;
+		return "";
+	}
+	catch(exception& e) {
+		m->errorOut(e, "ParseFastaQCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
+
//**********************************************************************************************************************
ParseFastaQCommand::ParseFastaQCommand(){
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
//open Output Files
- string fastaFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + "fasta";
- string qualFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + "qual";
+ string fastaFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + getOutputFileNameTag("fasta");
+ string qualFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + getOutputFileNameTag("qfile");
ofstream outFasta, outQual;
if (fasta) { m->openOutputFile(fastaFile, outFasta); outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile); }
vector<string> setParameters();
string getCommandName() { return "parse.fastq"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Parse.fastq"; }
string getDescription() { return "reads a fastq file and creates a fasta and quality file"; }
exit(1);
}
}
+//**********************************************************************************************************************
+//Maps the "list" output type to its file name tag ("list").
+//inputName is not used. Returns "" after printing an error for any other type.
+string ParseListCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		//only types registered in outputTypes are valid for this command
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		if (type == "list") { return "list"; }
+
+		//registered type without a tag definition: report it and set control_pressed
+		m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;
+		return "";
+	}
+	catch(exception& e) {
+		m->errorOut(e, "ParseListCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
+
//**********************************************************************************************************************
ParseListCommand::ParseListCommand(){
try {
temp = new ofstream;
filehandles[gGroups[i]] = temp;
- string filename = fileroot + gGroups[i] + ".list";
+ string filename = fileroot + gGroups[i] + "." + getOutputFileNameTag("list");
outputNames.push_back(filename); outputTypes["list"].push_back(filename);
m->openOutputFile(filename, *temp);
}
vector<string> setParameters();
string getCommandName() { return "parse.list"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Parse.list"; }
string getDescription() { return "parses a list file by group"; }
exit(1);
}
}
+//**********************************************************************************************************************
+//Maps a parsimony output type ("parsimony", "psummary") to its file name tag (same text as the type).
+//inputName is not used. Returns "" after printing an error for any other type.
+string ParsimonyCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		//only types registered in outputTypes are valid for this command
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		if (type == "parsimony") { return "parsimony"; }
+		if (type == "psummary")  { return "psummary"; }
+
+		//registered type without a tag definition: report it and set control_pressed
+		m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;
+		return "";
+	}
+	catch(exception& e) {
+		m->errorOut(e, "ParsimonyCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
//**********************************************************************************************************************
ParsimonyCommand::ParsimonyCommand(){
delete reader;
if(outputDir == "") { outputDir += m->hasPath(treefile); }
- output = new ColumnFile(outputDir + m->getSimpleName(treefile) + ".parsimony", itersString);
- outputNames.push_back(outputDir + m->getSimpleName(treefile) + ".parsimony");
- outputTypes["parsimony"].push_back(outputDir + m->getSimpleName(treefile) + ".parsimony");
+ output = new ColumnFile(outputDir + m->getSimpleName(treefile) + "." + getOutputFileNameTag("parsimony"), itersString);
+ outputNames.push_back(outputDir + m->getSimpleName(treefile) + "." + getOutputFileNameTag("parsimony"));
+ outputTypes["parsimony"].push_back(outputDir + m->getSimpleName(treefile) + "." + getOutputFileNameTag("parsimony"));
- sumFile = outputDir + m->getSimpleName(treefile) + ".psummary";
+ sumFile = outputDir + m->getSimpleName(treefile) + "." + getOutputFileNameTag("psummary");
m->openOutputFile(sumFile, outSum);
outputNames.push_back(sumFile);
outputTypes["psummary"].push_back(sumFile);
vector<string> setParameters();
string getCommandName() { return "parsimony"; }
string getCommandCategory() { return "Hypothesis Testing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Slatkin M, Maddison WP (1989). A cladistic measure of gene flow inferred from the phylogenies of alleles. Genetics 123: 603-13. \nSlatkin M, Maddison WP (1990). Detecting isolation by distance using phylogenies of genes. Genetics 126: 249-60. \nMartin AP (2002). Phylogenetic approaches for describing and comparing the diversity of microbial communities. Appl Environ Microbiol 68: 3673-82. \nSchloss PD, Handelsman J (2006). Introducing TreeClimber, a test to compare microbial community structure. Appl Environ Microbiol 72: 2379-84.\nhttp://www.mothur.org/wiki/Parsimony"; }
string getDescription() { return "generic test that describes whether two or more communities have the same structure"; }
exit(1);
}
}
+//**********************************************************************************************************************
+//Maps a pca output type to its file name tag: "pca" -> "pca.axes", "loadings" -> "pca.loadings".
+//inputName is not used. Returns "" after printing an error for any other type.
+string PCACommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		//only types registered in outputTypes are valid for this command
+		if (outputTypes.count(type) == 0) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return "";
+		}
+
+		if (type == "pca")      { return "pca.axes"; }
+		if (type == "loadings") { return "pca.loadings"; }
+
+		//registered type without a tag definition: report it and set control_pressed
+		m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;
+		return "";
+	}
+	catch(exception& e) {
+		m->errorOut(e, "PCACommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
+
//**********************************************************************************************************************
PCACommand::PCACommand(){
try {
dsum += d[i];
}
- ofstream pcaData((fnameRoot+".pca.axes").c_str(), ios::trunc);
+ ofstream pcaData;
+ m->openOutputFile((fnameRoot+"."+getOutputFileNameTag("pca")), pcaData);
pcaData.setf(ios::fixed, ios::floatfield);
pcaData.setf(ios::showpoint);
- outputNames.push_back(fnameRoot+".pca.axes");
- outputTypes["pca"].push_back(fnameRoot+".pca.axes");
+ outputNames.push_back(fnameRoot+"."+getOutputFileNameTag("pca"));
+ outputTypes["pca"].push_back(fnameRoot+"."+getOutputFileNameTag("pca"));
- ofstream pcaLoadings((fnameRoot+".pca.loadings").c_str(), ios::trunc);
+ ofstream pcaLoadings;
+ m->openOutputFile((fnameRoot+"."+getOutputFileNameTag("loadings")), pcaLoadings);
pcaLoadings.setf(ios::fixed, ios::floatfield);
pcaLoadings.setf(ios::showpoint);
- outputNames.push_back(fnameRoot+".pca.loadings");
- outputTypes["loadings"].push_back(fnameRoot+".pca.loadings");
+ outputNames.push_back(fnameRoot+"."+getOutputFileNameTag("loadings"));
+ outputTypes["loadings"].push_back(fnameRoot+"."+getOutputFileNameTag("loadings"));
pcaLoadings << "axis\tloading\n";
for(int i=0;i<numEigenValues;i++){
vector<string> setParameters();
string getCommandName() { return "pca"; }
string getCommandCategory() { return "Hypothesis Testing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "McCune B, Grace JB, Urban DL (2002). Analysis of ecological communities. MjM Software Design: Gleneden Beach, OR. \nLegendre P, Legendre L (1998). Numerical Ecology. Elsevier: New York. \nhttp://www.mothur.org/wiki/Pca"; }
string getDescription() { return "pca"; }
}
}
+//**********************************************************************************************************************
+// Returns the filename tag this command appends for the given output type
+// ("pcoa" -> "pcoa.axes", "loadings" -> "pcoa.loadings").
+// An empty string is returned, after logging an error, when the type is not a
+// key of outputTypes or has no tag defined; in the latter case
+// m->control_pressed is also set. inputName is not used by this command.
+string PCOACommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Only types registered in outputTypes are produced by this command.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "pcoa")     { return "pcoa.axes"; }
+        if (type == "loadings") { return "pcoa.loadings"; }
+
+        // Registered type without a tag definition.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PCOACommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
PCOACommand::PCOACommand(){
}
}
- ofstream pcaData((fnameRoot+"pcoa.axes").c_str(), ios::trunc);
+ ofstream pcaData;
+ string pcoaDataFile = fnameRoot+getOutputFileNameTag("pcoa");
+ m->openOutputFile(pcoaDataFile, pcaData);
pcaData.setf(ios::fixed, ios::floatfield);
pcaData.setf(ios::showpoint);
- outputNames.push_back(fnameRoot+"pcoa.axes");
- outputTypes["pcoa"].push_back(fnameRoot+"pcoa.axes");
+ outputNames.push_back(pcoaDataFile);
+ outputTypes["pcoa"].push_back(pcoaDataFile);
- ofstream pcaLoadings((fnameRoot+"pcoa.loadings").c_str(), ios::trunc);
+ ofstream pcaLoadings;
+ string loadingsFile = fnameRoot+getOutputFileNameTag("loadings");
+ m->openOutputFile(loadingsFile, pcaLoadings);
pcaLoadings.setf(ios::fixed, ios::floatfield);
pcaLoadings.setf(ios::showpoint);
- outputNames.push_back(fnameRoot+"pcoa.loadings");
- outputTypes["loadings"].push_back(fnameRoot+"pcoa.loadings");
+ outputNames.push_back(loadingsFile);
+ outputTypes["loadings"].push_back(loadingsFile);
pcaLoadings << "axis\tloading\n";
for(int i=0;i<rank;i++){
vector<string> setParameters();
string getCommandName() { return "pcoa"; }
string getCommandCategory() { return "Hypothesis Testing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "McCune B, Grace JB, Urban DL (2002). Analysis of ecological communities. MjM Software Design: Gleneden Beach, OR. \nLegendre P, Legendre L (1998). Numerical Ecology. Elsevier: New York. \nhttp://www.mothur.org/wiki/Pcoa"; }
string getDescription() { return "pcoa"; }
vector<string> setParameters();
string getCommandName() { return "pcr.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Pcr.seqs"; }
string getDescription() { return "pcr.seqs"; }
exit(1);
}
}
-
+//**********************************************************************************************************************
+// Returns the filename tag this command appends for the given output type
+// ("phylodiv", "rarefy" -> "phylodiv.rarefaction", "summary" -> "phylodiv.summary").
+// An empty string is returned, after logging an error, when the type is not a
+// key of outputTypes or has no tag defined; in the latter case
+// m->control_pressed is also set. inputName is not used by this command.
+string PhyloDiversityCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Only types registered in outputTypes are produced by this command.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "phylodiv") { return "phylodiv"; }
+        if (type == "rarefy")   { return "phylodiv.rarefaction"; }
+        if (type == "summary")  { return "phylodiv.summary"; }
+
+        // Registered type without a tag definition.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PhyloDiversityCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
PhyloDiversityCommand::PhyloDiversityCommand(){
if (m->control_pressed) { delete tmap; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
ofstream outSum, outRare, outCollect;
- string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + ".phylodiv.summary";
- string outRareFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + ".phylodiv.rarefaction";
- string outCollectFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + ".phylodiv";
+ string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + "." + getOutputFileNameTag("summary");
+ string outRareFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + "." + getOutputFileNameTag("rarefy");
+ string outCollectFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + "." + getOutputFileNameTag("phylodiv");
if (summary) { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile); outputTypes["summary"].push_back(outSumFile); }
if (rarefy) { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile); outputTypes["rarefy"].push_back(outRareFile); }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
- m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
+ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run phylo.diversity."); m->mothurOutEndLine();
m->mothurOutEndLine();
for (int l = 0; l < numIters; l++) {
random_shuffle(randomLeaf.begin(), randomLeaf.end());
- cout << l << endl;
+
//initialize counts
map<string, int> counts;
vector< map<string, bool> > countedBranch;
vector<string> setParameters();
string getCommandName() { return "phylo.diversity"; }
string getCommandCategory() { return "Hypothesis Testing"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "Faith DP (1994). Phylogenetic pattern and the quantification of organismal biodiversity. Philos Trans R Soc Lond B Biol Sci 345: 45-58. \nhttp://www.mothur.org/wiki/Phylo.diversity"; }
string getDescription() { return "phylo.diversity"; }
int PhyloSummary::summarize(string userTfile){
try {
-
- ifstream in;
- m->openInputFile(userTfile, in);
-
- //read in users taxonomy file and add sequences to tree
- string name, tax;
- int numSeqs = 0;
- while(!in.eof()){
- in >> name >> tax; m->gobble(in);
-
- addSeqToTree(name, tax);
+ map<string, string> temp;
+ m->readTax(userTfile, temp);
+
+ for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end();) {
+ addSeqToTree(itTemp->first, itTemp->second);
numSeqs++;
-
- if (m->control_pressed) { break; }
- }
- in.close();
-
- return numSeqs;
+ temp.erase(itTemp++);
+ }
+
+ return numSeqs;
}
catch(exception& e) {
m->errorOut(e, "PhyloSummary", "summarize");
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
- ifstream in;
- m->openInputFile(tfile, in);
-
- //read in users taxonomy file and add sequences to tree
- while(!in.eof()){
- in >> name >> tax; m->gobble(in);
-
- addSeqToTree(name, tax);
- }
- in.close();
+ map<string, string> temp;
+ m->readTax(tfile, temp);
+
+ for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end();) {
+ addSeqToTree(itTemp->first, itTemp->second);
+ temp.erase(itTemp++);
+ }
#endif
assignHeirarchyIDs(0);
exit(1);
}
}
-
+//**********************************************************************************************************************
+// Returns the filename tag this command appends for the given output type
+// ("list", "rabund" or "sabund"; the tag equals the type name).
+// An empty string is returned, after logging an error, when the type is not a
+// key of outputTypes or has no tag defined; in the latter case
+// m->control_pressed is also set. inputName is not used by this command.
+string PhylotypeCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Only types registered in outputTypes are produced by this command.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "list")   { return "list"; }
+        if (type == "rabund") { return "rabund"; }
+        if (type == "sabund") { return "sabund"; }
+
+        // Registered type without a tag definition.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PhylotypeCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
PhylotypeCommand::PhylotypeCommand(){
try {
string fileroot = outputDir + m->getRootName(m->getSimpleName(taxonomyFileName));
ofstream outList;
- string outputListFile = fileroot + "tx.list";
+ string outputListFile = fileroot + "tx." + getOutputFileNameTag("list");
m->openOutputFile(outputListFile, outList);
ofstream outSabund;
- string outputSabundFile = fileroot + "tx.sabund";
+ string outputSabundFile = fileroot + "tx." + getOutputFileNameTag("sabund");
m->openOutputFile(outputSabundFile, outSabund);
ofstream outRabund;
- string outputRabundFile = fileroot + "tx.rabund";
+ string outputRabundFile = fileroot + "tx." + getOutputFileNameTag("rabund");
m->openOutputFile(outputRabundFile, outRabund);
outputNames.push_back(outputListFile); outputTypes["list"].push_back(outputListFile);
vector<string> setParameters();
string getCommandName() { return "phylotype"; }
string getCommandCategory() { return "Clustering"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Phylotype"; }
string getDescription() { return "cluster your sequences into OTUs based on their classifications"; }
vector<string> setParameters();
string getCommandName() { return "pipeline.pds"; }
string getCommandCategory() { return "Hidden"; }
+ string getOutputFileNameTag(string, string) { return ""; }
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Pipeline.pds"; }
string getDescription() { return "pat's pipeline"; }
}
}
-
+//**********************************************************************************************************************
+// Returns the filename tag this command appends for the given output type.
+//   "fasta"                       -> "pcr.fasta"
+//   "taxonomy" / "group" / "name" -> "pcr" + m->getExtension(inputName)
+//   "accnos"                      -> "bad.accnos"
+// An empty string is returned, after logging an error, when the type is not a
+// key of outputTypes or has no tag defined; in the latter case
+// m->control_pressed is also set.
+string PcrSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Only types registered in outputTypes are produced by this command.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "fasta")  { return "pcr.fasta"; }
+        if (type == "accnos") { return "bad.accnos"; }
+
+        // These three outputs reuse the extension of the file they were derived from.
+        if (type == "taxonomy" || type == "group" || type == "name") {
+            return "pcr" + m->getExtension(inputName);
+        }
+
+        // Registered type without a tag definition.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PcrSeqsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
PcrSeqsCommand::PcrSeqsCommand(){
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string trimSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.fasta";
+ string trimSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta");
outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
- string badSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.scrap.fasta";
+ string badSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "scrap." + getOutputFileNameTag("fasta");
length = 0;
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "bad.accnos";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("accnos");
outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
ofstream out;
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pcr" + m->getExtension(namefile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pcr" + m->getExtension(groupfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pcr" + m->getExtension(taxfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
ofstream out;
m->openOutputFile(outputFileName, out);
exit(1);
}
}
-
+//**********************************************************************************************************************
+// Returns the filename tag this command appends for the given output type.
+//   "fasta" -> "precluster" + m->getExtension(inputName)
+//   "name"  -> "precluster.names"
+//   "map"   -> "precluster.map"
+// An empty string is returned, after logging an error, when the type is not a
+// key of outputTypes or has no tag defined; in the latter case
+// m->control_pressed is also set.
+string PreClusterCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Only types registered in outputTypes are produced by this command.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "fasta") { return "precluster" + m->getExtension(inputName); }
+        if (type == "name")  { return "precluster.names"; }
+        if (type == "map")   { return "precluster.map"; }
+
+        // Registered type without a tag definition.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PreClusterCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
PreClusterCommand::PreClusterCommand(){
try {
int start = time(NULL);
string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
- string newFastaFile = fileroot + "precluster" + m->getExtension(fastafile);
- string newNamesFile = fileroot + "precluster.names";
- string newMapFile = fileroot + "precluster.map"; //add group name if by group
+ string newFastaFile = fileroot + getOutputFileNameTag("fasta", fastafile);
+ string newNamesFile = fileroot + getOutputFileNameTag("name");
+ string newMapFile = fileroot + getOutputFileNameTag("map"); //add group name if by group
outputNames.push_back(newFastaFile); outputTypes["fasta"].push_back(newFastaFile);
outputNames.push_back(newNamesFile); outputTypes["name"].push_back(newNamesFile);
vector<string> setParameters();
string getCommandName() { return "pre.cluster"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Pre.cluster"; }
string getDescription() { return "implements a pseudo-single linkage algorithm with the goal of removing sequences that are likely due to pyrosequencing errors"; }
string getCommandName() { return "quit"; }
string getCommandCategory() { return "Hidden"; }
string getHelpString() { return "The quit command will terminate mothur and should be in the following format: quit() or quit. \n"; }
+ string getOutputFileNameTag(string, string) { return ""; }
string getCitation() { return "no citation"; }
string getDescription() { return "quit"; }
exit(1);
}
}
+//**********************************************************************************************************************
+// Returns the filename tag this command appends for the given output type.
+// For every supported type the tag is the type name itself ("rarefaction",
+// "r_chao", "r_ace", ...).
+// An empty string is returned, after logging an error, when the type is not a
+// key of outputTypes or has no tag defined; in the latter case
+// m->control_pressed is also set. inputName is not used by this command.
+string RareFactCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        string outputFileName = "";
+
+        //is this a type this command creates
+        if (outputTypes.find(type) == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+        else {
+            if (type == "rarefaction") { outputFileName = "rarefaction"; }
+            else if (type == "r_chao") { outputFileName = "r_chao"; }
+            else if (type == "r_ace") { outputFileName = "r_ace"; }
+            else if (type == "r_jack") { outputFileName = "r_jack"; }
+            else if (type == "r_shannon") { outputFileName = "r_shannon"; }
+            else if (type == "r_shannoneven") { outputFileName = "r_shannoneven"; }
+            // The "heip" estimator asks for the "r_heip" tag when it builds its
+            // display; without this entry that request fell through to the
+            // error branch and set control_pressed.
+            else if (type == "r_heip") { outputFileName = "r_heip"; }
+            else if (type == "r_smithwilson") { outputFileName = "r_smithwilson"; }
+            else if (type == "r_npshannon") { outputFileName = "r_npshannon"; }
+            else if (type == "r_simpson") { outputFileName = "r_simpson"; }
+            else if (type == "r_simpsoneven") { outputFileName = "r_simpsoneven"; }
+            else if (type == "r_invsimpson") { outputFileName = "r_invsimpson"; }
+            else if (type == "r_bootstrap") { outputFileName = "r_bootstrap"; }
+            else if (type == "r_coverage") { outputFileName = "r_coverage"; }
+            else if (type == "r_nseqs") { outputFileName = "r_nseqs"; }
+            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
+        }
+        return outputFileName;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "RareFactCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
RareFactCommand::RareFactCommand(){
for (i=0; i<Estimators.size(); i++) {
if (validCalculator.isValidCalculator("rarefaction", Estimators[i]) == true) {
if (Estimators[i] == "sobs") {
- rDisplays.push_back(new RareDisplay(new Sobs(), new ThreeColumnFile(fileNameRoot+"rarefaction")));
- outputNames.push_back(fileNameRoot+"rarefaction"); outputTypes["rarefaction"].push_back(fileNameRoot+"rarefaction");
+ rDisplays.push_back(new RareDisplay(new Sobs(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("rarefaction"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("rarefaction")); outputTypes["rarefaction"].push_back(fileNameRoot+getOutputFileNameTag("rarefaction"));
}else if (Estimators[i] == "chao") {
- rDisplays.push_back(new RareDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"r_chao")));
- outputNames.push_back(fileNameRoot+"r_chao"); outputTypes["r_chao"].push_back(fileNameRoot+"r_chao");
+ rDisplays.push_back(new RareDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("r_chao"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("r_chao")); outputTypes["r_chao"].push_back(fileNameRoot+getOutputFileNameTag("r_chao"));
}else if (Estimators[i] == "ace") {
if(abund < 5)
abund = 10;
- rDisplays.push_back(new RareDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"r_ace")));
- outputNames.push_back(fileNameRoot+"r_ace"); outputTypes["r_ace"].push_back(fileNameRoot+"r_ace");
+ rDisplays.push_back(new RareDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("r_ace"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("r_ace")); outputTypes["r_ace"].push_back(fileNameRoot+getOutputFileNameTag("r_ace"));
}else if (Estimators[i] == "jack") {
- rDisplays.push_back(new RareDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+"r_jack")));
- outputNames.push_back(fileNameRoot+"r_jack"); outputTypes["r_jack"].push_back(fileNameRoot+"r_jack");
+ rDisplays.push_back(new RareDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("r_jack"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("r_jack")); outputTypes["r_jack"].push_back(fileNameRoot+getOutputFileNameTag("r_jack"));
}else if (Estimators[i] == "shannon") {
- rDisplays.push_back(new RareDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"r_shannon")));
- outputNames.push_back(fileNameRoot+"r_shannon"); outputTypes["r_shannon"].push_back(fileNameRoot+"r_shannon");
+ rDisplays.push_back(new RareDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("r_shannon"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("r_shannon")); outputTypes["r_shannon"].push_back(fileNameRoot+getOutputFileNameTag("r_shannon"));
}else if (Estimators[i] == "shannoneven") {
- rDisplays.push_back(new RareDisplay(new ShannonEven(), new ThreeColumnFile(fileNameRoot+"r_shannoneven")));
- outputNames.push_back(fileNameRoot+"r_shannoneven"); outputTypes["r_shannoneven"].push_back(fileNameRoot+"r_shannoneven");
+ rDisplays.push_back(new RareDisplay(new ShannonEven(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("r_shannoneven"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("r_shannoneven")); outputTypes["r_shannoneven"].push_back(fileNameRoot+getOutputFileNameTag("r_shannoneven"));
}else if (Estimators[i] == "heip") {
- rDisplays.push_back(new RareDisplay(new Heip(), new ThreeColumnFile(fileNameRoot+"r_heip")));
- outputNames.push_back(fileNameRoot+"r_heip"); outputTypes["r_heip"].push_back(fileNameRoot+"r_heip");
+ rDisplays.push_back(new RareDisplay(new Heip(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("r_heip"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("r_heip")); outputTypes["r_heip"].push_back(fileNameRoot+getOutputFileNameTag("r_heip"));
}else if (Estimators[i] == "smithwilson") {
- rDisplays.push_back(new RareDisplay(new SmithWilson(), new ThreeColumnFile(fileNameRoot+"r_smithwilson")));
- outputNames.push_back(fileNameRoot+"r_smithwilson"); outputTypes["r_smithwilson"].push_back(fileNameRoot+"r_smithwilson");
+ rDisplays.push_back(new RareDisplay(new SmithWilson(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("r_smithwilson"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("r_smithwilson")); outputTypes["r_smithwilson"].push_back(fileNameRoot+getOutputFileNameTag("r_smithwilson"));
}else if (Estimators[i] == "npshannon") {
- rDisplays.push_back(new RareDisplay(new NPShannon(), new ThreeColumnFile(fileNameRoot+"r_npshannon")));
- outputNames.push_back(fileNameRoot+"r_npshannon"); outputTypes["r_npshannon"].push_back(fileNameRoot+"r_npshannon");
+ rDisplays.push_back(new RareDisplay(new NPShannon(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("r_npshannon"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("r_npshannon")); outputTypes["r_npshannon"].push_back(fileNameRoot+getOutputFileNameTag("r_npshannon"));
}else if (Estimators[i] == "simpson") {
- rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"r_simpson")));
- outputNames.push_back(fileNameRoot+"r_simpson"); outputTypes["r_simpson"].push_back(fileNameRoot+"r_simpson");
+ rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("r_simpson"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("r_simpson")); outputTypes["r_simpson"].push_back(fileNameRoot+getOutputFileNameTag("r_simpson"));
}else if (Estimators[i] == "simpsoneven") {
- rDisplays.push_back(new RareDisplay(new SimpsonEven(), new ThreeColumnFile(fileNameRoot+"r_simpsoneven")));
- outputNames.push_back(fileNameRoot+"r_simpsoneven"); outputTypes["r_simpsoneven"].push_back(fileNameRoot+"r_simpsoneven");
+ rDisplays.push_back(new RareDisplay(new SimpsonEven(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("r_simpsoneven"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("r_simpsoneven")); outputTypes["r_simpsoneven"].push_back(fileNameRoot+getOutputFileNameTag("r_simpsoneven"));
}else if (Estimators[i] == "invsimpson") {
- rDisplays.push_back(new RareDisplay(new InvSimpson(), new ThreeColumnFile(fileNameRoot+"r_invsimpson")));
- outputNames.push_back(fileNameRoot+"r_invsimpson"); outputTypes["r_invsimpson"].push_back(fileNameRoot+"r_invsimpson");
+ rDisplays.push_back(new RareDisplay(new InvSimpson(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("r_invsimpson"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("r_invsimpson")); outputTypes["r_invsimpson"].push_back(fileNameRoot+getOutputFileNameTag("r_invsimpson"));
}else if (Estimators[i] == "bootstrap") {
- rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(fileNameRoot+"r_bootstrap")));
- outputNames.push_back(fileNameRoot+"r_bootstrap"); outputTypes["r_bootstrap"].push_back(fileNameRoot+"r_bootstrap");
+ rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("r_bootstrap"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("r_bootstrap")); outputTypes["r_bootstrap"].push_back(fileNameRoot+getOutputFileNameTag("r_bootstrap"));
}else if (Estimators[i] == "coverage") {
- rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(fileNameRoot+"r_coverage")));
- outputNames.push_back(fileNameRoot+"r_coverage"); outputTypes["r_coverage"].push_back(fileNameRoot+"r_coverage");
+ rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("r_coverage"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("r_coverage")); outputTypes["r_coverage"].push_back(fileNameRoot+getOutputFileNameTag("r_coverage"));
}else if (Estimators[i] == "nseqs") {
- rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(fileNameRoot+"r_nseqs")));
- outputNames.push_back(fileNameRoot+"r_nseqs"); outputTypes["r_nseqs"].push_back(fileNameRoot+"r_nseqs");
+ rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(fileNameRoot+getOutputFileNameTag("r_nseqs"))));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("r_nseqs")); outputTypes["r_nseqs"].push_back(fileNameRoot+getOutputFileNameTag("r_nseqs"));
}
if (inputFileNames.size() > 1) { file2Group[outputNames.size()-1] = groups[p]; }
}
vector<string> setParameters();
string getCommandName() { return "rarefaction.single"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Magurran AE (2004). Measuring biological diversity. Blackwell Pub.: Malden, Ma. \nhttp://www.mothur.org/wiki/Rarefaction.single"; }
string getDescription() { return "generate intra-sample rarefaction curves using a re-sampling without replacement approach"; }
#include "sharedsobs.h"
#include "sharednseqs.h"
#include "sharedutilities.h"
+#include "subsample.h"
//**********************************************************************************************************************
vector<string> RareFactSharedCommand::setParameters(){
CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
CommandParameter pcalc("calc", "Multiple", "sharednseqs-sharedobserved", "sharedobserved", "", "", "",true,false); parameters.push_back(pcalc);
+ CommandParameter psubsampleiters("subsampleiters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(psubsampleiters);
+ CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
CommandParameter pjumble("jumble", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pjumble);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
helpString += "Example rarefaction.shared(label=unique-0.01-0.03, iters=10000, groups=B-C, jumble=T, calc=sharedobserved).\n";
helpString += "The default values for iters is 1000, freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness.\n";
+ helpString += "The subsampleiters parameter allows you to choose the number of times you would like to run the subsample.\n";
+ helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
helpString += "The default value for groups is all the groups in your groupfile, and jumble is true.\n";
helpString += validCalculator.printCalc("sharedrarefaction");
helpString += "The label parameter is used to analyze specific labels in your input.\n";
exit(1);
}
}
-
+//**********************************************************************************************************************
+// Returns the filename tag this command appends for the given output type
+// ("sharedrarefaction" -> "shared.rarefaction", "sharedr_nseqs" -> "shared.r_nseqs").
+// An empty string is returned, after logging an error, when the type is not a
+// key of outputTypes or has no tag defined; in the latter case
+// m->control_pressed is also set. inputName is not used by this command.
+string RareFactSharedCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Only types registered in outputTypes are produced by this command.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "sharedrarefaction") { return "shared.rarefaction"; }
+        if (type == "sharedr_nseqs")     { return "shared.r_nseqs"; }
+
+        // Registered type without a tag definition.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "RareFactSharedCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
RareFactSharedCommand::RareFactSharedCommand(){
try {
temp = validParameter.validFile(parameters, "groupmode", false); if (temp == "not found") { temp = "T"; }
groupMode = m->isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "subsampleiters", false); if (temp == "not found") { temp = "1000"; }
+ m->mothurConvert(temp, iters);
+
+ temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
+ if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
+ else {
+ if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
+ else { subsample = false; }
+ }
+
+ if (subsample == false) { iters = 1; }
}
}
}
}
-
+
+ /******************************************************/
+ if (subsample) {
+ if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
+ subsampleSize = subset[0]->getNumSeqs();
+ for (int i = 1; i < subset.size(); i++) {
+ int thisSize = subset[i]->getNumSeqs();
+
+ if (thisSize < subsampleSize) { subsampleSize = thisSize; }
+ }
+ }else {
+ newGroups.clear();
+ vector<SharedRAbundVector*> temp;
+ for (int i = 0; i < subset.size(); i++) {
+ if (subset[i]->getNumSeqs() < subsampleSize) {
+ m->mothurOut(subset[i]->getGroup() + " contains " + toString(subset[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
+ delete subset[i];
+ }else {
+ newGroups.push_back(subset[i]->getGroup());
+ temp.push_back(subset[i]);
+ }
+ }
+ subset = temp;
+ }
+
+ if (subset.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
+ }
+ /******************************************************/
+
ValidCalculators validCalculator;
for (int i=0; i<Estimators.size(); i++) {
if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) {
if (Estimators[i] == "sharedobserved") {
- rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(fileNameRoot+"shared.rarefaction", "")));
- outputNames.push_back(fileNameRoot+"shared.rarefaction"); outputTypes["sharedrarefaction"].push_back(fileNameRoot+"shared.rarefaction");
+ rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(fileNameRoot+getOutputFileNameTag("sharedrarefaction"), "")));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("sharedrarefaction")); outputTypes["sharedrarefaction"].push_back(fileNameRoot+getOutputFileNameTag("sharedrarefaction"));
}else if (Estimators[i] == "sharednseqs") {
- rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(fileNameRoot+"shared.r_nseqs", "")));
- outputNames.push_back(fileNameRoot+"shared.r_nseqs"); outputTypes["sharedr_nseqs"].push_back(fileNameRoot+"shared.r_nseqs");
+ rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(fileNameRoot+getOutputFileNameTag("sharedr_nseqs"), "")));
+ outputNames.push_back(fileNameRoot+getOutputFileNameTag("sharedr_nseqs")); outputTypes["sharedr_nseqs"].push_back(fileNameRoot+getOutputFileNameTag("sharedr_nseqs"));
}
}
file2Group[outputNames.size()-1] = thisSet;
rCurve->getSharedCurve(freq, nIters);
delete rCurve;
+ if (subsample) { subsampleLookup(subset, fileNameRoot); }
+
processedLabels.insert(subset[0]->getLabel());
userLabels.erase(subset[0]->getLabel());
}
rCurve->getSharedCurve(freq, nIters);
delete rCurve;
+ if (subsample) { subsampleLookup(subset, fileNameRoot); }
+
processedLabels.insert(subset[0]->getLabel());
userLabels.erase(subset[0]->getLabel());
rCurve = new Rarefact(subset, rDisplays);
rCurve->getSharedCurve(freq, nIters);
delete rCurve;
+
+ if (subsample) { subsampleLookup(subset, fileNameRoot); }
+
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
}
}
}
//**********************************************************************************************************************
+// Runs `iters` subsampled rarefactions of thisLookup and summarizes them.
+//
+// Each iteration copies thisLookup, subsamples the copy to subsampleSize with SubSample,
+// and writes one temporary rarefaction file per requested estimator
+// (fileNameRoot + iteration number + output tag). Afterwards, for every output type,
+// the temporary files are read back and removed, and an "ave-std" file containing the
+// per-column mean ("ave") and standard deviation ("std", divided by iters) is written
+// and registered in outputNames/outputTypes.
+//
+//   thisLookup   - the groups to subsample; only read here, the copies are freed locally.
+//   fileNameRoot - prefix for the temporary and the ave-std output files.
+// Returns 0 in all cases (also when m->control_pressed interrupts the run).
+int RareFactSharedCommand::subsampleLookup(vector<SharedRAbundVector*>& thisLookup, string fileNameRoot) {
+    try {
+
+        map<string, vector<string> > filenames; //output type -> temporary file of each iteration
+        for (int thisIter = 0; thisIter < iters; thisIter++) {
+
+            vector<SharedRAbundVector*> thisItersLookup = thisLookup;
+
+            //we want the summary results for the whole dataset, then the subsampling
+            SubSample sample;
+            vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
+
+            //make copy of lookup so we don't get access violations
+            vector<SharedRAbundVector*> newLookup;
+            for (int k = 0; k < thisItersLookup.size(); k++) {
+                SharedRAbundVector* temp = new SharedRAbundVector();
+                temp->setLabel(thisItersLookup[k]->getLabel());
+                temp->setGroup(thisItersLookup[k]->getGroup());
+                newLookup.push_back(temp);
+            }
+
+            //for each bin
+            for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
+                if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
+                for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
+            }
+
+            tempLabels = sample.getSample(newLookup, subsampleSize);
+            thisItersLookup = newLookup; //from here on we only touch (and later delete) the copies
+
+
+            Rarefact* rCurve;
+            vector<Display*> rDisplays;
+
+            //iteration number keeps the temporary files of different iterations apart
+            string thisfileNameRoot = fileNameRoot + toString(thisIter);
+
+            ValidCalculators validCalculator;
+            for (int i=0; i<Estimators.size(); i++) {
+                if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) {
+                    if (Estimators[i] == "sharedobserved") {
+                        rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(thisfileNameRoot+getOutputFileNameTag("sharedrarefaction"), "")));
+                        filenames["sharedrarefaction"].push_back(thisfileNameRoot+getOutputFileNameTag("sharedrarefaction"));
+                    }else if (Estimators[i] == "sharednseqs") {
+                        rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(thisfileNameRoot+getOutputFileNameTag("sharedr_nseqs"), "")));
+                        filenames["sharedr_nseqs"].push_back(thisfileNameRoot+getOutputFileNameTag("sharedr_nseqs"));
+                    }
+                }
+            }
+
+            rCurve = new Rarefact(thisItersLookup, rDisplays);
+            rCurve->getSharedCurve(freq, nIters);
+            delete rCurve;
+
+            //clean up memory
+            for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
+            thisItersLookup.clear();
+            for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
+        }
+
+        //create std and ave outputs
+        for (map<string, vector<string> >::iterator it = filenames.begin(); it != filenames.end(); it++) {
+            //results must start empty for every output type: the loops below index it by
+            //iteration (results[thisIter]), so data left over from a previous type would be
+            //averaged in place of this type's data
+            vector< vector< vector< double > > > results; //iter -> numSampled -> data
+            vector<string> thisTypesFiles = it->second;
+            vector<string> columnHeaders;
+            for (int i = 0; i < thisTypesFiles.size(); i++) {
+                ifstream in;
+                m->openInputFile(thisTypesFiles[i], in);
+
+                string headers = m->getline(in); m->gobble(in);
+                columnHeaders = m->splitWhiteSpace(headers);
+                int numCols = columnHeaders.size();
+
+                vector<vector<double> > thisFilesLines;
+                while (!in.eof()) {
+                    if (m->control_pressed) { break; }
+                    vector<double> data; data.resize(numCols, 0);
+                    //read numSampled line
+                    for (int j = 0; j < numCols; j++) { in >> data[j]; m->gobble(in); }
+                    thisFilesLines.push_back(data);
+                }
+                in.close();
+                results.push_back(thisFilesLines);
+                m->mothurRemove(thisTypesFiles[i]); //temporary per-iteration file is no longer needed
+            }
+
+            if (!m->control_pressed) {
+                //process results
+                string outputFile = fileNameRoot + "ave-std." + thisLookup[0]->getLabel() + "." + getOutputFileNameTag(it->first);
+                ofstream out;
+                m->openOutputFile(outputFile, out);
+                outputNames.push_back(outputFile); outputTypes[it->first].push_back(outputFile);
+
+                //header: first column, a "method" column (ave/std), then the remaining columns
+                out << columnHeaders[0] << '\t' << "method\t";
+                for (int i = 1; i < columnHeaders.size(); i++) { out << columnHeaders[i] << '\t'; }
+                out << endl;
+
+                //sums start at 0.0 and are shaped like the first iteration's table
+                vector< vector<double> > aveResults; aveResults.resize(results[0].size());
+                for (int i = 0; i < aveResults.size(); i++) { aveResults[i].resize(results[0][i].size(), 0.0); }
+
+                for (int thisIter = 0; thisIter < iters; thisIter++) { //sum each column over all iterations
+                    for (int i = 0; i < aveResults.size(); i++) {
+                        aveResults[i][0] = results[thisIter][i][0]; //column 0 is copied, not averaged
+                        for (int j = 1; j < aveResults[i].size(); j++) {
+                            aveResults[i][j] += results[thisIter][i][j];
+                        }
+                    }
+                }
+
+                for (int i = 0; i < aveResults.size(); i++) { //finds average.
+                    for (int j = 1; j < aveResults[i].size(); j++) {
+                        aveResults[i][j] /= (float) iters;
+                    }
+                }
+
+                //standard deviation
+                vector< vector<double> > stdResults; stdResults.resize(results[0].size());
+                for (int i = 0; i < stdResults.size(); i++) { stdResults[i].resize(results[0][i].size(), 0.0); }
+
+                for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each value from the mean, and square the result of each
+                    for (int i = 0; i < stdResults.size(); i++) {
+                        stdResults[i][0] = aveResults[i][0];
+                        for (int j = 1; j < stdResults[i].size(); j++) {
+                            stdResults[i][j] += ((results[thisIter][i][j] - aveResults[i][j]) * (results[thisIter][i][j] - aveResults[i][j]));
+                        }
+                    }
+                }
+
+                for (int i = 0; i < stdResults.size(); i++) { //write an "ave" row and a "std" row for each line
+                    out << aveResults[i][0] << '\t' << "ave\t";
+                    for (int j = 1; j < aveResults[i].size(); j++) { out << aveResults[i][j] << '\t'; }
+                    out << endl;
+                    out << stdResults[i][0] << '\t' << "std\t";
+                    for (int j = 1; j < stdResults[i].size(); j++) {
+                        //finish the standard deviation: mean of the squared differences, then square root
+                        stdResults[i][j] /= (float) iters;
+                        stdResults[i][j] = sqrt(stdResults[i][j]);
+                        out << stdResults[i][j] << '\t';
+                    }
+                    out << endl;
+                }
+                out.close();
+            }
+        }
+
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "RareFactSharedCommand", "subsampleLookup");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
vector<string> RareFactSharedCommand::createGroupFile(vector<string>& outputNames) {
try {
vector<string> setParameters();
string getCommandName() { return "rarefaction.shared"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Magurran AE (2004). Measuring biological diversity. Blackwell Pub.: Malden, Ma. \nhttp://www.mothur.org/wiki/Rarefaction.shared"; }
string getDescription() { return "generate inter-sample rarefaction curves using a re-sampling without replacement approach"; }
private:
vector<SharedRAbundVector*> lookup;
- int nIters;
+ int nIters, subsampleSize, iters;
string format;
float freq;
map<int, string> file2Group; //index in outputNames[i] -> group
- bool abort, allLines, jumble, groupMode;
+ bool abort, allLines, jumble, groupMode, subsample;
set<string> labels; //holds labels to be used
string label, calc, groups, outputDir, sharedfile, designfile;
vector<string> Estimators, Groups, outputNames, Sets;
int process(GroupMap&, string);
vector<string> createGroupFile(vector<string>&);
+ int subsampleLookup(vector<SharedRAbundVector*>&, string);
};
//create objects needed for read
if (!hclusterWanted) {
- matrix = new SparseMatrix();
+ matrix = new SparseDistanceMatrix();
+ matrix->resize(nseqs);
}else{
overlapFile = m->getRootName(blastfile) + "overlap.dist";
distFile = m->getRootName(blastfile) + "hclusterDists.dist";
//is this distance below cutoff
if (distance < cutoff) {
if (!hclusterWanted) {
- PCell value(itA->second, it->first, distance);
- matrix->addCell(value);
+ if (itA->second < it->first) {
+ PDistCell value(it->first, distance);
+ matrix->addCell(itA->second, value);
+ }else {
+ PDistCell value(itA->second, distance);
+ matrix->addCell(it->first, value);
+ }
}else{
outDist << itA->first << '\t' << nameMap->get(it->first) << '\t' << distance << endl;
}
//is this distance below cutoff
if (distance < cutoff) {
if (!hclusterWanted) {
- PCell value(itA->second, it->first, distance);
- matrix->addCell(value);
+ if (itA->second < it->first) {
+ PDistCell value(it->first, distance);
+ matrix->addCell(itA->second, value);
+ }else {
+ PDistCell value(itA->second, distance);
+ matrix->addCell(it->first, value);
+ }
}else{
outDist << itA->first << '\t' << nameMap->get(it->first) << '\t' << distance << endl;
}
*/
#include "mothur.h"
-#include "sparsematrix.hpp"
+#include "sparsedistancematrix.h"
#include "nameassignment.hpp"
/****************************************************************************************/
~ReadBlast() {}
int read(NameAssignment*);
- SparseMatrix* getDistMatrix() { return matrix; }
+ SparseDistanceMatrix* getDistMatrix() { return matrix; }
vector<seqDist> getOverlapMatrix() { return overlap; }
string getOverlapFile() { return overlapFile; }
string getDistFile() { return distFile; }
bool minWanted; //if true choose min bsr, if false choose max bsr
bool hclusterWanted;
- SparseMatrix* matrix;
+ SparseDistanceMatrix* matrix;
vector<seqDist> overlap;
MothurOut* m;
string firstName, secondName;
float distance;
int nseqs = nameMap->size();
-
+ DMatrix->resize(nseqs);
list = new ListVector(nameMap->getListVector());
Progress* reading = new Progress("Reading matrix: ", nseqs * nseqs);
map<string,int>::iterator itA = nameMap->find(firstName);
map<string,int>::iterator itB = nameMap->find(secondName);
-
+
if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); }
if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); }
if(distance < cutoff && itA != itB){
if(itA->second > itB->second){
- PCell value(itA->second, itB->second, distance);
-
+ PDistCell value(itA->second, distance);
+
+
if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol...
refRow = itA->second;
refCol = itB->second;
- D->addCell(value);
+ DMatrix->addCell(itB->second, value);
}
else if(refRow == itA->second && refCol == itB->second){
lt = 0;
}
else{
- D->addCell(value);
+ DMatrix->addCell(itB->second, value);
}
}
else if(itA->second < itB->second){
- PCell value(itB->second, itA->second, distance);
+ PDistCell value(itB->second, distance);
if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol...
refRow = itA->second;
refCol = itB->second;
- D->addCell(value);
+ DMatrix->addCell(itA->second, value);
}
else if(refRow == itB->second && refCol == itA->second){
lt = 0;
}
else{
- D->addCell(value);
+ DMatrix->addCell(itA->second, value);
}
}
reading->update(itA->second * nseqs);
if(lt == 0){ // oops, it was square
fileHandle.close(); //let's start over
- D->clear(); //let's start over
+ DMatrix->clear(); //let's start over
m->openInputFile(distFile, fileHandle); //let's start over
else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
if(distance < cutoff && itA->second > itB->second){
- PCell value(itA->second, itB->second, distance);
- D->addCell(value);
+ PDistCell value(itA->second, distance);
+ DMatrix->addCell(itB->second, value);
reading->update(itA->second * nseqs);
}
exit(1);
}
}
-
/***********************************************************************/
-ReadColumnMatrix::~ReadColumnMatrix(){
- //delete D;
- //delete list;
+// Reads a column-formatted distance file (firstName secondName distance per record)
+// into DMatrix, translating sequence names to indices through countTable.
+//
+// The file is first assumed to be a lower-triangle matrix. The first stored cell is
+// remembered (refRow/refCol); if its transpose shows up later the file is square, so the
+// matrix is cleared and the file is re-read keeping only pairs with itA > itB.
+// A distance of -1 is replaced by 1000000; when `sim` is set the value is converted
+// with 1.0 - distance. Only distances below `cutoff` are stored, under the smaller index.
+//
+// Returns 1 on success and 0 when m->control_pressed interrupts the read.
+int ReadColumnMatrix::read(CountTable* countTable){
+    try {
+
+        string firstName, secondName;
+        float distance;
+        int nseqs = countTable->size();
+
+        DMatrix->resize(nseqs);
+        list = new ListVector(countTable->getListVector());
+
+        Progress* reading = new Progress("Reading matrix:     ", nseqs * nseqs);
+
+        int lt = 1;
+        int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose
+        int refCol = 0; //shows up later - Cell(refCol,refRow). If it does, then its a square matrix
+
+        //need to see if this is a square or a triangular matrix...
+
+        while(fileHandle && lt == 1){ //let's assume it's a triangular matrix...
+
+
+            fileHandle >> firstName >> secondName >> distance; // get the row and column names and distance
+
+            if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
+
+            int itA = countTable->get(firstName);
+            int itB = countTable->get(secondName);
+
+            //NOTE(review): presumably countTable->get flags control_pressed for an unknown name - confirm
+            if (m->control_pressed) { exit(1); }
+
+            if (distance == -1) { distance = 1000000; }
+            else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
+
+            if(distance < cutoff && itA != itB){
+                if(itA > itB){
+                    PDistCell value(itA, distance);
+
+
+                    if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol...
+                        refRow = itA;
+                        refCol = itB;
+                        DMatrix->addCell(itB, value);
+                    }
+                    else if(refRow == itA && refCol == itB){
+                        lt = 0;
+                    }
+                    else{
+                        DMatrix->addCell(itB, value);
+                    }
+                }
+                else if(itA < itB){
+                    PDistCell value(itB, distance);
+
+                    if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol...
+                        refRow = itA;
+                        refCol = itB;
+                        DMatrix->addCell(itA, value);
+                    }
+                    else if(refRow == itB && refCol == itA){
+                        lt = 0;
+                    }
+                    else{
+                        DMatrix->addCell(itA, value);
+                    }
+                }
+                reading->update(itA * nseqs);
+            }
+            m->gobble(fileHandle);
+        }
+
+        if(lt == 0){ // oops, it was square
+
+            fileHandle.close(); //let's start over
+            DMatrix->clear(); //let's start over
+
+            m->openInputFile(distFile, fileHandle); //let's start over
+
+            while(fileHandle){
+                fileHandle >> firstName >> secondName >> distance;
+
+                if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
+
+                int itA = countTable->get(firstName);
+                int itB = countTable->get(secondName);
+
+
+                if (m->control_pressed) { exit(1); }
+
+                if (distance == -1) { distance = 1000000; }
+                else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
+
+                //square file: keep one cell of each (A,B)/(B,A) pair
+                if(distance < cutoff && itA > itB){
+                    PDistCell value(itA, distance);
+                    DMatrix->addCell(itB, value);
+                    reading->update(itA * nseqs);
+                }
+
+                m->gobble(fileHandle);
+            }
+        }
+
+        if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
+
+        reading->finish();
+        delete reading; //the cancel paths above already free it; free it on success too
+        fileHandle.close();
+
+        list->setLabel("0");
+
+        return 1;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ReadColumnMatrix", "read");
+        exit(1);
+    }
}
+/***********************************************************************/
+// Empty destructor: neither DMatrix nor list is released here.
+// NOTE(review): presumably callers take ownership of both through the ReadMatrix getters - confirm.
+ReadColumnMatrix::~ReadColumnMatrix(){}
+/***********************************************************************/
ReadColumnMatrix(string, bool);
~ReadColumnMatrix();
int read(NameAssignment*);
+ int read(CountTable*);
private:
ifstream fileHandle;
string distFile;
+++ /dev/null
-/*
- * readdistcommand.cpp
- * Mothur
- *
- * Created by Sarah Westcott on 1/20/09.
- * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
- *
- */
-
-#include "readdistcommand.h"
-#include "readphylip.h"
-#include "readcolumn.h"
-#include "readmatrix.hpp"
-
-//**********************************************************************************************************************
-ReadDistCommand::ReadDistCommand(string option) {
- try {
- abort = false; calledHelp = false;
-
- //allow user to run help
- if(option == "help") { help(); abort = true; calledHelp = true; }
- else if(option == "citation") { citation(); abort = true; calledHelp = true;}
-
- else {
- /*//valid paramters for this command
- string Array[] = {"phylip", "column", "name", "cutoff", "precision", "group","outputdir","inputdir","sim"};
- vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
-
- OptionParser parser(option);
- map<string, string> parameters = parser.getParameters();
-
- ValidParameters validParameter;
- map<string,string>::iterator it;
-
- //check to make sure all parameters are valid for command
- for (it = parameters.begin(); it != parameters.end(); it++) {
- if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
- }
-
- globaldata->newRead();
-
- //if the user changes the input directory command factory will send this info to us in the output parameter
- string inputDir = validParameter.validFile(parameters, "inputdir", false);
- if (inputDir == "not found"){ inputDir = ""; }
- else {
- string path;
- it = parameters.find("phylip");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["phylip"] = inputDir + it->second; }
- }
-
- it = parameters.find("column");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["column"] = inputDir + it->second; }
- }
-
- it = parameters.find("name");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["name"] = inputDir + it->second; }
- }
-
- it = parameters.find("group");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["group"] = inputDir + it->second; }
- }
- }
-
- //if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
-
- //check for required parameters
- phylipfile = validParameter.validFile(parameters, "phylip", true);
- if (phylipfile == "not open") { abort = true; }
- else if (phylipfile == "not found") { phylipfile = ""; }
- else { globaldata->setPhylipFile(phylipfile); globaldata->setFormat("phylip"); }
-
- columnfile = validParameter.validFile(parameters, "column", true);
- if (columnfile == "not open") { abort = true; }
- else if (columnfile == "not found") { columnfile = ""; }
- else { globaldata->setColumnFile(columnfile); globaldata->setFormat("column"); }
-
- groupfile = validParameter.validFile(parameters, "group", true);
- if (groupfile == "not open") { abort = true; }
- else if (groupfile == "not found") { groupfile = ""; }
- else {
- globaldata->setGroupFile(groupfile);
- //groupMap = new GroupMap(groupfile);
- //groupMap->readMap();
- }
-
- namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
- else if (namefile == "not found") { namefile = ""; }
- else { globaldata->setNameFile(namefile); }
-
- //you are doing a list and group shared
- if ((phylipfile != "") && (groupfile != "")) {
- globaldata->setFormat("matrix"); }
-
- if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a read.dist command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
- else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a read.dist command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
-
- if (columnfile != "") {
- if (namefile == "") { cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }
- }
-
- //check for optional parameter and set defaults
- // ...at some point should added some additional type checking...
- //get user cutoff and precision or use defaults
- string temp;
- temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
- m->mothurConvert(temp, precision);
-
- temp = validParameter.validFile(parameters, "sim", false); if (temp == "not found") { temp = "F"; }
- sim = m->isTrue(temp);
- globaldata->sim = sim;
-
- temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; }
- convert(temp, cutoff);
- cutoff += (5 / (precision * 10.0));
-
- if (abort == false) {
- distFileName = globaldata->inputFileName;
- format = globaldata->getFormat();
-
- if (format == "column") { read = new ReadColumnMatrix(distFileName); }
- else if (format == "phylip") { read = new ReadPhylipMatrix(distFileName); }
- else if (format == "matrix") {
- groupMap = new GroupMap(groupfile);
- int error = groupMap->readMap();
- if (error == 1) { delete groupMap; abort = true; }
- else {
- if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; }
- globaldata->gGroupmap = groupMap;
- }
- }
-
- if (format != "matrix" ) {
- read->setCutoff(cutoff);
-
- if(namefile != ""){
- nameMap = new NameAssignment(namefile);
- nameMap->readMap();
- }else{
- nameMap = NULL;
- }
- }
- }
-*/
- }
-
- }
- catch(exception& e) {
- m->errorOut(e, "ReadDistCommand", "ReadDistCommand");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-int ReadDistCommand::execute(){
- try {
-
- if (abort == true) { if (calledHelp) { return 0; } return 2; }
- m->mothurOut(getHelpString()); m->mothurOutEndLine();
-/*
- time_t start = time(NULL);
- size_t numDists = 0;
-
- if (format == "matrix") {
- ifstream in;
- m->openInputFile(distFileName, in);
- matrix = new FullMatrix(in); //reads the matrix file
- in.close();
-
- if (m->control_pressed) { delete groupMap; delete matrix; return 0; }
-
- //if files don't match...
- if (matrix->getNumSeqs() < groupMap->getNumSeqs()) {
- m->mothurOut("Your distance file contains " + toString(matrix->getNumSeqs()) + " sequences, and your group file contains " + toString(groupMap->getNumSeqs()) + " sequences."); m->mothurOutEndLine();
- //create new group file
- if(outputDir == "") { outputDir += m->hasPath(groupfile); }
-
- string newGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "editted.groups";
- outputNames.push_back(newGroupFile);
- ofstream outGroups;
- m->openOutputFile(newGroupFile, outGroups);
-
- for (int i = 0; i < matrix->getNumSeqs(); i++) {
- if (m->control_pressed) { delete groupMap; delete matrix; outGroups.close(); m->mothurRemove(newGroupFile); return 0; }
-
- Names temp = matrix->getRowInfo(i);
- outGroups << temp.seqName << '\t' << temp.groupName << endl;
- }
- outGroups.close();
-
- m->mothurOut(newGroupFile + " is a new group file containing only the sequence that are in your distance file. I will read this file instead."); m->mothurOutEndLine();
-
- //read new groupfile
- delete groupMap; groupMap = NULL;
- groupfile = newGroupFile;
- globaldata->setGroupFile(groupfile);
-
- groupMap = new GroupMap(groupfile);
- groupMap->readMap();
-
- if (m->control_pressed) { delete groupMap; delete matrix; m->mothurRemove(newGroupFile); return 0; }
-
- globaldata->gGroupmap = groupMap;
- }
-
- //memory leak prevention
- if (globaldata->gMatrix != NULL) { delete globaldata->gMatrix; }
- globaldata->gMatrix = matrix; //save matrix for coverage commands
- numDists = matrix->getSizes()[1];
- } else {
- read->read(nameMap);
- //to prevent memory leak
-
- if (m->control_pressed) { return 0; }
-
- if (globaldata->gListVector != NULL) { delete globaldata->gListVector; }
- globaldata->gListVector = read->getListVector();
-
- if (globaldata->gSparseMatrix != NULL) { delete globaldata->gSparseMatrix; }
- globaldata->gSparseMatrix = read->getMatrix();
- numDists = globaldata->gSparseMatrix->getNNodes();
- }
-
- if (m->control_pressed) { return 0; }
-
- if (outputNames.size() != 0) {
- m->mothurOutEndLine();
- m->mothurOut("Output File Name: "); m->mothurOutEndLine();
- for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
- m->mothurOutEndLine();
- }
-
- m->mothurOut("It took " + toString(time(NULL) - start) + " secs to read "); m->mothurOutEndLine();
- */
- return 0;
-
- }
- catch(exception& e) {
- m->errorOut(e, "ReadDistCommand", "execute");
- exit(1);
- }
-}
+++ /dev/null
-#ifndef READDISTCOMMAND_H
-#define READDISTCOMMAND_H
-/*
- * readdistcommand.h
- * Mothur
- *
- * Created by Sarah Westcott on 1/20/09.
- * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
- *
- */
-
-#include "command.hpp"
-#include "readmatrix.hpp"
-#include "fullmatrix.h"
-#include "groupmap.h"
-
-/* The read.dist command is used to read a distance matrix file.
-The read.dist command parameter options are phylipfile, columnfile, namefile, cutoff and precision.
-The read.dist command should be in the following format: read.dist(phylipfile=yourDistFile,
-namefile=yourNameFile, cutoff=yourCutoff, precision=yourPrecision). The phylipfile or columnfile are required and if you use a columnfile the namefile is required.
-If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed. */
-
-class NameAssignment;
-
-class ReadDistCommand : public Command {
-public:
- ReadDistCommand(string);
- ReadDistCommand() { abort = true; calledHelp = true; }
- ~ReadDistCommand() {}
-
- vector<string> setParameters() { return outputNames; } //dummy doesn't really do anything
- string getCommandName() { return "read.dist"; }
- string getCommandCategory() { return "Hidden"; }
- string getHelpString() { return "This command is no longer available. You can provide your distance files directly to the downstream commands like cluster."; }
- string getCitation() { return "http://www.mothur.org/wiki/Read.dist"; }
- string getDescription() { return "read.dist"; }
-
- int execute();
- void help() { m->mothurOut(getHelpString()); }
-
-private:
- double cutoff;
- int precision;
- ReadMatrix* read;
- FullMatrix* matrix;
- GroupMap* groupMap;
- string distFileName, format, method;
- string phylipfile, columnfile, namefile, groupfile, outputDir;
- NameAssignment* nameMap;
- vector<string> outputNames;
-
- bool abort, sim;
-
-};
-
-#endif
#include "mothur.h"
#include "listvector.hpp"
-#include "sparsematrix.hpp"
#include "nameassignment.hpp"
+#include "counttable.h"
+#include "sparsedistancematrix.h"
class SparseMatrix;
class ReadMatrix {
public:
- ReadMatrix(){ D = new SparseMatrix(); m = MothurOut::getInstance(); }
+ ReadMatrix(){ DMatrix = new SparseDistanceMatrix(); m = MothurOut::getInstance(); }
virtual ~ReadMatrix() {}
virtual int read(NameAssignment*){ return 1; }
+ virtual int read(CountTable*){ return 1; }
void setCutoff(float c) { cutoff = c; }
- SparseMatrix* getMatrix() { return D; }
+ SparseDistanceMatrix* getDMatrix() { return DMatrix; }
ListVector* getListVector() { return list; }
-// OrderVector* getOrderVector() { return order; }
int successOpen;
protected:
- SparseMatrix* D;
+ SparseDistanceMatrix* DMatrix;
ListVector* list;
float cutoff;
MothurOut* m;
+++ /dev/null
-/*
- * readotu.cpp
- * Mothur
- *
- * Created by Sarah Westcott on 1/20/09.
- * Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
- *
- */
-
-#include "readotucommand.h"
-
-
-//**********************************************************************************************************************
-ReadOtuCommand::ReadOtuCommand(){
- try {
- abort = true; calledHelp = true;
- setParameters();
- vector<string> tempOutNames;
- outputTypes["rabund"] = tempOutNames;
- outputTypes["shared"] = tempOutNames;
- }
- catch(exception& e) {
- m->errorOut(e, "ReadOtuCommand", "ReadOtuCommand");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-ReadOtuCommand::ReadOtuCommand(string option) {
- try {
- abort = false; calledHelp = false;
- allLines = 1;
-
- //allow user to run help
- if(option == "help") { help(); abort = true; calledHelp = true; }
- else if(option == "citation") { citation(); abort = true; calledHelp = true;}
-
- else {
- /*
- //valid paramters for this command
- string Array[] = {"list","order","shared","relabund","label","group","sabund", "rabund","groups","ordergroup","outputdir","inputdir"};
- vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
-
- OptionParser parser(option);
- map<string, string> parameters = parser.getParameters();
-
- ValidParameters validParameter;
- map<string, string>::iterator it;
-
- //check to make sure all parameters are valid for command
- for (it = parameters.begin(); it != parameters.end(); it++) {
- if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
- }
-
- //initialize outputTypes
- vector<string> tempOutNames;
- outputTypes["rabund"] = tempOutNames;
- outputTypes["shared"] = tempOutNames;
-
- globaldata->newRead();
-
- //if the user changes the input directory command factory will send this info to us in the output parameter
- string inputDir = validParameter.validFile(parameters, "inputdir", false);
- if (inputDir == "not found"){ inputDir = ""; }
- else {
- string path;
- it = parameters.find("list");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["list"] = inputDir + it->second; }
- }
-
- it = parameters.find("order");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["order"] = inputDir + it->second; }
- }
-
- it = parameters.find("shared");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["shared"] = inputDir + it->second; }
- }
-
- it = parameters.find("group");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["group"] = inputDir + it->second; }
- }
-
- it = parameters.find("sabund");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["sabund"] = inputDir + it->second; }
- }
-
- it = parameters.find("rabund");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["rabund"] = inputDir + it->second; }
- }
-
- it = parameters.find("ordergroup");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["ordergroup"] = inputDir + it->second; }
- }
-
- it = parameters.find("relabund");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["relabund"] = inputDir + it->second; }
- }
- }
-
-
- //if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
-
- //check for required parameters
- listfile = validParameter.validFile(parameters, "list", true);
- if (listfile == "not open") { abort = true; }
- else if (listfile == "not found") { listfile = ""; }
- else { globaldata->setListFile(listfile); globaldata->setFormat("list"); }
-
- sabundfile = validParameter.validFile(parameters, "sabund", true);
- if (sabundfile == "not open") { abort = true; }
- else if (sabundfile == "not found") { sabundfile = ""; }
- else { globaldata->setSabundFile(sabundfile); globaldata->setFormat("sabund"); }
-
- rabundfile = validParameter.validFile(parameters, "rabund", true);
- if (rabundfile == "not open") { abort = true; }
- else if (rabundfile == "not found") { rabundfile = ""; }
- else { globaldata->setRabundFile(rabundfile); globaldata->setFormat("rabund");}
-
- ordergroupfile = validParameter.validFile(parameters, "ordergroup", true);
- if (ordergroupfile == "not open") { abort = true; }
- else if (ordergroupfile == "not found") { ordergroupfile = ""; }
- else { globaldata->setOrderGroupFile(ordergroupfile); }
-
- sharedfile = validParameter.validFile(parameters, "shared", true);
- if (sharedfile == "not open") { abort = true; }
- else if (sharedfile == "not found") { sharedfile = ""; }
- else { globaldata->setSharedFile(sharedfile); globaldata->setFormat("sharedfile"); }
-
- relAbundfile = validParameter.validFile(parameters, "relabund", true);
- if (relAbundfile == "not open") { abort = true; }
- else if (relAbundfile == "not found") { relAbundfile = ""; }
- else { globaldata->setRelAbundFile(relAbundfile); globaldata->setFormat("relabund"); }
-
-
- groupfile = validParameter.validFile(parameters, "group", true);
- if (groupfile == "not open") { abort = true; }
- else if (groupfile == "not found") { groupfile = ""; }
- else {
- globaldata->setGroupFile(groupfile);
- groupMap = new GroupMap(groupfile);
-
- int error = groupMap->readMap();
- if (error == 1) { abort = true; }
-
- globaldata->gGroupmap = groupMap;
- }
-
- groups = validParameter.validFile(parameters, "groups", false);
- if (groups == "not found") { groups = ""; }
- else {
- m->splitAtDash(groups, Groups);
- globaldata->Groups = Groups;
- }
-
- //you are doing a list and group shared
- if ((listfile != "") && (groupfile != "")) { globaldata->setFormat("shared"); }
-
- //you have not given a file
- if ((listfile == "") && (sharedfile == "") && (rabundfile == "") && (sabundfile == "") && (relAbundfile == "")) {
- m->mothurOut("You must enter either a listfile, rabundfile, sabundfile, relabund or a sharedfile with the read.otu command. "); m->mothurOutEndLine(); abort = true;
- }
-
- //check for optional parameter and set defaults
- // ...at some point should added some additional type checking...
- label = validParameter.validFile(parameters, "label", false);
- if (label == "not found") { label = ""; }
- else {
- if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
- else { allLines = 1; }
- globaldata->labels = labels;
- }
-
- globaldata->allLines = allLines;
-
- orderfile = validParameter.validFile(parameters, "order", true);
- if (orderfile == "not open") { abort = true; }
- else if (orderfile == "not found") { orderfile = ""; }
- else { globaldata->setOrderFile(orderfile); }
-
-
- if (abort == false) {
- //gets whichever one of the above is set
- filename = globaldata->inputFileName;
- }
- */
- }
-
- }
- catch(exception& e) {
- m->errorOut(e, "ReadOtuCommand", "ReadOtuCommand");
- exit(1);
- }
-}
-///**********************************************************************************************************************
-
-int ReadOtuCommand::execute(){
- try {
-
- if (abort == true) { if (calledHelp) { return 0; } return 2; }
- m->mothurOut(getHelpString()); m->mothurOutEndLine();
-
- /*
- if (globaldata->getFormat() == "shared") {
-
- shared = new SharedCommand(outputDir);
- int okay = shared->execute();
-
- //problem with shared
- if (okay == 1) {
- globaldata->setListFile("");
- globaldata->setGroupFile("");
- globaldata->setSharedFile("");
- }else { //shared command outputs the filenames
- //m->mothurOutEndLine();
- //m->mothurOut("Output File Name: "); m->mothurOutEndLine();
- //m->mothurOut(globaldata->getSharedFile()); m->mothurOutEndLine();
- //m->mothurOutEndLine();
- }
-
- outputTypes = shared->getOutputFiles();
-
- //set rabund file as new current rabundfile
- string current = "";
- itTypes = outputTypes.find("rabund");
- if (itTypes != outputTypes.end()) {
- if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
- }
-
- itTypes = outputTypes.find("shared");
- if (itTypes != outputTypes.end()) {
- if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
- }
-
- delete shared;
- }
- */
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "ReadOtuCommand", "execute");
- exit(1);
- }
-}
-//**********************************************************************************************************************
+++ /dev/null
-#ifndef READOTUCOMMAND_H
-#define READOTUCOMMAND_H
-/*
- * readotu.h
- * Mothur
- *
- * Created by Sarah Westcott on 1/20/09.
- * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
- *
- */
-
-#include "command.hpp"
-#include "inputdata.h"
-#include "groupmap.h"
-#include "sharedcommand.h"
-
-class ReadOtuCommand : public Command {
-public:
- ReadOtuCommand(string);
- ReadOtuCommand();
- ~ReadOtuCommand() {}
-
- vector<string> setParameters() { return outputNames; } //dummy doesn't really do anything
- string getCommandName() { return "read.otu"; }
- string getCommandCategory() { return "Hidden"; }
- string getHelpString() { return "This command is no longer available. You can provide your files directly to the downstream commands like collect.shared."; }
- string getCitation() { return "http://www.mothur.org/wiki/Read.otu"; }
- string getDescription() { return "read.otu"; }
-
- int execute();
- void help() { m->mothurOut(getHelpString()); }
-
-private:
- InputData* input;
- Command* shared;
- GroupMap* groupMap;
- string filename, listfile, orderfile, sharedfile, label, groupfile, sabundfile, rabundfile, format, groups, outputDir, ordergroupfile, relAbundfile;
- vector<string> Groups, outputNames;
- map<string, vector<string> > outputTypes;
-
- bool abort, allLines;
- set<string> labels; //holds labels to be used
-
-};
-
-#endif
try {
float distance;
- int square, nseqs;
+ int square, nseqs;
string name;
vector<string> matrixNames;
}
Progress* reading;
-
+ DMatrix->resize(nseqs);
+
if(square == 0){
reading = new Progress("Reading matrix: ", nseqs * (nseqs - 1) / 2);
if (m->control_pressed) { delete reading; fileHandle.close(); return 0; }
fileHandle >> distance;
-
if (distance == -1) { distance = 1000000; }
else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
if(distance < cutoff){
- PCell value(i, j, distance);
- D->addCell(value);
+ PDistCell value(i, distance);
+ DMatrix->addCell(j, value);
}
index++;
reading->update(index);
else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
if(distance < cutoff){
- PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
- D->addCell(value);
+ PDistCell value(nameMap->get(matrixNames[i]), distance);
+ DMatrix->addCell(nameMap->get(matrixNames[j]), value);
}
index++;
reading->update(index);
else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
if(distance < cutoff && j < i){
- PCell value(i, j, distance);
- D->addCell(value);
+ PDistCell value(i, distance);
+ DMatrix->addCell(j, value);
}
index++;
reading->update(index);
else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
if(distance < cutoff && j < i){
- PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
- D->addCell(value);
+ PDistCell value(nameMap->get(matrixNames[i]), distance);
+ DMatrix->addCell(nameMap->get(matrixNames[j]), value);
}
index++;
reading->update(index);
reading->finish();
delete reading;
-
+
list->setLabel("0");
fileHandle.close();
- /* if(nameMap != NULL){
- for(int i=0;i<matrixNames.size();i++){
- nameMap->erase(matrixNames[i]);
- }
- if(nameMap->size() > 0){
- //should probably tell them what is missing if we missed something
- m->mothurOut("missed something\t" + toString(nameMap->size())); m->mothurOutEndLine();
- }
- } */
-
+
return 1;
}
exit(1);
}
}
-
/***********************************************************************/
-ReadPhylipMatrix::~ReadPhylipMatrix(){
- // delete D;
- // delete list;
+// Reads the PHYLIP distance matrix attached to fileHandle into DMatrix and builds `list`.
+//
+// countTable: when NULL, the names found in the file are stored in `list` and the row/column
+//             positions in the file are used as cell indices. Otherwise `list` is built from
+//             countTable->getListVector() and each name is translated with countTable->get().
+//             NOTE(review): get() presumably flags m->control_pressed for unknown names, which
+//             is why the flag is re-checked right after the lookups -- confirm in CountTable.
+//
+// The file may be a lower triangle or a full square matrix; the layout is detected from what
+// follows the first sequence name. A distance of -1 is stored as 1000000, similarity values are
+// converted to distances when `sim` is set, and only distances below `cutoff` are kept.
+//
+// Returns 1 on success and 0 when the user aborted (m->control_pressed). Exits the process if
+// the first token of the file is not a number.
+int ReadPhylipMatrix::read(CountTable* countTable){
+    try {
+
+        float distance;
+        int square = 0;    //treated as lower triangle if the header line hits EOF before a decision is made
+        int nseqs;
+        string name;
+        vector<string> matrixNames;
+
+        string numTest;
+        fileHandle >> numTest >> name;
+
+        if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+        else { convert(numTest, nseqs); }
+
+        matrixNames.push_back(name);
+
+        if(countTable == NULL){
+            list = new ListVector(nseqs);
+            list->set(0, name);
+        }
+        else{ list = new ListVector(countTable->getListVector()); }
+
+        //close the file on abort, like every other abort path in this method
+        if (m->control_pressed) { fileHandle.close(); return 0; }
+
+        //get() returns an int so that EOF can be told apart from every valid character;
+        //storing it in a char breaks the EOF test and hands isalnum() negative values
+        int d;
+        while((d=fileHandle.get()) != EOF){
+
+            if(isalnum(d)){
+                //data follows the first name on the same line, so the matrix is square;
+                //skip the first row because only cells with j < i are kept
+                square = 1;
+                fileHandle.putback(static_cast<char>(d));
+                for(int i=0;i<nseqs;i++){
+                    fileHandle >> distance;
+                }
+                break;
+            }
+            if(d == '\n'){
+                //the first row holds only a name, so the matrix is a lower triangle
+                square = 0;
+                break;
+            }
+        }
+
+        Progress* reading;
+        DMatrix->resize(nseqs);
+
+        if(square == 0){
+
+            reading = new Progress("Reading matrix: ", nseqs * (nseqs - 1) / 2);
+
+            int index = 0;
+
+            for(int i=1;i<nseqs;i++){
+                if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
+
+                fileHandle >> name;
+                matrixNames.push_back(name);
+
+
+                //there's A LOT of repeated code throughout this method...
+                if(countTable == NULL){
+                    list->set(i, name);
+
+                    for(int j=0;j<i;j++){
+
+                        if (m->control_pressed) { delete reading; fileHandle.close(); return 0; }
+
+                        fileHandle >> distance;
+
+                        if (distance == -1) { distance = 1000000; }
+                        else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
+
+                        if(distance < cutoff){
+                            PDistCell value(i, distance);
+                            DMatrix->addCell(j, value);
+                        }
+                        index++;
+                        reading->update(index);
+                    }
+
+                }
+                else{
+                    for(int j=0;j<i;j++){
+                        fileHandle >> distance;
+
+                        if (m->control_pressed) { delete reading; fileHandle.close(); return 0; }
+
+                        if (distance == -1) { distance = 1000000; }
+                        else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
+
+                        if(distance < cutoff){
+                            int iIndex = countTable->get(matrixNames[i]);
+                            int jIndex = countTable->get(matrixNames[j]);
+
+                            if (m->control_pressed) { delete reading; fileHandle.close(); return 0; }
+                            //store the cell under the smaller index, pointing at the larger one
+                            if (iIndex < jIndex) {
+                                PDistCell value(jIndex, distance);
+                                DMatrix->addCell(iIndex, value);
+                            }else {
+                                PDistCell value(iIndex, distance);
+                                DMatrix->addCell(jIndex, value);
+
+                            }
+                        }
+                        index++;
+                        reading->update(index);
+                    }
+                }
+            }
+        }
+        else{
+
+            reading = new Progress("Reading matrix: ", nseqs * nseqs);
+
+            //the first row (nseqs values) was already consumed while detecting the layout
+            int index = nseqs;
+
+            for(int i=1;i<nseqs;i++){
+                fileHandle >> name;
+                matrixNames.push_back(name);
+
+
+
+                if(countTable == NULL){
+                    list->set(i, name);
+                    for(int j=0;j<nseqs;j++){
+                        fileHandle >> distance;
+
+                        if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
+
+                        if (distance == -1) { distance = 1000000; }
+                        else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
+
+                        //only the lower half of the square matrix is stored
+                        if(distance < cutoff && j < i){
+                            PDistCell value(i, distance);
+                            DMatrix->addCell(j, value);
+                        }
+                        index++;
+                        reading->update(index);
+                    }
+
+                }
+                else{
+                    for(int j=0;j<nseqs;j++){
+                        fileHandle >> distance;
+
+                        if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
+
+                        if (distance == -1) { distance = 1000000; }
+                        else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
+
+                        //only the lower half of the square matrix is stored
+                        if(distance < cutoff && j < i){
+                            int iIndex = countTable->get(matrixNames[i]);
+                            int jIndex = countTable->get(matrixNames[j]);
+
+                            if (m->control_pressed) { delete reading; fileHandle.close(); return 0; }
+                            //store the cell under the smaller index, pointing at the larger one
+                            if (iIndex < jIndex) {
+                                PDistCell value(jIndex, distance);
+                                DMatrix->addCell(iIndex, value);
+                            }else {
+                                PDistCell value(iIndex, distance);
+                                DMatrix->addCell(jIndex, value);
+
+                            }
+                        }
+                        index++;
+                        reading->update(index);
+                    }
+                }
+            }
+        }
+
+        if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
+
+        reading->finish();
+        delete reading;
+
+        list->setLabel("0");
+        fileHandle.close();
+
+
+        return 1;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ReadPhylipMatrix", "read");
+        exit(1);
+    }
}
+/***********************************************************************/
+ReadPhylipMatrix::~ReadPhylipMatrix(){}
+/***********************************************************************/
+
ReadPhylipMatrix(string, bool);
~ReadPhylipMatrix();
int read(NameAssignment*);
+ int read(CountTable*);
private:
ifstream fileHandle;
string distFile;
+++ /dev/null
-/*
- * readtreecommand.cpp
- * Mothur
- *
- * Created by Sarah Westcott on 1/23/09.
- * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
- *
- */
-
-#include "readtreecommand.h"
-
-
-//**********************************************************************************************************************
-ReadTreeCommand::ReadTreeCommand(string option) {
- try {
- abort = false; calledHelp = false;
-
- //allow user to run help
- if(option == "help") { help(); abort = true; calledHelp = true; }
- else if(option == "citation") { citation(); abort = true; calledHelp = true;}
-
- else {
- /*
- //valid paramters for this command
- string Array[] = {"tree","group","name","outputdir","inputdir"};
- vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
-
- OptionParser parser(option);
- map<string, string> parameters = parser.getParameters();
-
- ValidParameters validParameter;
- map<string, string>::iterator it;
-
- //check to make sure all parameters are valid for command
- for (it = parameters.begin(); it != parameters.end(); it++) {
- if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
- }
-
- globaldata->newRead();
-
- //if the user changes the input directory command factory will send this info to us in the output parameter
- string inputDir = validParameter.validFile(parameters, "inputdir", false);
- if (inputDir == "not found"){ inputDir = ""; }
- else {
- string path;
- it = parameters.find("tree");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["tree"] = inputDir + it->second; }
- }
-
- it = parameters.find("group");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["group"] = inputDir + it->second; }
- }
-
- it = parameters.find("name");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["name"] = inputDir + it->second; }
- }
-
- }
-
-
- //check for required parameters
- treefile = validParameter.validFile(parameters, "tree", true);
- if (treefile == "not open") { abort = true; }
- else if (treefile == "not found") { treefile = ""; m->mothurOut("tree is a required parameter for the read.tree command."); m->mothurOutEndLine(); abort = true; }
-
- groupfile = validParameter.validFile(parameters, "group", true);
- if (groupfile == "not open") { abort = true; }
- else if (groupfile == "not found") {
- groupfile = "";
-
- m->mothurOut("You have not provided a group file. I am assumming all sequence are from the same group."); m->mothurOutEndLine();
-
- if (treefile != "") { Tree* tree = new Tree(treefile); delete tree; } //extracts names from tree to make faked out groupmap
-
- //read in group map info.
- treeMap = new TreeMap();
- for (int i = 0; i < m->Treenames.size(); i++) { treeMap->addSeq(m->Treenames[i], "Group1"); }
-
- }else {
- //read in group map info.
- treeMap = new TreeMap(groupfile);
- treeMap->readMap();
- }
-
- namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
- else if (namefile == "not found") { namefile = ""; }
- else { readNamesFile(); }
-
- if (abort == false) {
- filename = treefile;
- read = new ReadNewickTree(filename);
- }
- */
- }
- }
- catch(exception& e) {
- m->errorOut(e, "ReadTreeCommand", "ReadTreeCommand");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-
-int ReadTreeCommand::execute(){
- try {
-
- if (abort == true) { if (calledHelp) { return 0; } return 2; }
- m->mothurOut(getHelpString()); m->mothurOutEndLine();
- /*
- int readOk;
-
- readOk = read->read(treeMap);
-
- if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); globaldata->gTree.clear(); delete globaldata->gTreemap; return 0; }
-
- vector<Tree*> T = read->gTree;
-
- //assemble users trees
- for (int i = 0; i < T.size(); i++) {
- if (m->control_pressed) {
- for (int i = 0; i < T.size(); i++) { delete T[i]; }
- globaldata->gTree.clear();
- delete globaldata->gTreemap;
- return 0;
- }
-
- T[i]->assembleTree();
- }
-
-
- //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
- int numNamesInTree;
- if (namefile != "") {
- if (numUniquesInName == globaldata->Treenames.size()) { numNamesInTree = nameMap.size(); }
- else { numNamesInTree = globaldata->Treenames.size(); }
- }else { numNamesInTree = globaldata->Treenames.size(); }
-
-
- //output any names that are in group file but not in tree
- if (numNamesInTree < treeMap->getNumSeqs()) {
- for (int i = 0; i < treeMap->namesOfSeqs.size(); i++) {
- //is that name in the tree?
- int count = 0;
- for (int j = 0; j < globaldata->Treenames.size(); j++) {
- if (treeMap->namesOfSeqs[i] == globaldata->Treenames[j]) { break; } //found it
- count++;
- }
-
- if (m->control_pressed) {
- for (int i = 0; i < T.size(); i++) { delete T[i]; }
- globaldata->gTree.clear();
- delete globaldata->gTreemap;
- return 0;
- }
-
- //then you did not find it so report it
- if (count == globaldata->Treenames.size()) {
- //if it is in your namefile then don't remove
- map<string, string>::iterator it = nameMap.find(treeMap->namesOfSeqs[i]);
-
- if (it == nameMap.end()) {
- m->mothurOut(treeMap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
- treeMap->removeSeq(treeMap->namesOfSeqs[i]);
- i--; //need this because removeSeq removes name from namesOfSeqs
- }
- }
- }
-
- globaldata->gTreemap = treeMap;
- }
- */
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "ReadTreeCommand", "execute");
- exit(1);
- }
-}
-/*****************************************************************/
-int ReadTreeCommand::readNamesFile() {
- try {
- /*
- m->names.clear();
- numUniquesInName = 0;
-
- ifstream in;
- m->openInputFile(namefile, in);
-
- string first, second;
- map<string, string>::iterator itNames;
-
- while(!in.eof()) {
- in >> first >> second; m->gobble(in);
-
- numUniquesInName++;
-
- itNames = m->names.find(first);
- if (itNames == globaldata->names.end()) {
- globaldata->names[first] = second;
-
- //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
- vector<string> dupNames;
- m->splitAtComma(second, dupNames);
-
- for (int i = 0; i < dupNames.size(); i++) { nameMap[dupNames[i]] = dupNames[i]; if ((groupfile == "") && (i != 0)) { globaldata->gTreemap->addSeq(dupNames[i], "Group1"); } }
- }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); globaldata->names.clear(); namefile = ""; return 1; }
- }
- in.close();
- */
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "ReadTreeCommand", "readNamesFile");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
+++ /dev/null
-#ifndef READTREECOMMAND_H
-#define READTREECOMMAND_H
-
-/*
- * readtreecommand.h
- * Mothur
- *
- * Created by Sarah Westcott on 1/23/09.
- * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
- *
- */
-
-#include "command.hpp"
-#include "readtree.h"
-#include "treemap.h"
-
-
-class ReadTreeCommand : public Command {
-public:
- ReadTreeCommand(string);
- ReadTreeCommand() { abort = true; calledHelp = true; }
- ~ReadTreeCommand() {}
-
- vector<string> setParameters() { return outputNames; } //dummy doesn't really do anything
- string getCommandName() { return "read.tree"; }
- string getCommandCategory() { return "Hidden"; }
- string getHelpString() { return "This command is no longer available. You can provide your files directly to the downstream commands like unifrac.unweighted."; }
- string getCitation() { return "http://www.mothur.org/wiki/Read.tree"; }
- string getDescription() { return "read.tree"; }
-
- int execute();
- void help() { m->mothurOut(getHelpString()); }
-
-private:
- ReadTree* read;
- TreeMap* treeMap;
- string filename, treefile, groupfile, namefile;
- bool abort;
- map<string, string> nameMap;
- vector<string> outputNames;
-
- int readNamesFile();
- int numUniquesInName;
-
-};
-
-
-#endif
exit(1);
}
}
-
+//**********************************************************************************************************************
+// Returns the tag appended to an output file's root name for the given output type.
+// For every type this command handles, the tag is "pick" followed by the extension of inputName.
+// An empty string is returned (after logging an error) when the type is not registered in
+// outputTypes, or when it is registered but has no tag defined; the latter also sets
+// m->control_pressed.
+string RemoveGroupsCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        string tag = "";
+
+        //is this a type this command creates
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return tag;
+        }
+
+        //every known type keeps the extension of the file it was derived from
+        bool keepsInputExtension = (type == "fasta")
+                                || (type == "taxonomy")
+                                || (type == "name")
+                                || (type == "group")
+                                || (type == "list")
+                                || (type == "shared")
+                                || (type == "design");
+
+        if (keepsInputExtension) {
+            tag = "pick" + m->getExtension(inputName);
+        }
+        else {
+            m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+            m->control_pressed = true;
+        }
+
+        return tag;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "RemoveGroupsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
RemoveGroupsCommand::RemoveGroupsCommand(){
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
//get groups you want to remove
- if (accnosfile != "") { readAccnos(); }
+ if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups); }
if (groupfile != "") {
groupMap = new GroupMap(groupfile);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" + m->getExtension(fastafile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
ofstream out;
m->openOutputFile(outputFileName, out);
while(lookup[0] != NULL) {
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + lookup[0]->getLabel() + ".pick" + m->getExtension(sharedfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + lookup[0]->getLabel() + "." + getOutputFileNameTag("shared", sharedfile);
ofstream out;
m->openOutputFile(outputFileName, out);
outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" + m->getExtension(listfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" + m->getExtension(namefile);
-
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
-
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(designfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(designfile)) + "pick" + m->getExtension(designfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(designfile)) + getOutputFileNameTag("design", designfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
ofstream out;
m->openOutputFile(outputFileName, out);
}
}
//**********************************************************************************************************************
-void RemoveGroupsCommand::readAccnos(){
- try {
- Groups.clear();
-
- ifstream in;
- m->openInputFile(accnosfile, in);
- string name;
-
- while(!in.eof()){
- in >> name;
-
- Groups.push_back(name);
-
- m->gobble(in);
- }
- in.close();
-
- m->setGroups(Groups);
-
- }
- catch(exception& e) {
- m->errorOut(e, "RemoveGroupsCommand", "readAccnos");
- exit(1);
- }
-}
-//**********************************************************************************************************************
int RemoveGroupsCommand::fillNames(){
try {
vector<string> seqs = groupMap->getNamesSeqs();
vector<string> setParameters();
string getCommandName() { return "remove.groups"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Remove.groups"; }
string getDescription() { return "removes sequences from a list, fasta, name, group, shared, design or taxonomy file from a given group or set of groups"; }
int readShared();
int readName();
int readGroup();
- void readAccnos();
int readList();
int readTax();
int fillNames();
exit(1);
}
}
-
-
+//**********************************************************************************************************************
+// Returns the filename tag that is appended to an input file's root name to build
+// the output filename for the given output type.
+//   type      - output type key; must be registered in outputTypes.
+//   inputName - input file whose extension is reused; ignored for "alignreport".
+// Returns "" after logging an error when the type is not registered or has no tag
+// defined here (the latter case also sets m->control_pressed).
+string RemoveLineageCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Only types registered in outputTypes are produced by this command.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        // The align report tag is fixed and does not depend on inputName.
+        if (type == "alignreport") { return "pick.align.report"; }
+
+        // Every other supported type keeps the extension of inputName.
+        const bool reusesInputExtension = (type == "fasta") || (type == "taxonomy") || (type == "name")
+                                       || (type == "group") || (type == "list");
+        if (reusesInputExtension) { return "pick" + m->getExtension(inputName); }
+
+        // Registered type without a tag definition: report it and flag control_pressed.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "RemoveLineageCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
RemoveLineageCommand::RemoveLineageCommand(){
try {
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" + m->getExtension(fastafile);
-
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" + m->getExtension(listfile);
-
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" + m->getExtension(namefile);
-
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
-
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(alignfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + "pick.align.report";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + getOutputFileNameTag("alignreport");
ofstream out;
m->openOutputFile(outputFileName, out);
vector<string> setParameters();
string getCommandName() { return "remove.lineage"; }
string getCommandCategory() { return "Phylotype Analysis"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Remove.lineage"; }
string getDescription() { return "removes sequences from a list, fasta, name, group, alignreport or taxonomy file from a given taxonomy or set of taxonomies"; }
}
}
//**********************************************************************************************************************
+// Returns the fixed filename tag that is appended to an input file's root name to
+// build the output filename for the given output type.
+//   type      - output type key; must be registered in outputTypes.
+//   inputName - not consulted by this command; every tag here is a fixed string.
+// Returns "" after logging an error when the type is not registered or has no tag
+// defined here (the latter case also sets m->control_pressed).
+string RemoveOtuLabelsCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Only types registered in outputTypes are produced by this command.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "constaxonomy") { return "pick.taxonomy"; }
+        if (type == "otucorr")      { return "pick.corr"; }
+        if (type == "corraxes")     { return "pick.axes"; }
+
+        // Registered type without a tag definition: report it and flag control_pressed.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "RemoveOtuLabelsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
RemoveOtuLabelsCommand::RemoveOtuLabelsCommand(){
try {
abort = true; calledHelp = true;
setParameters();
vector<string> tempOutNames;
outputTypes["contaxonomy"] = tempOutNames;
- outputTypes["otu.corr"] = tempOutNames;
- outputTypes["corr.axes"] = tempOutNames;
+ outputTypes["otucorr"] = tempOutNames;
+ outputTypes["corraxes"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "RemoveOtuLabelsCommand", "RemoveOtuLabelsCommand");
}
vector<string> tempOutNames;
- outputTypes["contaxonomy"] = tempOutNames;
- outputTypes["otu.corr"] = tempOutNames;
- outputTypes["corr.axes"] = tempOutNames;
+ outputTypes["constaxonomy"] = tempOutNames;
+ outputTypes["otucorr"] = tempOutNames;
+ outputTypes["corraxes"] = tempOutNames;
//check for parameters
accnosfile = validParameter.validFile(parameters, "accnos", true);
if (abort == true) { if (calledHelp) { return 0; } return 2; }
//get labels you want to keep
- readAccnos();
+ labels = m->readAccnos(accnosfile);
if (m->control_pressed) { return 0; }
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(constaxonomyfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(constaxonomyfile)) + "pick.taxonomy";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(constaxonomyfile)) + getOutputFileNameTag("constaxonomy");
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(otucorrfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(otucorrfile)) + "pick.corr";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(otucorrfile)) + getOutputFileNameTag("otucorr");
ofstream out;
m->openOutputFile(outputFileName, out);
out.close();
if (wroteSomething == false) { m->mothurOut("Your file only contains labels from the .accnos file."); m->mothurOutEndLine(); }
- outputNames.push_back(outputFileName); outputTypes["otu.corr"].push_back(outputFileName);
+ outputNames.push_back(outputFileName); outputTypes["otucorr"].push_back(outputFileName);
m->mothurOut("Removed " + toString(removedCount) + " lines from your otu.corr file."); m->mothurOutEndLine();
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(corraxesfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(corraxesfile)) + "pick.axes";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(corraxesfile)) + getOutputFileNameTag("corraxes");
ofstream out;
m->openOutputFile(outputFileName, out);
out.close();
if (wroteSomething == false) { m->mothurOut("Your file only contains labels from the .accnos file."); m->mothurOutEndLine(); }
- outputNames.push_back(outputFileName); outputTypes["corr.axes"].push_back(outputFileName);
+ outputNames.push_back(outputFileName); outputTypes["corraxes"].push_back(outputFileName);
m->mothurOut("Removed " + toString(removedCount) + " lines from your corr.axes file."); m->mothurOutEndLine();
exit(1);
}
}
-
-//**********************************************************************************************************************
-int RemoveOtuLabelsCommand::readAccnos(){
- try {
-
- ifstream in;
- m->openInputFile(accnosfile, in);
- string name;
-
- while(!in.eof()){
- in >> name;
-
- labels.insert(name);
-
- m->gobble(in);
- }
- in.close();
-
- return 0;
-
- }
- catch(exception& e) {
- m->errorOut(e, "RemoveOtuLabelsCommand", "readAccnos");
- exit(1);
- }
-}
//**********************************************************************************************************************
vector<string> setParameters();
string getCommandName() { return "remove.otulabels"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Get.otulabels"; }
string getDescription() { return "Can be used with output from classify.otu, otu.association, or corr.axes to remove specific otus."; }
int readClassifyOtu();
int readOtuAssociation();
int readCorrAxes();
- int readAccnos();
-
};
/**************************************************************************************************/
exit(1);
}
}
-
+//**********************************************************************************************************************
+// Returns the filename tag that is appended to an input file's root name (and label)
+// to build the output filename for the given output type.
+//   type      - output type key; must be registered in outputTypes.
+//   inputName - input file whose extension is reused in the tag.
+// Returns "" after logging an error when the type is not registered or has no tag
+// defined here (the latter case also sets m->control_pressed).
+string RemoveOtusCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Only types registered in outputTypes are produced by this command.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        // Both supported types keep the extension of inputName.
+        const bool reusesInputExtension = (type == "group") || (type == "list");
+        if (reusesInputExtension) { return "pick" + m->getExtension(inputName); }
+
+        // Registered type without a tag definition: report it and flag control_pressed.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "RemoveOtusCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
///**********************************************************************************************************************
RemoveOtusCommand::RemoveOtusCommand(){
try {
groupMap->readMap();
//get groups you want to remove
- if (accnosfile != "") { readAccnos(); }
+ if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups); }
//make sure groups are valid
//takes care of user setting groupNames that are invalid or setting groups=all
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick." + label + m->getExtension(listfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + label + "." + getOutputFileNameTag("list", listfile);
ofstream out;
m->openOutputFile(outputFileName, out);
string GroupOutputDir = outputDir;
if (outputDir == "") { GroupOutputDir += m->hasPath(groupfile); }
- string outputGroupFileName = GroupOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick." + label + m->getExtension(groupfile);
+ string outputGroupFileName = GroupOutputDir + m->getRootName(m->getSimpleName(groupfile)) + label + "." + getOutputFileNameTag("group", groupfile);
ofstream outGroup;
m->openOutputFile(outputGroupFileName, outGroup);
}
}
//**********************************************************************************************************************
-void RemoveOtusCommand::readAccnos(){
- try {
- Groups.clear();
-
- ifstream in;
- m->openInputFile(accnosfile, in);
- string name;
-
- while(!in.eof()){
- in >> name;
-
- Groups.push_back(name);
-
- m->gobble(in);
- }
- in.close();
-
- }
- catch(exception& e) {
- m->errorOut(e, "RemoveOtusCommand", "readAccnos");
- exit(1);
- }
-}
-//**********************************************************************************************************************
vector<string> setParameters();
string getCommandName() { return "remove.otus"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Remove.otus"; }
string getDescription() { return "outputs a new list file containing the otus NOT containing sequences from the groups specified"; }
vector<string> outputNames, Groups;
GroupMap* groupMap;
- void readAccnos();
int readListGroup();
int processList(ListVector*&, GroupMap*&, ofstream&, ofstream&, bool&);
exit(1);
}
}
+//**********************************************************************************************************************
+// Returns the filename tag that is appended to an input file's root name to build
+// the output filename for the given output type.
+//   type      - output type key; must be registered in outputTypes.
+//   inputName - input file whose extension is reused in the tag.
+// Returns "" after logging an error when the type is not registered or has no tag
+// defined here (the latter case also sets m->control_pressed).
+string RemoveRareCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Only types registered in outputTypes are produced by this command.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        // Every supported type keeps the extension of inputName.
+        const bool reusesInputExtension = (type == "rabund") || (type == "sabund") || (type == "shared")
+                                       || (type == "group") || (type == "list");
+        if (reusesInputExtension) { return "pick" + m->getExtension(inputName); }
+
+        // Registered type without a tag definition: report it and flag control_pressed.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "RemoveRareCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+
//**********************************************************************************************************************
RemoveRareCommand::RemoveRareCommand(){
try {
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" + m->getExtension(listfile);
- string outputGroupFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
-
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
+ string outputGroupFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
ofstream out, outGroup;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(sabundfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sabundfile)) + "pick" + m->getExtension(sabundfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sabundfile)) + getOutputFileNameTag("sabund", sabundfile);
outputTypes["sabund"].push_back(outputFileName); outputNames.push_back(outputFileName);
ofstream out;
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(rabundfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(rabundfile)) + "pick" + m->getExtension(rabundfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(rabundfile)) + getOutputFileNameTag("rabund", rabundfile);
outputTypes["rabund"].push_back(outputFileName); outputNames.push_back(outputFileName);
ofstream out;
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "pick" + m->getExtension(sharedfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + getOutputFileNameTag("shared", sharedfile);
outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
ofstream out;
vector<string> setParameters();
string getCommandName() { return "remove.rare"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Remove.rare"; }
string getDescription() { return "removes rare sequences from a sabund, rabund, shared or list and group file"; }
exit(1);
}
}
-
+//**********************************************************************************************************************
+// Returns the filename tag that is appended to an input file's root name to build
+// the output filename for the given output type.
+//   type      - output type key; must be registered in outputTypes.
+//   inputName - input file whose extension is reused; ignored for "alignreport".
+// Returns "" after logging an error when the type is not registered or has no tag
+// defined here (the latter case also sets m->control_pressed).
+string RemoveSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Only types registered in outputTypes are produced by this command.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        // The align report tag is fixed and does not depend on inputName.
+        if (type == "alignreport") { return "pick.align.report"; }
+
+        // Every other supported type keeps the extension of inputName.
+        const bool reusesInputExtension = (type == "fasta") || (type == "taxonomy") || (type == "name")
+                                       || (type == "group") || (type == "list") || (type == "qfile");
+        if (reusesInputExtension) { return "pick" + m->getExtension(inputName); }
+
+        // Registered type without a tag definition: report it and flag control_pressed.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "RemoveSeqsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
RemoveSeqsCommand::RemoveSeqsCommand(){
if (abort == true) { if (calledHelp) { return 0; } return 2; }
//get names you want to keep
- readAccnos();
+ names = m->readAccnos(accnosfile);
if (m->control_pressed) { return 0; }
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" + m->getExtension(fastafile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(qualfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "pick" + m->getExtension(qualfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + getOutputFileNameTag("qfile", qualfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" + m->getExtension(listfile);
-
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" + m->getExtension(namefile);
-
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
-
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(alignfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + "pick.align.report";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + getOutputFileNameTag("alignreport");
ofstream out;
m->openOutputFile(outputFileName, out);
}
}
//**********************************************************************************************************************
-void RemoveSeqsCommand::readAccnos(){
- try {
-
- ifstream in;
- m->openInputFile(accnosfile, in);
- string name;
-
- while(!in.eof()){
- in >> name;
-
- names.insert(name);
-
- m->gobble(in);
- }
- in.close();
-
- }
- catch(exception& e) {
- m->errorOut(e, "RemoveSeqsCommand", "readAccnos");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
vector<string> setParameters();
string getCommandName() { return "remove.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Remove.seqs"; }
string getDescription() { return "removes sequences from a list, fasta, name, group, alignreport, quality or taxonomy file"; }
int readName();
int readGroup();
int readAlign();
- void readAccnos();
int readList();
int readTax();
int readQual();
exit(1);
}
}
-
+//**********************************************************************************************************************
+// Returns the filename tag that is appended to an input file's root name to build
+// the output filename for the given output type.
+//   type      - output type key; must be registered in outputTypes.
+//   inputName - input file whose extension is reused in the tag.
+// Returns "" after logging an error when the type is not registered or has no tag
+// defined here (the latter case also sets m->control_pressed).
+string ReverseSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Only types registered in outputTypes are produced by this command.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        // Both supported types keep the extension of inputName.
+        const bool reusesInputExtension = (type == "fasta") || (type == "qfile");
+        if (reusesInputExtension) { return "rc" + m->getExtension(inputName); }
+
+        // Registered type without a tag definition: report it and flag control_pressed.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ReverseSeqsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
ReverseSeqsCommand::ReverseSeqsCommand(){
try {
ofstream outFASTA;
string tempOutputDir = outputDir;
if (outputDir == "") { tempOutputDir += m->hasPath(fastaFileName); } //if user entered a file with a path then preserve it
- fastaReverseFileName = tempOutputDir + m->getRootName(m->getSimpleName(fastaFileName)) + "rc" + m->getExtension(fastaFileName);
+ fastaReverseFileName = tempOutputDir + m->getRootName(m->getSimpleName(fastaFileName)) + getOutputFileNameTag("fasta", fastaFileName);
m->openOutputFile(fastaReverseFileName, outFASTA);
while(!inFASTA.eof()){
ofstream outQual;
string tempOutputDir = outputDir;
if (outputDir == "") { tempOutputDir += m->hasPath(qualFileName); } //if user entered a file with a path then preserve it
- string qualReverseFileName = tempOutputDir + m->getRootName(m->getSimpleName(qualFileName)) + "rc" + m->getExtension(qualFileName);
- m->openOutputFile(qualReverseFileName, outQual);
+ string qualReverseFileName = tempOutputDir + m->getRootName(m->getSimpleName(qualFileName)) + getOutputFileNameTag("qfile", qualFileName);
+ m->openOutputFile(qualReverseFileName, outQual);
while(!inQual.eof()){
if (m->control_pressed) { inQual.close(); outQual.close(); m->mothurRemove(qualReverseFileName); return 0; }
vector<string> setParameters();
string getCommandName() { return "reverse.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Reverse.seqs"; }
string getDescription() { return "outputs a fasta file containing the reverse-complements"; }
helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, qfile, alignreport, taxonomy, optimize, criteria and processors.\n";
helpString += "The fasta parameter is required.\n";
helpString += "The alignreport and taxonomy parameters allow you to remove bad seqs from taxonomy and alignreport files.\n";
- helpString += "The start parameter .... The default is -1.\n";
- helpString += "The end parameter .... The default is -1.\n";
+ helpString += "The start parameter is used to set a position the \"good\" sequences must start by. The default is -1.\n";
+ helpString += "The end parameter is used to set a position the \"good\" sequences must end after. The default is -1.\n";
helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
exit(1);
}
}
+//**********************************************************************************************************************
+// Returns the filename tag that is appended to an input file's root name to build
+// the output filename for the given output type.
+//   type      - output type key; must be registered in outputTypes.
+//   inputName - input file whose extension is reused; ignored for "accnos" and
+//               "alignreport", whose tags are fixed strings.
+// Returns "" after logging an error when the type is not registered or has no tag
+// defined here (the latter case also sets m->control_pressed).
+string ScreenSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        // Only types registered in outputTypes are produced by this command.
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        // Fixed tags that do not depend on inputName.
+        if (type == "accnos")      { return "bad.accnos"; }
+        if (type == "alignreport") { return "good.align.report"; }
+
+        // Every other supported type keeps the extension of inputName.
+        const bool reusesInputExtension = (type == "fasta") || (type == "taxonomy") || (type == "name")
+                                       || (type == "group") || (type == "qfile");
+        if (reusesInputExtension) { return "good" + m->getExtension(inputName); }
+
+        // Registered type without a tag definition: report it and flag control_pressed.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ScreenSeqsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+
//**********************************************************************************************************************
ScreenSeqsCommand::ScreenSeqsCommand(){
try {
#endif
}
- string goodSeqFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "good" + m->getExtension(fastafile);
- string badAccnosFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "bad.accnos";
+ string goodSeqFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
+ string badAccnosFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("accnos");
int numFastaSeqs = 0;
set<string> badSeqNames;
string seqName, seqList, group;
set<string>::iterator it;
- string goodNameFile = outputDir + m->getRootName(m->getSimpleName(namefile)) + "good" + m->getExtension(namefile);
+ string goodNameFile = outputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
outputNames.push_back(goodNameFile); outputTypes["name"].push_back(goodNameFile);
ofstream goodNameOut; m->openOutputFile(goodNameFile, goodNameOut);
ifstream inputGroups;
m->openInputFile(groupfile, inputGroups);
- string goodGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "good" + m->getExtension(groupfile);
+ string goodGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
outputNames.push_back(goodGroupFile); outputTypes["group"].push_back(goodGroupFile);
ofstream goodGroupOut; m->openOutputFile(goodGroupFile, goodGroupOut);
string seqName, group;
set<string>::iterator it;
- string goodGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "good" + m->getExtension(groupfile);
- outputNames.push_back(goodGroupFile); outputTypes["group"].push_back(goodGroupFile);
+ string goodGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
+ outputNames.push_back(goodGroupFile); outputTypes["group"].push_back(goodGroupFile);
ofstream goodGroupOut; m->openOutputFile(goodGroupFile, goodGroupOut);
while(!inputGroups.eof()){
string seqName, group;
set<string>::iterator it;
- string goodAlignReportFile = outputDir + m->getRootName(m->getSimpleName(alignreport)) + "good" + m->getExtension(alignreport);
+ string goodAlignReportFile = outputDir + m->getRootName(m->getSimpleName(alignreport)) + getOutputFileNameTag("alignreport");
outputNames.push_back(goodAlignReportFile); outputTypes["alignreport"].push_back(goodAlignReportFile);
ofstream goodAlignReportOut; m->openOutputFile(goodAlignReportFile, goodAlignReportOut);
string seqName, tax;
set<string>::iterator it;
- string goodTaxFile = outputDir + m->getRootName(m->getSimpleName(taxonomy)) + "good" + m->getExtension(taxonomy);
+ string goodTaxFile = outputDir + m->getRootName(m->getSimpleName(taxonomy)) + getOutputFileNameTag("taxonomy", taxonomy);
outputNames.push_back(goodTaxFile); outputTypes["taxonomy"].push_back(goodTaxFile);
ofstream goodTaxOut; m->openOutputFile(goodTaxFile, goodTaxOut);
m->openInputFile(qualfile, in);
set<string>::iterator it;
- string goodQualFile = outputDir + m->getRootName(m->getSimpleName(qualfile)) + "good" + m->getExtension(qualfile);
+ string goodQualFile = outputDir + m->getRootName(m->getSimpleName(qualfile)) + getOutputFileNameTag("qfile", qualfile);
outputNames.push_back(goodQualFile); outputTypes["qfile"].push_back(goodQualFile);
ofstream goodQual; m->openOutputFile(goodQualFile, goodQual);
int length = MPIPos[start+i+1] - MPIPos[start+i];
char* buf4 = new char[length];
- memcpy(buf4, outputString.c_str(), length);
MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
vector<string> setParameters();
string getCommandName() { return "screen.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Screen.seqs"; }
string getDescription() { return "enables you to keep sequences that fulfill certain user defined criteria"; }
vector<string> outputNames;
vector<string> optimize;
map<string, int> nameMap;
- int readNames();
int getSummary(vector<unsigned long long>&);
int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string);
}
}
//**********************************************************************************************************************
+string AlignCheckCommand::getOutputFileNameTag(string type, string inputName=""){ // returns the filename tag for the given output type; inputName is not read in this body
+ try {
+ string outputFileName = ""; // remains empty on both error paths below
+ map<string, vector<string> >::iterator it;
+
+ //is this a type this command creates, i.e. is it a key of outputTypes
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } // only logs; control_pressed is not set on this path
+ else {
+ if (type == "aligncheck") { outputFileName = "align.check"; } // the single tag defined for this command
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // registered type with no tag: log and raise control_pressed
+ }
+ return outputFileName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "AlignCheckCommand", "getOutputFileNameTag"); // report class and function name, then terminate
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
AlignCheckCommand::AlignCheckCommand(){
try {
abort = true; calledHelp = true;
m->openInputFile(fastafile, in);
ofstream out;
- string outfile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "align.check";
+ string outfile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("aligncheck");
m->openOutputFile(outfile, out);
vector<string> setParameters();
string getCommandName() { return "align.check"; }
string getCommandCategory() { return "Sequence Processing"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Align.check"; }
string getDescription() { return "calculate the number of potentially misaligned bases in a 16S rRNA gene sequence alignment"; }
}
}
//**********************************************************************************************************************
+string SensSpecCommand::getOutputFileNameTag(string type, string inputName=""){ // returns the filename tag for the given output type; inputName is not read in this body
+ try {
+ string outputFileName = ""; // remains empty on both error paths below
+ map<string, vector<string> >::iterator it;
+
+ //is this a type this command creates, i.e. is it a key of outputTypes
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } // only logs; control_pressed is not set on this path
+ else {
+ if (type == "sensspec") { outputFileName = "sensspec"; } // the single tag defined for this command
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // registered type with no tag: log and raise control_pressed
+ }
+ return outputFileName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SensSpecCommand", "getOutputFileNameTag"); // report class and function name, then terminate
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
SensSpecCommand::SensSpecCommand(){
try {
abort = true; calledHelp = true;
else { allLines = 1; }
}
- sensSpecFileName = outputDir + m->getRootName(m->getSimpleName(listFile)) + "sensspec";
+ sensSpecFileName = outputDir + m->getRootName(m->getSimpleName(listFile)) + getOutputFileNameTag("sensspec");
}
}
catch(exception& e) {
vector<string> setParameters();
string getCommandName() { return "sens.spec"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "Schloss PD, Westcott SL (2011). Assessing and improving methods used in OTU-based approaches for 16S rRNA gene sequence analysis. Appl Environ Microbiol. \nhttp://www.mothur.org/wiki/Sens.spec"; }
string getDescription() { return "sens.spec"; }
}
}
//**********************************************************************************************************************
+string SeqErrorCommand::getOutputFileNameTag(string type, string inputName=""){ // returns the filename tag for the given output type; inputName is not read in this body
+ try {
+ string outputFileName = ""; // remains empty on both error paths below
+ map<string, vector<string> >::iterator it;
+
+ //is this a type this command creates, i.e. is it a key of outputTypes
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } // only logs; control_pressed is not set on this path
+ else {
+ if (type == "errorsummary") { outputFileName = "error.summary"; } // type keys carry no dots; the returned tags do
+ else if (type == "errorseq") { outputFileName = "error.seq"; }
+ else if (type == "errorquality") { outputFileName = "error.quality"; }
+ else if (type == "errorqualforward") { outputFileName = "error.qual.forward"; }
+ else if (type == "errorqualreverse") { outputFileName = "error.qual.reverse"; }
+ else if (type == "errorforward") { outputFileName = "error.seq.forward"; } // note the tag is error.seq.forward, not error.forward
+ else if (type == "errorreverse") { outputFileName = "error.seq.reverse"; } // note the tag is error.seq.reverse, not error.reverse
+ else if (type == "errorcount") { outputFileName = "error.count"; }
+ else if (type == "errormatrix") { outputFileName = "error.matrix"; }
+ else if (type == "errorchimera") { outputFileName = "error.chimera"; }
+ else if (type == "errorref-query") { outputFileName = "error.ref-query"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // registered type with no tag: log and raise control_pressed
+ }
+ return outputFileName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SeqErrorCommand", "getOutputFileNameTag"); // report class and function name, then terminate
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
SeqErrorCommand::SeqErrorCommand(){
try {
abort = true; calledHelp = true;
setParameters();
vector<string> tempOutNames;
- outputTypes["error.summary"] = tempOutNames;
- outputTypes["error.seq"] = tempOutNames;
- outputTypes["error.quality"] = tempOutNames;
- outputTypes["error.qual.forward"] = tempOutNames;
- outputTypes["error.qual.reverse"] = tempOutNames;
- outputTypes["error.forward"] = tempOutNames;
- outputTypes["error.reverse"] = tempOutNames;
- outputTypes["error.count"] = tempOutNames;
- outputTypes["error.matrix"] = tempOutNames;
+ outputTypes["errorsummary"] = tempOutNames;
+ outputTypes["errorseq"] = tempOutNames;
+ outputTypes["errorquality"] = tempOutNames;
+ outputTypes["errorqualforward"] = tempOutNames;
+ outputTypes["errorqualreverse"] = tempOutNames;
+ outputTypes["errorforward"] = tempOutNames;
+ outputTypes["errorreverse"] = tempOutNames;
+ outputTypes["errorcount"] = tempOutNames;
+ outputTypes["errormatrix"] = tempOutNames;
+ outputTypes["errorchimera"] = tempOutNames;
+ outputTypes["errorref-query"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "SeqErrorCommand", "SeqErrorCommand");
//initialize outputTypes
vector<string> tempOutNames;
- outputTypes["error.summary"] = tempOutNames;
- outputTypes["error.seq"] = tempOutNames;
- outputTypes["error.quality"] = tempOutNames;
- outputTypes["error.qual.forward"] = tempOutNames;
- outputTypes["error.qual.reverse"] = tempOutNames;
- outputTypes["error.forward"] = tempOutNames;
- outputTypes["error.reverse"] = tempOutNames;
- outputTypes["error.count"] = tempOutNames;
- outputTypes["error.matrix"] = tempOutNames;
+ outputTypes["errorsummary"] = tempOutNames;
+ outputTypes["errorseq"] = tempOutNames;
+ outputTypes["errorquality"] = tempOutNames;
+ outputTypes["errorqualforward"] = tempOutNames;
+ outputTypes["errorqualreverse"] = tempOutNames;
+ outputTypes["errorforward"] = tempOutNames;
+ outputTypes["errorreverse"] = tempOutNames;
+ outputTypes["errorcount"] = tempOutNames;
+ outputTypes["errormatrix"] = tempOutNames;
+ outputTypes["errorchimera"] = tempOutNames;
+ outputTypes["errorref-query"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
totalBases = 0;
totalMatches = 0;
- string errorSummaryFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.summary";
- outputNames.push_back(errorSummaryFileName); outputTypes["error.summary"].push_back(errorSummaryFileName);
+ string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(queryFileName));
+ string errorSummaryFileName = fileNameRoot + getOutputFileNameTag("errorsummary");
+ outputNames.push_back(errorSummaryFileName); outputTypes["errorsummary"].push_back(errorSummaryFileName);
- string errorSeqFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq";
- outputNames.push_back(errorSeqFileName); outputTypes["error.seq"].push_back(errorSeqFileName);
+ string errorSeqFileName = fileNameRoot + getOutputFileNameTag("errorseq");
+ outputNames.push_back(errorSeqFileName); outputTypes["errorseq"].push_back(errorSeqFileName);
- string errorChimeraFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.chimera";
- outputNames.push_back(errorChimeraFileName); outputTypes["error.chimera"].push_back(errorChimeraFileName);
+ string errorChimeraFileName = fileNameRoot + getOutputFileNameTag("errorchimera");
+ outputNames.push_back(errorChimeraFileName); outputTypes["errorchimera"].push_back(errorChimeraFileName);
getReferences(); //read in reference sequences - make sure there's no ambiguous bases
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
- string errorCountFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.count";
+ string errorCountFileName = fileNameRoot + getOutputFileNameTag("errorcount");
ofstream errorCountFile;
m->openOutputFile(errorCountFileName, errorCountFile);
- outputNames.push_back(errorCountFileName); outputTypes["error.count"].push_back(errorCountFileName);
+ outputNames.push_back(errorCountFileName); outputTypes["errorcount"].push_back(errorCountFileName);
m->mothurOut("Overall error rate:\t" + toString((double)(totalBases - totalMatches) / (double)totalBases) + "\n");
m->mothurOut("Errors\tSequences\n");
errorCountFile << "Errors\tSequences\n";
printSubMatrix();
- string megAlignmentFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.ref-query";
+ string megAlignmentFileName = fileNameRoot + getOutputFileNameTag("errorref-query");
ofstream megAlignmentFile;
m->openOutputFile(megAlignmentFileName, megAlignmentFile);
- outputNames.push_back(megAlignmentFileName); outputTypes["error.ref-query"].push_back(megAlignmentFileName);
+ outputNames.push_back(megAlignmentFileName); outputTypes["errorref-query"].push_back(megAlignmentFileName);
for(int i=0;i<numRefs;i++){
megAlignmentFile << referenceSeqs[i].getInlineSeq() << endl;
if (queryFile.eof()) { break; }
#endif
- if(index % 100 == 0){ m->mothurOut(toString(index) + '\n'); }
+ if(index % 100 == 0){ m->mothurOut(toString(index)); m->mothurOutEndLine(); }
}
queryFile.close();
if(qFileName != "" && rFileName != ""){ reportFile.close(); qualFile.close(); }
errorSeqFile.close();
//report progress
- if(index % 100 != 0){ m->mothurOut(toString(index) + '\n'); }
+ if(index % 100 != 0){ m->mothurOut(toString(index)); m->mothurOutEndLine(); }
return index;
}
//
// int endPos = rdb->referenceSeqs[i].getEndPos();
// if(endPos < minEndPos) { minEndPos = endPos; }
+ if (rdb->referenceSeqs[i].getNumBases() == 0) {
+ m->mothurOut("[WARNING]: " + rdb->referenceSeqs[i].getName() + " is blank, ignoring.");m->mothurOutEndLine();
+ }else {
+ referenceSeqs.push_back(rdb->referenceSeqs[i]);
+ }
- referenceSeqs.push_back(rdb->referenceSeqs[i]);
}
referenceFileName = rdb->getSavedReference();
- m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences.");m->mothurOutEndLine();
+ m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(referenceSeqs.size()) + " sequences.");m->mothurOutEndLine();
}else {
int start = time(NULL);
//
// int endPos = currentSeq.getEndPos();
// if(endPos < minEndPos) { minEndPos = endPos; }
- referenceSeqs.push_back(currentSeq);
-
- if (rdb->save) { rdb->referenceSeqs.push_back(currentSeq); }
+ if (currentSeq.getNumBases() == 0) {
+ m->mothurOut("[WARNING]: " + currentSeq.getName() + " is blank, ignoring.");m->mothurOutEndLine();
+ }else {
+ referenceSeqs.push_back(currentSeq);
+ if (rdb->save) { rdb->referenceSeqs.push_back(currentSeq); }
+ }
m->gobble(referenceFile);
}
for(int i=0;i<numRefs;i++){
referenceSeqs[i].padToPos(maxStartPos);
referenceSeqs[i].padFromPos(minEndPos);
- }
+ }
if(numAmbigSeqs != 0){
m->mothurOut("Warning: " + toString(numAmbigSeqs) + " reference sequences have ambiguous bases, these bases will be ignored\n");
errors.errorRate = (double)(errors.total-errors.matches) / (double)errors.total;
errors.queryName = query.getName();
errors.refName = reference.getName();
-
//return errors;
return 0;
}
void SeqErrorCommand::printSubMatrix(){
try {
- string subMatrixFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.matrix";
+ string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(queryFileName));
+ string subMatrixFileName = fileNameRoot + getOutputFileNameTag("errormatrix");
ofstream subMatrixFile;
m->openOutputFile(subMatrixFileName, subMatrixFile);
- outputNames.push_back(subMatrixFileName); outputTypes["error.matrix"].push_back(subMatrixFileName);
+ outputNames.push_back(subMatrixFileName); outputTypes["errormatrix"].push_back(subMatrixFileName);
vector<string> bases(6);
bases[0] = "A";
bases[1] = "T";
void SeqErrorCommand::printErrorFRFile(map<char, vector<int> > errorForward, map<char, vector<int> > errorReverse){
try{
- string errorForwardFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq.forward";
+ string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(queryFileName));
+ string errorForwardFileName = fileNameRoot + getOutputFileNameTag("errorforward");
ofstream errorForwardFile;
m->openOutputFile(errorForwardFileName, errorForwardFile);
- outputNames.push_back(errorForwardFileName); outputTypes["error.forward"].push_back(errorForwardFileName);
+ outputNames.push_back(errorForwardFileName); outputTypes["errorforward"].push_back(errorForwardFileName);
errorForwardFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl;
for(int i=0;i<maxLength;i++){
}
errorForwardFile.close();
- string errorReverseFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq.reverse";
+ string errorReverseFileName = fileNameRoot + getOutputFileNameTag("errorreverse");
ofstream errorReverseFile;
m->openOutputFile(errorReverseFileName, errorReverseFile);
- outputNames.push_back(errorReverseFileName); outputTypes["error.reverse"].push_back(errorReverseFileName);
+ outputNames.push_back(errorReverseFileName); outputTypes["errorreverse"].push_back(errorReverseFileName);
errorReverseFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl;
for(int i=0;i<maxLength;i++){
void SeqErrorCommand::printErrorQuality(map<char, vector<int> > qScoreErrorMap){
try{
-
- string errorQualityFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.quality";
+ string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(queryFileName));
+ string errorQualityFileName = fileNameRoot + getOutputFileNameTag("errorquality");
ofstream errorQualityFile;
m->openOutputFile(errorQualityFileName, errorQualityFile);
- outputNames.push_back(errorQualityFileName); outputTypes["error.quality"].push_back(errorQualityFileName);
+ outputNames.push_back(errorQualityFileName); outputTypes["errorquality"].push_back(errorQualityFileName);
errorQualityFile << "qscore\tmatches\tsubstitutions\tinsertions\tambiguous" << endl;
for(int i=0;i<41;i++){
}
}
}
-
- string qualityForwardFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.qual.forward";
+ string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(queryFileName));
+ string qualityForwardFileName = fileNameRoot + getOutputFileNameTag("errorqualforward");
ofstream qualityForwardFile;
m->openOutputFile(qualityForwardFileName, qualityForwardFile);
- outputNames.push_back(qualityForwardFileName); outputTypes["error.qual.forward"].push_back(qualityForwardFileName);
+ outputNames.push_back(qualityForwardFileName); outputTypes["errorqualforward"].push_back(qualityForwardFileName);
for(int i=0;i<numColumns;i++){ qualityForwardFile << '\t' << i; } qualityForwardFile << endl;
qualityForwardFile.close();
- string qualityReverseFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.qual.reverse";
+ string qualityReverseFileName = fileNameRoot + getOutputFileNameTag("errorqualreverse");
ofstream qualityReverseFile;
m->openOutputFile(qualityReverseFileName, qualityReverseFile);
- outputNames.push_back(qualityReverseFileName); outputTypes["error.qual.reverse"].push_back(qualityReverseFileName);
+ outputNames.push_back(qualityReverseFileName); outputTypes["errorqualreverse"].push_back(qualityReverseFileName);
for(int i=0;i<numColumns;i++){ qualityReverseFile << '\t' << i; } qualityReverseFile << endl;
for(int i=0;i<numRows;i++){
vector<string> setParameters();
string getCommandName() { return "seq.error"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Seq.error"; }
string getDescription() { return "seq.error"; }
exit(1);
}
}
+//**********************************************************************************************************************
+string SeqSummaryCommand::getOutputFileNameTag(string type, string inputName=""){ // returns the filename tag for the given output type; inputName is not read in this body
+ try {
+ string outputFileName = ""; // remains empty on both error paths below
+ map<string, vector<string> >::iterator it;
+
+ //is this a type this command creates, i.e. is it a key of outputTypes
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } // only logs; control_pressed is not set on this path
+ else {
+ if (type == "summary") { outputFileName = "summary"; } // the single tag defined for this command
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // registered type with no tag: log and raise control_pressed
+ }
+ return outputFileName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SeqSummaryCommand", "getOutputFileNameTag"); // report class and function name, then terminate
+ exit(1);
+ }
+}
//**********************************************************************************************************************
SeqSummaryCommand::SeqSummaryCommand(){
//set current fasta to fastafile
m->setFastaFile(fastafile);
- string summaryFile = outputDir + m->getSimpleName(fastafile) + ".summary";
+ string summaryFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("summary");
int numSeqs = 0;
vector<string> setParameters();
string getCommandName() { return "summary.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Summary.seqs"; }
string getDescription() { return "summarize the quality of sequences in an unaligned or aligned fasta file"; }
//********************************************************************************************************************
void Sequence::padFromPos(int end){
- cout << end << '\t' << endPos << endl;
+ //cout << end << '\t' << endPos << endl;
for(int j = end; j < endPos; j++) {
aligned[j] = '.';
}
m->openInputFile(fastaFile, in);
map<string, string> seqName; //stores name -> sequence string so we can make new "unique" sequences when we parse the name file
+ int fastaCount = 0;
while (!in.eof()) {
if (m->control_pressed) { break; }
Sequence seq(in); m->gobble(in);
+ fastaCount++;
+ if (m->debug) { if((fastaCount) % 1000 == 0){ m->mothurOut("[DEBUG]: reading seq " + toString(fastaCount) + "\n."); } }
- if (seq.getName() != "") {
+ if (seq.getName() != "") {
string group = groupMap->getGroup(seq.getName());
if (group == "not found") { error = 1; m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your groupfile, please correct."); m->mothurOutEndLine(); }
ifstream inName;
m->openInputFile(nameFile, inName);
- string first, second;
+ //string first, second;
int countName = 0;
set<string> thisnames1;
- while(!inName.eof()) {
-
+ string rest = "";
+ char buffer[4096];
+ bool pairDone = false;
+ bool columnOne = true;
+ string firstCol, secondCol;
+
+ while (!inName.eof()) {
if (m->control_pressed) { break; }
- inName >> first; m->gobble(inName);
- inName >> second; m->gobble(inName);
-
- vector<string> names;
- m->splitAtChar(second, names, ',');
-
- //get aligned string for these seqs from the fasta file
- string alignedString = "";
- map<string, string>::iterator itAligned = seqName.find(names[0]);
- if (itAligned == seqName.end()) {
- error = 1; m->mothurOut("[ERROR]: " + names[0] + " is in your name file and not in your fasta file, please correct."); m->mothurOutEndLine();
- }else {
- alignedString = itAligned->second;
- }
-
- //separate by group - parse one line in name file
- map<string, string> splitMap; //group -> name1,name2,...
- map<string, string>::iterator it;
- for (int i = 0; i < names.size(); i++) {
-
- string group = groupMap->getGroup(names[i]);
- if (group == "not found") { error = 1; m->mothurOut("[ERROR]: " + names[i] + " is in your name file and not in your groupfile, please correct."); m->mothurOutEndLine(); }
- else {
-
- it = splitMap.find(group);
- if (it != splitMap.end()) { //adding seqs to this group
- (it->second) += "," + names[i];
- thisnames1.insert(names[i]);
- countName++;
- }else { //first sighting of this group
- splitMap[group] = names[i];
- countName++;
- thisnames1.insert(names[i]);
-
- //is this seq in the fasta file?
- if (i != 0) { //if not then we need to add a duplicate sequence to the seqs for this group so the new "fasta" and "name" files will match
- Sequence tempSeq(names[i], alignedString); //get the first guys sequence string since he's in the fasta file.
- seqs[group].push_back(tempSeq);
- }
- }
- }
-
- allSeqsMap[names[i]] = names[0];
- }
-
-
- //fill nameMapPerGroup - holds all lines in namefile separated by group
- for (it = splitMap.begin(); it != splitMap.end(); it++) {
- //grab first name
- string firstName = "";
- for(int i = 0; i < (it->second).length(); i++) {
- if (((it->second)[i]) != ',') {
- firstName += ((it->second)[i]);
- }else { break; }
- }
-
- //group1 -> seq1 -> seq1,seq2,seq3
- nameMapPerGroup[it->first][firstName] = it->second;
- }
+ inName.read(buffer, 4096);
+ vector<string> pieces = m->splitWhiteSpace(rest, buffer, inName.gcount());
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) { //save one line
+ if (m->debug) { m->mothurOut("[DEBUG]: reading names: " + firstCol + '\t' + secondCol + ".\n"); }
+ vector<string> names;
+ m->splitAtChar(secondCol, names, ',');
+
+ //get aligned string for these seqs from the fasta file
+ string alignedString = "";
+ map<string, string>::iterator itAligned = seqName.find(names[0]);
+ if (itAligned == seqName.end()) {
+ error = 1; m->mothurOut("[ERROR]: " + names[0] + " is in your name file and not in your fasta file, please correct."); m->mothurOutEndLine();
+ }else {
+ alignedString = itAligned->second;
+ }
+
+ //separate by group - parse one line in name file
+ map<string, string> splitMap; //group -> name1,name2,...
+ map<string, string>::iterator it;
+ for (int i = 0; i < names.size(); i++) {
+
+ string group = groupMap->getGroup(names[i]);
+ if (group == "not found") { error = 1; m->mothurOut("[ERROR]: " + names[i] + " is in your name file and not in your groupfile, please correct."); m->mothurOutEndLine(); }
+ else {
+
+ it = splitMap.find(group);
+ if (it != splitMap.end()) { //adding seqs to this group
+ (it->second) += "," + names[i];
+ thisnames1.insert(names[i]);
+ countName++;
+ }else { //first sighting of this group
+ splitMap[group] = names[i];
+ countName++;
+ thisnames1.insert(names[i]);
+
+ //is this seq in the fasta file?
+ if (i != 0) { //if not then we need to add a duplicate sequence to the seqs for this group so the new "fasta" and "name" files will match
+ Sequence tempSeq(names[i], alignedString); //get the first guys sequence string since he's in the fasta file.
+ seqs[group].push_back(tempSeq);
+ }
+ }
+ }
+
+ allSeqsMap[names[i]] = names[0];
+ }
+
+
+ //fill nameMapPerGroup - holds all lines in namefile separated by group
+ for (it = splitMap.begin(); it != splitMap.end(); it++) {
+ //grab first name
+ string firstName = "";
+ for(int i = 0; i < (it->second).length(); i++) {
+ if (((it->second)[i]) != ',') {
+ firstName += ((it->second)[i]);
+ }else { break; }
+ }
+
+ //group1 -> seq1 -> seq1,seq2,seq3
+ nameMapPerGroup[it->first][firstName] = it->second;
+ }
+
+ pairDone = false;
+ }
+ }
}
-
inName.close();
if (error == 1) { m->control_pressed = true; }
-
+
if (countName != (groupMap->getNumSeqs())) {
vector<string> groupseqsnames = groupMap->getNamesSeqs();
m->mothurOut("[ERROR]: No sequences available for group " + g + ", please correct."); m->mothurOutEndLine();
}else {
seqForThisGroup = it->second;
+ if (m->debug) { m->mothurOut("[DEBUG]: group " + g + " fasta file has " + toString(seqForThisGroup.size()) + " sequences."); }
}
return seqForThisGroup;
m->mothurOut("[ERROR]: No nameMap available for group " + g + ", please correct."); m->mothurOutEndLine();
}else {
nameMapForThisGroup = it->second;
+ if (m->debug) { m->mothurOut("[DEBUG]: group " + g + " name file has " + toString(nameMapForThisGroup.size()) + " unique sequences."); }
}
return nameMapForThisGroup;
CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptree);
CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pshared);
CommandParameter pordergroup("ordergroup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pordergroup);
+ CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcount);
CommandParameter prelabund("relabund", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(prelabund);
CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(psff);
CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
try {
string helpString = "";
helpString += "The set.current command allows you to set the current files saved by mothur.\n";
- helpString += "The set.current command parameters are: clear, phylip, column, list, rabund, sabund, name, group, design, order, tree, shared, ordergroup, relabund, fasta, qfile, sff, oligos, accnos, biom and taxonomy.\n";
+ helpString += "The set.current command parameters are: clear, phylip, column, list, rabund, sabund, name, group, design, order, tree, shared, ordergroup, relabund, fasta, qfile, sff, oligos, accnos, biom, count and taxonomy.\n";
helpString += "The clear paramter is used to indicate which file types you would like to clear values for, multiple types can be separated by dashes.\n";
helpString += "The set.current command should be in the following format: \n";
helpString += "set.current(fasta=yourFastaFile) or set.current(fasta=amazon.fasta, clear=name-accnos)\n";
if (path == "") { parameters["ordergroup"] = inputDir + it->second; }
}
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
+
it = parameters.find("relabund");
//user has given a template file
if(it != parameters.end()){
if (groupfile == "not open") { m->mothurOut("Ignoring: " + parameters["group"]); m->mothurOutEndLine(); groupfile = ""; }
else if (groupfile == "not found") { groupfile = ""; }
if (groupfile != "") { m->setGroupFile(groupfile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { m->mothurOut("Ignoring: " + parameters["count"]); m->mothurOutEndLine(); countfile = ""; }
+ else if (countfile == "not found") { countfile = ""; }
+ if (countfile != "") { m->setCountTableFile(countfile); }
designfile = validParameter.validFile(parameters, "design", true);
if (designfile == "not open") { m->mothurOut("Ignoring: " + parameters["design"]); m->mothurOutEndLine(); designfile = ""; }
m->setFlowFile("");
}else if (types[i] == "biom") {
m->setBiomFile("");
+ }else if (types[i] == "count") {
+ m->setCountTableFile("");
}else if (types[i] == "processors") {
m->setProcessors("1");
}else if (types[i] == "all") {
vector<string> setParameters();
string getCommandName() { return "set.current"; }
string getCommandCategory() { return "General"; }
+ string getOutputFileNameTag(string, string) { return ""; } // inline stub: ignores both arguments and always returns an empty tag
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Set.current"; }
string getDescription() { return "set current files for mothur"; }
string clearTypes;
vector<string> types;
- string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile, biomfile;
+ string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile, biomfile, countfile;
string orderfile, treefile, sharedfile, ordergroupfile, relabundfile, fastafile, qualfile, sfffile, oligosfile, processors, flowfile;
vector<string> setParameters();
string getCommandName() { return "set.dir"; }
string getCommandCategory() { return "General"; }
+ string getOutputFileNameTag(string, string) { return ""; } // inline stub: ignores both arguments and always returns an empty tag
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Set.dir"; }
string getDescription() { return "set input, output and default directories"; }
vector<string> setParameters();
string getCommandName() { return "set.logfile"; }
string getCommandCategory() { return "General"; }
+ string getOutputFileNameTag(string, string) { return ""; } // inline stub: ignores both arguments and always returns an empty tag
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Set.logfile"; }
string getDescription() { return "set logfile name"; }
exit(1);
}
}
+//**********************************************************************************************************************
+string SffInfoCommand::getOutputFileNameTag(string type, string inputName=""){ // returns the filename tag for the given output type; inputName is not read in this body
+ try {
+ string outputFileName = ""; // remains empty on both error paths below
+ map<string, vector<string> >::iterator it;
+
+ //is this a type this command creates, i.e. is it a key of outputTypes
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } // only logs; control_pressed is not set on this path
+ else {
+ if (type == "fasta") { outputFileName = "fasta"; }
+ else if (type == "flow") { outputFileName = "flow"; }
+ else if (type == "sfftxt") { outputFileName = "sff.txt"; } // type key has no dot; the returned tag does
+ else if (type == "qfile") { outputFileName = "qual"; } // the qfile type maps to the qual tag
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // registered type with no tag: log and raise control_pressed
+ }
+ return outputFileName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SffInfoCommand", "getOutputFileNameTag"); // report class and function name, then terminate
+ exit(1);
+ }
+}
//**********************************************************************************************************************
string rootName = outputDir + m->getRootName(m->getSimpleName(input));
if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; }
- string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "sff.txt";
- string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "flow";
+ string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("sfftxt");
+ string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("flow");
if (trim) {
- outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "fasta";
- outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "qual";
+ outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("fasta");
+ outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("qfile");
}else{
- outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.fasta";
- outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.qual";
+ outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw." + getOutputFileNameTag("fasta");
+ outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw." + getOutputFileNameTag("qfile");
}
if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint); outputNames.push_back(sfftxtFileName); outputTypes["sfftxt"].push_back(sfftxtFileName); }
fileRoot = m->getRootName(fileRoot);
}
- string outFlowFileName = outputDir + fileRoot + "flow";
+ string outFlowFileName = outputDir + fileRoot + getOutputFileNameTag("flow");
if (trim) {
- outFastaFileName = outputDir + fileRoot + "fasta";
- outQualFileName = outputDir + fileRoot + "qual";
+ outFastaFileName = outputDir + fileRoot + getOutputFileNameTag("fasta");
+ outQualFileName = outputDir + fileRoot + getOutputFileNameTag("qfile");
}else{
- outFastaFileName = outputDir + fileRoot + "raw.fasta";
- outQualFileName = outputDir + fileRoot + "raw.qual";
+ outFastaFileName = outputDir + fileRoot + "raw." + getOutputFileNameTag("fasta");
+ outQualFileName = outputDir + fileRoot + "raw." + getOutputFileNameTag("qfile");
}
if (fasta) { m->openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
- if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName); }
+ if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName); }
if (flow) { m->openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName); }
//read common header
vector<string> setParameters();
string getCommandName() { return "sffinfo"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Sffinfo"; }
string getDescription() { return "extract sequences reads from a .sff file"; }
#include "sharedcommand.h"
#include "sharedutilities.h"
+#include "counttable.h"
//********************************************************************************************************************
//sorts lowest to highest
try {
CommandParameter pbiom("biom", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "none",false,false); parameters.push_back(pbiom);
CommandParameter plist("list", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "ListGroup",false,false); parameters.push_back(plist);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "ListGroup",false,false); parameters.push_back(pgroup);
+ CommandParameter pcount("count", "InputTypes", "", "", "", "GroupCount", "",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "none", "GroupCount", "ListGroup",false,false); parameters.push_back(pgroup);
//CommandParameter pordergroup("ordergroup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pordergroup);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
try {
string helpString = "";
helpString += "The make.shared command reads a list and group file or a biom file and creates a shared file. If a list and group are provided a rabund file is created for each group.\n";
- helpString += "The make.shared command parameters are list, group, biom, groups and label. list and group are required unless a current file is available or you provide a biom file.\n";
+ helpString += "The make.shared command parameters are list, group, biom, groups, count and label. list and group or count are required unless a current file is available or you provide a biom file.\n";
+ helpString += "The count parameter allows you to provide a count file containing the group info for the list file.\n";
helpString += "The groups parameter allows you to indicate which groups you want to include, group names should be separated by dashes. ex. groups=A-B-C. Default is all groups in your groupfile.\n";
helpString += "The label parameter is only valid with the list and group option and allows you to indicate which labels you want to include, label names should be separated by dashes. Default is all labels in your list file.\n";
//helpString += "The ordergroup parameter allows you to indicate the order of the groups in the sharedfile, by default the groups are listed alphabetically.\n";
exit(1);
}
}
+//**********************************************************************************************************************
+//Translates an output type key ("shared", "rabund", "group") into the filename tag appended to the root name.
+//type      - output type key; it must already be registered in outputTypes.
+//inputName - not used by this command.
+//Returns the tag, or "" after logging an error. A registered type with no tag defined also sets m->control_pressed.
+string SharedCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        //types that were never registered in outputTypes are reported and get no tag
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "shared") { return "shared"; }
+        if (type == "rabund") { return "rabund"; }
+        if (type == "group")  { return "groups"; }
+
+        //registered type without a tag: log it and raise the control_pressed flag
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SharedCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+
//**********************************************************************************************************************
SharedCommand::SharedCommand(){
try {
if (path == "") { parameters["group"] = inputDir + it->second; }
}
- /*it = parameters.find("ordergroup");
+ it = parameters.find("count");
//user has given a template file
if(it != parameters.end()){
path = m->hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["ordergroup"] = inputDir + it->second; }
- }*/
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
it = parameters.find("biom");
//user has given a template file
}
}
+ vector<string> tempOutNames;
+ outputTypes["rabund"] = tempOutNames;
+ outputTypes["shared"] = tempOutNames;
+ outputTypes["group"] = tempOutNames;
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
if (groupfile == "not open") { groupfile = ""; abort = true; }
else if (groupfile == "not found") { groupfile = ""; }
else { m->setGroupFile(groupfile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
if ((biomfile == "") && (listfile == "")) {
//is there are current file available for either of these?
else if ((biomfile != "") && (listfile != "")) { m->mothurOut("When executing a make.shared command you must enter ONLY ONE of the following: list or biom."); m->mothurOutEndLine(); abort = true; }
if (listfile != "") {
- if (groupfile == "") {
+ if ((groupfile == "") && (countfile == "")) {
groupfile = m->getGroupFile();
if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
else {
- m->mothurOut("You need to provide a groupfle if you are going to use the list format."); m->mothurOutEndLine();
- abort = true;
+ countfile = m->getCountTableFile();
+ if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You need to provide a groupfile or countfile if you are going to use the list format."); m->mothurOutEndLine();
+ abort = true;
+ }
}
}
}
if (outputDir == "") { outputDir += m->hasPath(filename); }
filename = outputDir + m->getRootName(m->getSimpleName(filename));
- filename = filename + "shared";
+ filename = filename + getOutputFileNameTag("shared");
outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
if (listfile != "") { createSharedFromListGroup(filename); }
ifstream in;
m->openInputFile(biomfile, in);
-
- m->getline(in); m->gobble(in); //grab first '{'
-
+
string matrixFormat = "";
int numRows = 0;
int numCols = 0;
int shapeNumCols = 0;
vector<string> otuNames;
vector<string> groupNames;
- while (!in.eof()) {
-
+ map<string, string> fileLines;
+ vector<string> names;
+ int countOpenBrace = 0;
+ int countClosedBrace = 0;
+ int openParen = -1; //account for opening brace
+ int closeParen = 0;
+ bool ignoreCommas = false;
+ bool atComma = false;
+ string line = "";
+ string matrixElementType = "";
+
+ while (!in.eof()) { //split file by tags, so each "line" will have something like "id":"/Users/SarahsWork/Desktop/release/final.tx.1.subsample.1.pick.shared-1"
if (m->control_pressed) { break; }
- string line = m->getline(in); m->gobble(in);
+ char c = in.get(); m->gobble(in);
- string tag = getTag(line);
+ if (c == '[') { countOpenBrace++; }
+ else if (c == ']') { countClosedBrace++; }
+ else if (c == '{') { openParen++; }
+ else if (c == '}') { closeParen++; }
+ else if ((!ignoreCommas) && (c == ',')) { atComma = true; }
- if (tag == "type") {
- //check to make sure this is an OTU table
- string type = getTag(line);
- if (type != "OTU table") { m->mothurOut("[ERROR]: " + type + " is not a valid biom type for mothur. Only type allowed is OTU table.\n"); m->control_pressed = true; }
- }else if (tag == "matrix_type") {
- //get type and check type
- matrixFormat = getTag(line);
- if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->control_pressed = true; }
- }else if (tag == "matrix_element_type") {
- //get type and check type
- string matrixElementType = getTag(line);
- if (matrixElementType != "int") { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid matrix_element_type for mothur. Only type allowed is int.\n"); m->control_pressed = true; }
- }else if (tag == "rows") {
- //read otu names
- otuNames = readRows(line, in, numRows);
- }else if (tag == "columns") {
- //read sample names
- groupNames = readRows(line, in, numCols);
-
- //if users selected groups, then remove the groups not wanted.
- SharedUtil util;
- vector<string> Groups = m->getGroups();
- vector<string> allGroups = groupNames;
- util.setGroups(Groups, allGroups);
- m->setGroups(Groups);
-
- //fill filehandles with neccessary ofstreams
- int i;
- ofstream* temp;
- for (i=0; i<Groups.size(); i++) {
- temp = new ofstream;
- filehandles[Groups[i]] = temp;
- }
-
- //set fileroot
- fileroot = outputDir + m->getRootName(m->getSimpleName(biomfile));
-
- //clears file before we start to write to it below
- for (int i=0; i<Groups.size(); i++) {
- m->mothurRemove((fileroot + Groups[i] + ".rabund"));
- outputNames.push_back((fileroot + Groups[i] + ".rabund"));
- outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
+ if ((countOpenBrace != countClosedBrace) && (countOpenBrace != countClosedBrace)) { ignoreCommas = true; }
+ else if ((countOpenBrace == countClosedBrace) && (countOpenBrace == countClosedBrace)) { ignoreCommas = false; }
+ if (atComma && !ignoreCommas) {
+ if (fileLines.size() == 0) { //clip first {
+ line = line.substr(1);
}
-
- }else if (tag == "shape") {
- getDims(line, shapeNumRows, shapeNumCols);
+ string tag = getTag(line);
+ fileLines[tag] = line;
+ line = "";
+ atComma = false;
+ ignoreCommas = false;
- //check shape
- if (shapeNumCols != numCols) {
- m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->control_pressed = true;
- }
-
- if (shapeNumRows != numRows) {
- m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numRows) + " rows.\n"); m->control_pressed = true;
- }
- }else if (tag == "data") {
- m->currentBinLabels = otuNames;
-
- //read data
- vector<SharedRAbundVector*> lookup = readData(matrixFormat, line, in, groupNames, otuNames.size());
-
- m->mothurOutEndLine(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- lookup[0]->printHeaders(out);
- printSharedData(lookup, out);
- }
+ }else { line += c; }
+
+ }
+ if (line != "") {
+ line = line.substr(0, line.length()-1);
+ string tag = getTag(line);
+ fileLines[tag] = line;
}
in.close();
-
+ map<string, string>::iterator it;
+ it = fileLines.find("type");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a type provided.\n"); }
+ else {
+ string thisLine = it->second;
+ string type = getTag(thisLine);
+ if ((type != "OTU table") && (type != "OTUtable")) { m->mothurOut("[ERROR]: " + type + " is not a valid biom type for mothur. Only type allowed is OTU table.\n"); m->control_pressed = true; }
+ }
+
+ if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("matrix_type");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_type provided.\n"); }
+ else {
+ string thisLine = it->second;
+ matrixFormat = getTag(thisLine);
+ if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->control_pressed = true; }
+ }
+
+ if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("matrix_element_type");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_element_type provided.\n"); }
+ else {
+ string thisLine = it->second;
+ matrixElementType = getTag(thisLine);
+ if ((matrixElementType != "int") && (matrixElementType != "float")) { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid biom matrix_element_type for mothur. Types allowed are int and float.\n"); m->control_pressed = true; }
+ if (matrixElementType == "float") { m->mothurOut("[WARNING]: the shared file only uses integers, any float values will be rounded down to the nearest integer.\n"); }
+ }
+
+ if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("rows");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a rows provided.\n"); }
+ else {
+ string thisLine = it->second;
+ otuNames = readRows(thisLine, numRows);
+ }
+
+ if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("columns");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a columns provided.\n"); }
+ else {
+ string thisLine = it->second;
+ //read sample names
+ groupNames = readRows(thisLine, numCols);
+
+ //if users selected groups, then remove the groups not wanted.
+ SharedUtil util;
+ vector<string> Groups = m->getGroups();
+ vector<string> allGroups = groupNames;
+ util.setGroups(Groups, allGroups);
+ m->setGroups(Groups);
+
+ //fill filehandles with neccessary ofstreams
+ int i;
+ ofstream* temp;
+ for (i=0; i<Groups.size(); i++) {
+ temp = new ofstream;
+ filehandles[Groups[i]] = temp;
+ }
+
+ //set fileroot
+ fileroot = outputDir + m->getRootName(m->getSimpleName(biomfile));
+
+ //clears file before we start to write to it below
+ for (int i=0; i<Groups.size(); i++) {
+ m->mothurRemove((fileroot + Groups[i] + ".rabund"));
+ outputNames.push_back((fileroot + Groups[i] + ".rabund"));
+ outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
+ }
+ }
+
+ if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("shape");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a shape provided.\n"); }
+ else {
+ string thisLine = it->second;
+ getDims(thisLine, shapeNumRows, shapeNumCols);
+
+ //check shape
+ if (shapeNumCols != numCols) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->control_pressed = true; }
+
+ if (shapeNumRows != numRows) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numRows) + " rows.\n"); m->control_pressed = true; }
+ }
+
+ if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("data");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a data provided.\n"); }
+ else {
+ string thisLine = it->second;
+ m->currentBinLabels = otuNames;
+
+ //read data
+ vector<SharedRAbundVector*> lookup = readData(matrixFormat, thisLine, matrixElementType, groupNames, otuNames.size());
+
+ m->mothurOutEndLine(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+ lookup[0]->printHeaders(out);
+ printSharedData(lookup, out);
+ }
+
+ for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
+ out.close();
+
+ if (m->control_pressed) { m->mothurRemove(filename); return 0; }
+
return 0;
}
catch(exception& e) {
}
}
//**********************************************************************************************************************
-vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string line, ifstream& in, vector<string>& groupNames, int numOTUs) {
+vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string line, string matrixElementType, vector<string>& groupNames, int numOTUs) {
try {
vector<SharedRAbundVector*> lookup;
else if ((line[i] == ']') && (inBrackets)) {
inBrackets = false;
int temp;
- m->mothurConvert(num, temp);
+ float temp2;
+ if (matrixElementType == "float") { m->mothurConvert(num, temp2); temp = (int)temp2; }
+ else { m->mothurConvert(num, temp); }
nums.push_back(temp);
num = "";
}
}
- //same as above just reading from file.
- while (!in.eof()) {
-
- char c = in.get(); m->gobble(in);
-
- if (m->control_pressed) { return lookup; }
-
- //look for opening [ to indicate data is starting
- if ((c == '[') && (!dataStart)) { dataStart = true; c = in.get(); if (in.eof()) { break; } }
- else if ((c == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data
-
- if (dataStart) {
- if ((c == '[') && (!inBrackets)) { inBrackets = true; c = in.get(); if (in.eof()) { break; } }
- else if ((c == ']') && (inBrackets)) {
- inBrackets = false;
- int temp;
- m->mothurConvert(num, temp);
- nums.push_back(temp);
- num = "";
-
- //save info to vectors
- if (matrixFormat == "dense") {
-
- //sanity check
- if (nums.size() != lookup.size()) { m->mothurOut("[ERROR]: trouble parsing OTU data. OTU " + toString(otuCount) + " causing errors.\n"); m->control_pressed = true; }
-
- //set abundances for this otu
- //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
- for (int j = 0; j < lookup.size(); j++) { lookup[j]->set(otuCount, nums[j], groupNames[j]); }
-
- otuCount++;
- }else {
- //sanity check
- if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; }
-
- //nums contains [otuNum, sampleNum, abundance]
- lookup[nums[1]]->set(nums[0], nums[2], groupNames[nums[1]]);
- }
- nums.clear();
- }
-
- if (inBrackets) {
- if (c == ',') {
- int temp;
- m->mothurConvert(num, temp);
- nums.push_back(temp);
- num = "";
- }else { if (!isspace(c)) { num += c; } }
- }
- }
- }
-
SharedUtil util;
bool remove = false;
}
}
//**********************************************************************************************************************
-vector<string> SharedCommand::readRows(string line, ifstream& in, int& numRows) {
+vector<string> SharedCommand::readRows(string line, int& numRows) {
try {
/*"rows":[
{"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
}
}
- //keep reading
- if (!end) {
- while (!in.eof()) {
-
- if (m->control_pressed) { break; }
-
- char c = in.get(); m->gobble(in);
-
- if (c == '[') { countOpenBrace++; }
- else if (c == ']') { countClosedBrace++; }
- else if (c == '{') { openParen++; }
- else if (c == '}') { closeParen++; }
- else if (openParen != 0) { nextRow += c; } //you are reading the row info
-
-
- //you have reached the end of the rows info
- if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; }
- if ((openParen == closeParen) && (closeParen != 0)) { //process row
- numRows++;
- vector<string> items;
- m->splitAtChar(nextRow, items, ','); //parse by comma, will return junk for metadata but we aren't using that anyway
- string part = items[0]; items.clear();
- m->splitAtChar(part, items, ':'); //split part we want containing the ids
- string name = items[1];
-
- //remove "" if needed
- int pos = name.find("\"");
- if (pos != string::npos) {
- string newName = "";
- for (int k = 0; k < name.length(); k++) {
- if (name[k] != '\"') { newName += name[k]; }
- }
- name = newName;
- }
- names.push_back(name);
- nextRow = "";
- openParen = 0;
- closeParen = 0;
- }
- }
- }
-
return names;
}
catch(exception& e) {
}
}
//**********************************************************************************************************************
-//designed for things like "type": "OTU table", returns map type -> OTU table
+//designed for things like "type": "OTU table", returns type
string SharedCommand::getTag(string& line) {
try {
bool inQuotes = false;
ofstream out;
m->openOutputFile(filename, out);
- GroupMap* groupMap = new GroupMap(groupfile);
+ GroupMap* groupMap = NULL;
+ CountTable* countTable = NULL;
+ if (groupfile != "") {
+ groupMap = new GroupMap(groupfile);
- int groupError = groupMap->readMap();
- if (groupError == 1) { delete groupMap; return 0; }
- vector<string> allGroups = groupMap->getNamesOfGroups();
- m->setAllGroups(allGroups);
+ int groupError = groupMap->readMap();
+ if (groupError == 1) { delete groupMap; return 0; }
+ vector<string> allGroups = groupMap->getNamesOfGroups();
+ m->setAllGroups(allGroups);
+ }else{
+ countTable = new CountTable();
+ countTable->readTable(countfile);
+ }
+
+ if (m->control_pressed) { return 0; }
pickedGroups = false;
//if hte user has not specified any groups then use them all
if (Groups.size() == 0) {
- Groups = groupMap->getNamesOfGroups(); m->setGroups(Groups);
+ if (groupfile != "") { Groups = groupMap->getNamesOfGroups(); }
+ else { Groups = countTable->getNamesOfGroups(); }
+ m->setGroups(Groups);
}else { pickedGroups = true; }
//fill filehandles with neccessary ofstreams
//clears file before we start to write to it below
for (int i=0; i<Groups.size(); i++) {
- m->mothurRemove((fileroot + Groups[i] + ".rabund"));
- outputNames.push_back((fileroot + Groups[i] + ".rabund"));
- outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
+ m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));
+ outputNames.push_back((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));
+ outputTypes["rabund"].push_back((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));
}
string errorOff = "no error";
vector<SharedRAbundVector*> lookup;
if (m->control_pressed) {
- delete SharedList; delete groupMap;
+ delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
out.close(); m->mothurRemove(filename);
- for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + ".rabund")); }
+ for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
return 0;
}
//sanity check
- vector<string> groupMapNamesSeqs = groupMap->getNamesSeqs();
- int error = ListGroupSameSeqs(groupMapNamesSeqs, SharedList);
+ vector<string> namesSeqs;
+ int numGroupNames = 0;
+ if (m->groupMode == "group") { namesSeqs = groupMap->getNamesSeqs(); numGroupNames = groupMap->getNumSeqs(); }
+ else { namesSeqs = countTable->getNamesOfSeqs(); numGroupNames = countTable->getNumUniqueSeqs(); }
+ int error = ListGroupSameSeqs(namesSeqs, SharedList);
- if ((!pickedGroups) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) { //if the user has not specified any groups and their files don't match exit with error
- m->mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine();
-
- out.close();
- m->mothurRemove(filename); //remove blank shared file you made
+ if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) { //if the user has not specified any groups and their files don't match exit with error
+ m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine();
- createMisMatchFile(SharedList, groupMap);
+ out.close(); m->mothurRemove(filename); //remove blank shared file you made
//delete memory
- for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
- delete it3->second;
- }
-
- delete SharedList; delete groupMap;
-
+ for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
+ delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
return 0;
}
if (error == 1) { m->control_pressed = true; }
//if user has specified groups make new groupfile for them
- if (pickedGroups) { //make new group file
+ if ((pickedGroups) && (m->groupMode == "group")) { //make new group file
string groups = "";
if (m->getNumGroups() < 4) {
for (int i = 0; i < m->getNumGroups(); i++) {
}
}else { groups = "merge"; }
- string newGroupFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + groups + "groups";
+ string newGroupFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + groups + getOutputFileNameTag("group");
outputTypes["group"].push_back(newGroupFile);
outputNames.push_back(newGroupFile);
ofstream outGroups;
while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
if (m->control_pressed) {
- delete SharedList; delete groupMap;
+ delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
out.close(); m->mothurRemove(filename);
- for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + ".rabund")); }
+ for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
return 0;
}
}
if (m->control_pressed) {
- delete SharedList; delete groupMap;
+ delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
out.close(); m->mothurRemove(filename);
- for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + ".rabund")); }
+ for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
return 0;
}
if (m->control_pressed) {
- delete SharedList; delete groupMap;
+ delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
out.close(); m->mothurRemove(filename);
- for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + ".rabund")); }
+ for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
return 0;
}
}
if (m->control_pressed) {
- delete groupMap;
+ if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
out.close(); m->mothurRemove(filename);
- for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + ".rabund")); }
+ for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
return 0;
}
delete it3->second;
}
- delete groupMap;
+ if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
if (m->control_pressed) {
m->mothurRemove(filename);
- for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + ".rabund")); }
+ for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
return 0;
}
Groups.push_back(thislookup[i]->getGroup());
RAbundVector rav = thislookup[i]->getRAbundVector();
- m->openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
+ m->openOutputFileAppend(fileroot + thislookup[i]->getGroup() + "." + getOutputFileNameTag("rabund"), *(filehandles[thislookup[i]->getGroup()]));
rav.print(*(filehandles[thislookup[i]->getGroup()]));
(*(filehandles[thislookup[i]->getGroup()])).close();
}
Groups.push_back((myIt->second)->getGroup());
RAbundVector rav = (myIt->second)->getRAbundVector();
- m->openOutputFileAppend(fileroot + (myIt->second)->getGroup() + ".rabund", *(filehandles[(myIt->second)->getGroup()]));
+ m->openOutputFileAppend(fileroot + (myIt->second)->getGroup() + "." + getOutputFileNameTag("rabund"), *(filehandles[(myIt->second)->getGroup()]));
rav.print(*(filehandles[(myIt->second)->getGroup()]));
(*(filehandles[(myIt->second)->getGroup()])).close();
}else{
}
}
//**********************************************************************************************************************
-int SharedCommand::createMisMatchFile(SharedListVector* SharedList, GroupMap* groupMap) {
- try {
- ofstream outMisMatch;
- string outputMisMatchName = outputDir + m->getRootName(m->getSimpleName(listfile));
-
- //you have sequences in your list file that are not in your group file
- if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) {
- outputMisMatchName += "missing.group";
- m->mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
-
- m->openOutputFile(outputMisMatchName, outMisMatch);
-
- set<string> listNames;
- set<string>::iterator itList;
-
- //go through list and if group returns "not found" output it
- for (int i = 0; i < SharedList->getNumBins(); i++) {
- if (m->control_pressed) { outMisMatch.close(); m->mothurRemove(outputMisMatchName); return 0; }
-
- string names = SharedList->get(i);
-
- vector<string> binNames;
- m->splitAtComma(names, binNames);
-
- for (int j = 0; j < binNames.size(); j++) {
- string name = binNames[j];
- string group = groupMap->getGroup(name);
-
- if(group == "not found") { outMisMatch << name << endl; }
-
- itList = listNames.find(name);
- if (itList != listNames.end()) { m->mothurOut(name + " is in your list file more than once. Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
- else { listNames.insert(name); }
- }
- }
-
- outMisMatch.close();
-
-
- }else {//you have sequences in your group file that are not in you list file
-
- outputMisMatchName += "missing.name";
- m->mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
-
- map<string, string> namesInList;
- map<string, string>::iterator itList;
-
- //go through listfile and get names
- for (int i = 0; i < SharedList->getNumBins(); i++) {
- if (m->control_pressed) { return 0; }
-
-
- string names = SharedList->get(i);
-
- vector<string> binNames;
- m->splitAtComma(names, binNames);
-
- for (int j = 0; j < binNames.size(); j++) {
-
- string name = binNames[j];
-
- itList = namesInList.find(name);
- if (itList != namesInList.end()) { m->mothurOut(name + " is in your list file more than once. Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
-
- namesInList[name] = name;
-
- }
- }
-
- //get names of sequences in groupfile
- vector<string> seqNames = groupMap->getNamesSeqs();
-
- map<string, string>::iterator itMatch;
-
- m->openOutputFile(outputMisMatchName, outMisMatch);
-
- //loop through names in seqNames and if they aren't in namesIn list output them
- for (int i = 0; i < seqNames.size(); i++) {
- if (m->control_pressed) { outMisMatch.close(); m->mothurRemove(outputMisMatchName); return 0; }
-
- itMatch = namesInList.find(seqNames[i]);
-
- if (itMatch == namesInList.end()) {
-
- outMisMatch << seqNames[i] << endl;
- }
- }
- outMisMatch.close();
- }
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "SharedCommand", "createMisMatchFile");
- exit(1);
- }
-}
-//**********************************************************************************************************************
int SharedCommand::ListGroupSameSeqs(vector<string>& groupMapsSeqs, SharedListVector* SharedList) {
try {
int error = 0;
vector<string> setParameters();
string getCommandName() { return "make.shared"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Make.shared"; }
string getDescription() { return "make a shared file from a list and group file"; }
private:
void printSharedData(vector<SharedRAbundVector*>, ofstream&);
- int createMisMatchFile(SharedListVector*, GroupMap*);
int readOrderFile();
bool isValidGroup(string, vector<string>);
int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
int createSharedFromListGroup(string);
int createSharedFromBiom(string);
string getTag(string&);
- vector<string> readRows(string, ifstream&, int&);
+ vector<string> readRows(string, int&);
int getDims(string, int&, int&);
- vector<SharedRAbundVector*> readData(string, string, ifstream&, vector<string>&, int);
+ vector<SharedRAbundVector*> readData(string, string, string, vector<string>&, int);
vector<string> Groups, outputNames, order;
set<string> labels;
- string fileroot, outputDir, listfile, groupfile, biomfile, ordergroupfile;
+ string fileroot, outputDir, listfile, groupfile, biomfile, ordergroupfile, countfile;
bool firsttime, pickedGroups, abort, allLines;
map<string, ofstream*> filehandles;
map<string, ofstream*>::iterator it3;
/***********************************************************************/
-SharedListVector::SharedListVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; }
+SharedListVector::SharedListVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; countTable = NULL; }
/***********************************************************************/
-SharedListVector::SharedListVector(int n): DataVector(), data(n, "") , maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; }
+SharedListVector::SharedListVector(int n): DataVector(), data(n, "") , maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; countTable = NULL; }
/***********************************************************************/
SharedListVector::SharedListVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
try {
+ groupmap = NULL; countTable = NULL;
//set up groupmap for later.
- groupmap = new GroupMap(m->getGroupFile());
- groupmap->readMap();
+ if (m->groupMode == "group") {
+ groupmap = new GroupMap(m->getGroupFile());
+ groupmap->readMap();
+ }else {
+ countTable = new CountTable();
+ countTable->readTable(m->getCountTableFile());
+ }
int hold;
string inputData;
/***********************************************************************/
SharedOrderVector* SharedListVector::getSharedOrderVector(){
try {
- string groupName, names, name;
-
SharedOrderVector* order = new SharedOrderVector();
order->setLabel(label);
for(int i=0;i<numBins;i++){
int binSize = m->getNumNames(get(i)); //find number of individual in given bin
- names = get(i);
- while (names.find_first_of(',') != -1) {
- name = names.substr(0,names.find_first_of(','));
- names = names.substr(names.find_first_of(',')+1, names.length());
- groupName = groupmap->getGroup(name);
-
- if(groupName == "not found") { m->mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
+ //split the comma separated names of bin i into individual sequence names
+ string names = get(i);
+ vector<string> binNames;
+ m->splitAtComma(names, binNames);
+ if (m->groupMode != "group") {
+ //with a count table the bin size is the sum of the per-name counts, so index by j (the name), not i (the bin)
+ binSize = 0;
+ for (int j = 0; j < binNames.size(); j++) { binSize += countTable->getNumSeqs(binNames[j]); }
+ }
+ for (int j = 0; j < binNames.size(); j++) {
+ if (m->control_pressed) { return order; }
+ if (m->groupMode == "group") {
+ //group file mode: one entry per name, tagged with that name's group
+ string groupName = groupmap->getGroup(binNames[j]);
+ if(groupName == "not found") { m->mothurOut("Error: Sequence '" + binNames[j] + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
- order->push_back(i, binSize, groupName); //i represents what bin you are in
+ order->push_back(i, binSize, groupName); //i represents what bin you are in
+ }else {
+ //count table mode: add one entry per counted sequence in each group for this name
+ vector<int> groupAbundances = countTable->getGroupCounts(binNames[j]);
+ vector<string> groupNames = countTable->getNamesOfGroups();
+ for (int k = 0; k < groupAbundances.size(); k++) { //groupAbundances.size() == 0 if there is a file mismatch and m->control_pressed is true.
+ if (m->control_pressed) { return order; }
+ for (int l = 0; l < groupAbundances[k]; l++) { order->push_back(i, binSize, groupNames[k]); }
+ }
+ }
}
- //get last name
- groupName = groupmap->getGroup(names);
- if(groupName == "not found") { m->mothurOut("Error: Sequence '" + names + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
- order->push_back(i, binSize, groupName);
}
random_shuffle(order->begin(), order->end());
SharedRAbundVector SharedListVector::getSharedRAbundVector(string groupName) {
try {
SharedRAbundVector rav(data.size());
- string group, names, name;
for(int i=0;i<numBins;i++){
- names = get(i);
- while (names.find_first_of(',') != -1) {
- name = names.substr(0,names.find_first_of(','));
- names = names.substr(names.find_first_of(',')+1, names.length());
- group = groupmap->getGroup(name);
- if(group == "not found") { m->mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
- if (group == groupName) { //this name is in the group you want the vector for.
- rav.set(i, rav.getAbundance(i) + 1, group); //i represents what bin you are in
- }
- }
-
- //get last name
- groupName = groupmap->getGroup(names);
- if(groupName == "not found") { m->mothurOut("Error: Sequence '" + names + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
- if (group == groupName) { //this name is in the group you want the vector for.
- rav.set(i, rav.getAbundance(i) + 1, group); //i represents what bin you are in
+ string names = get(i);
+ vector<string> binNames;
+ m->splitAtComma(names, binNames);
+ for (int j = 0; j < binNames.size(); j++) {
+ if (m->control_pressed) { return rav; }
+ if (m->groupMode == "group") {
+ string group = groupmap->getGroup(binNames[j]);
+ if(group == "not found") { m->mothurOut("Error: Sequence '" + binNames[j] + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
+ if (group == groupName) { //this name is in the group you want the vector for.
+ rav.set(i, rav.getAbundance(i) + 1, group); //i represents what bin you are in
+ }
+ }else {
+ int count = countTable->getGroupCount(binNames[j], groupName);
+ rav.set(i, rav.getAbundance(i) + count, groupName);
+ }
}
}
SharedUtil* util;
util = new SharedUtil();
vector<SharedRAbundVector*> lookup; //contains just the groups the user selected
+ vector<SharedRAbundVector*> lookupDelete;
map<string, SharedRAbundVector*> finder; //contains all groups in groupmap
- string group, names, name;
vector<string> Groups = m->getGroups();
- vector<string> allGroups = groupmap->getNamesOfGroups();
+ vector<string> allGroups;
+ if (m->groupMode == "group") { allGroups = groupmap->getNamesOfGroups(); }
+ else { allGroups = countTable->getNamesOfGroups(); }
util->setGroups(Groups, allGroups);
m->setGroups(Groups);
delete util;
finder[allGroups[i]]->setGroup(allGroups[i]);
if (m->inUsersGroups(allGroups[i], m->getGroups())) { //if this group is in user groups
lookup.push_back(finder[allGroups[i]]);
- }
+ }else {
+ lookupDelete.push_back(finder[allGroups[i]]);
+ }
}
//fill vectors
for(int i=0;i<numBins;i++){
- names = get(i);
- int nameLength = names.size();
- string seqName = "";
-
- for(int j=0;j<nameLength;j++){
- if(names[j] == ','){
- group = groupmap->getGroup(seqName);
- if(group == "not found") { m->mothurOut("Error: Sequence '" + seqName + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
- finder[group]->set(i, finder[group]->getAbundance(i) + 1, group); //i represents what bin you are in
-
- seqName = "";
- }
- else{
- seqName += names[j];
- }
+ string names = get(i);
+ vector<string> binNames;
+ m->splitAtComma(names, binNames);
+ //add every name in bin i to the per-group abundance vectors held in finder
+ for (int j = 0; j < binNames.size(); j++) {
+ if (m->groupMode == "group") {
+ string group = groupmap->getGroup(binNames[j]);
+ if(group == "not found") { m->mothurOut("Error: Sequence '" + binNames[j] + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
+ finder[group]->set(i, finder[group]->getAbundance(i) + 1, group); //i represents what bin you are in
+ }else{
+ vector<int> counts = countTable->getGroupCounts(binNames[j]);
+ //counts is empty when the name is missing from the count table (file mismatch), so never index past its end
+ for (int k = 0; k < allGroups.size() && k < counts.size(); k++) {
+ finder[allGroups[k]]->set(i, finder[allGroups[k]]->getAbundance(i) + counts[k], allGroups[k]);
+ }
+ }
}
- group = groupmap->getGroup(seqName);
- if(group == "not found") { m->mothurOut("Error: Sequence '" + seqName + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
- finder[group]->set(i, finder[group]->getAbundance(i) + 1, group); //i represents what bin you are in
-
-
-
-// while (names.find_first_of(',') != -1) {
-// name = names.substr(0,names.find_first_of(','));
-// names = names.substr(names.find_first_of(',')+1, names.length());
-// group = groupmap->getGroup(name);
-// if(group == "not found") { m->mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
-// finder[group]->set(i, finder[group]->getAbundance(i) + 1, group); //i represents what bin you are in
-// }
-
- //get last name
-// group = groupmap->getGroup(names);
-// if(group == "not found") { m->mothurOut("Error: Sequence '" + names + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
-// finder[group]->set(i, finder[group]->getAbundance(i) + 1, group); //i represents what bin you are in
-
}
+
+ for (int j = 0; j < lookupDelete.size(); j++) { delete lookupDelete[j]; }
return lookup;
}
OrderVector ov;
for(int i=0;i<data.size();i++){
- int binSize = m->getNumNames(data[i]);
+ //split the names stored for bin i; in group mode the bin size is simply the number of names
+ string names = data[i];
+ vector<string> binNames;
+ m->splitAtComma(names, binNames);
+ int binSize = binNames.size();
+ if (m->groupMode != "group") {
+ //in count table mode each name stands for getNumSeqs(name) sequences, so index by j (the name), not i (the bin)
+ binSize = 0;
+ for (int j = 0; j < binNames.size(); j++) { binSize += countTable->getNumSeqs(binNames[j]); }
+ }
for(int j=0;j<binSize;j++){
ov.push_back(i);
}
for(int i=0;i<data.size();i++){
string listOTU = data[i];
- int length = listOTU.size();
-
- string seqName="";
-
- for(int j=0;j<length;j++){
-
- if(listOTU[j] != ','){
- seqName += listOTU[j];
- }
- else{
- if(orderMap->count(seqName) == 0){
- m->mothurOut(seqName + " not found, check *.names file\n");
- exit(1);
- }
-
- ov.set((*orderMap)[seqName], i);
- seqName = "";
- }
- }
-
- if(orderMap->count(seqName) == 0){
- m->mothurOut(seqName + " not found, check *.names file\n");
- exit(1);
+ vector<string> binNames;
+ m->splitAtComma(listOTU, binNames);
+ for (int j = 0; j < binNames.size(); j++) {
+ if(orderMap->count(binNames[j]) == 0){
+ m->mothurOut(binNames[j] + " not found, check *.names file\n");
+ exit(1);
+ }
+ ov.set((*orderMap)[binNames[j]], i);
}
- ov.set((*orderMap)[seqName], i);
}
ov.setLabel(label);
#include "datavector.hpp"
#include "groupmap.h"
+#include "counttable.h"
#include "sharedrabundvector.h"
#include "sharedsabundvector.h"
SharedListVector();
SharedListVector(int);
SharedListVector(ifstream&);
- SharedListVector(const SharedListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs){ groupmap = NULL; };
- ~SharedListVector(){ if (groupmap != NULL) { delete groupmap; } };
+ SharedListVector(const SharedListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs){ groupmap = NULL; countTable = NULL; };
+ ~SharedListVector(){ if (groupmap != NULL) { delete groupmap; } if (countTable != NULL) { delete countTable; } };
int getNumBins() { return numBins; }
int getNumSeqs() { return numSeqs; }
private:
vector<string> data; //data[i] is a list of names of sequences in the ith OTU.
GroupMap* groupmap;
+ CountTable* countTable;
int maxRank;
int numBins;
int numSeqs;
while ((nextLabel == holdLabel) && (f.eof() != true)) {
f >> groupN >> num;
- if (num != 1000) { break; }
count++;
allGroups.push_back(groupN);
m->saveNextLabel = nextLabel;
m->setAllGroups(allGroups);
+ //report the parsed groups through mothurOut, and only in debug mode, rather than printing straight to stdout
+ if (m->debug) { for (int i = 0; i < allGroups.size(); i++) { m->mothurOut("[DEBUG]: group = " + allGroups[i] + "\n"); } }
}
catch(exception& e) {
}
}
//**********************************************************************************************************************
+string ShhherCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        //a type that is not registered in outputTypes is reported and gets an empty tag
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        //registered types map onto the fixed suffixes below; inputName is not consulted
+        if (type == "fasta")  { return "shhh.fasta";  }
+        if (type == "name")   { return "shhh.names";  }
+        if (type == "group")  { return "shhh.groups"; }
+        if (type == "counts") { return "shhh.counts"; }
+        if (type == "qfile")  { return "shhh.qual";   }
+
+        //registered, but no suffix is defined for it: report and set control_pressed
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
ShhherCommand::ShhherCommand(){
try {
setParameters();
//initialize outputTypes
-// vector<string> tempOutNames;
-// outputTypes["pn.dist"] = tempOutNames;
+ vector<string> tempOutNames;
+ outputTypes["fasta"] = tempOutNames;
+ outputTypes["name"] = tempOutNames;
+ outputTypes["group"] = tempOutNames;
+ outputTypes["counts"] = tempOutNames;
+ outputTypes["qfile"] = tempOutNames;
}
catch(exception& e) {
}
//initialize outputTypes
- vector<string> tempOutNames;
-// outputTypes["pn.dist"] = tempOutNames;
- // outputTypes["fasta"] = tempOutNames;
+ vector<string> tempOutNames;
+ outputTypes["fasta"] = tempOutNames;
+ outputTypes["name"] = tempOutNames;
+ outputTypes["group"] = tempOutNames;
+ outputTypes["counts"] = tempOutNames;
+ outputTypes["qfile"] = tempOutNames;
+
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
}
- string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
+ string nameFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("name");
ofstream nameFile;
m->openOutputFile(nameFileName, nameFile);
try{
if (numOTUs < processors) { processors = 1; }
+ if (m->debug) { m->mothurOut("[DEBUG]: numSeqs = " + toString(numSeqs) + " numOTUS = " + toString(numOTUs) + " about to alloc a dist vector with size = " + toString((numSeqs * numOTUs)) + ".\n"); }
+
dist.assign(numSeqs * numOTUs, 0);
change.assign(numOTUs, 1);
centroids.assign(numOTUs, -1);
nSeqsBreaks.assign(processors+1, 0);
nOTUsBreaks.assign(processors+1, 0);
+ if (m->debug) { m->mothurOut("[DEBUG]: made it through the memory allocation.\n"); }
+
nSeqsBreaks[0] = 0;
for(int i=0;i<processors;i++){
nSeqsBreaks[i+1] = nSeqsBreaks[i] + (int)((double) numSeqs / (double) processors);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
- string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.qual";
+ string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("qfile");
ofstream qualityFile;
m->openOutputFile(qualityFileName, qualityFile);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
- string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.fasta";
+ string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("fasta");
ofstream fastaFile;
m->openOutputFile(fastaFileName, fastaFile);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
- string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.names";
+ string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("name");
ofstream nameFile;
m->openOutputFile(nameFileName, nameFile);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
- string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
- string groupFileName = fileRoot + "shhh.groups";
+ string fileRoot = m->getRootName(m->getSimpleName(flowFileName));
+ //find_first_of returns string::size_type; keep that type so the npos test compares like with like
+ string::size_type pos = fileRoot.find_first_of('.');
+ string fileGroup = fileRoot;
+ //group label = the text after the first '.', excluding the final character of fileRoot
+ if (pos != string::npos) { fileGroup = fileRoot.substr(pos+1, (fileRoot.length()-1-(pos+1))); }
+ string groupFileName = thisOutputDir + fileRoot + getOutputFileNameTag("group");
ofstream groupFile;
m->openOutputFile(groupFileName, groupFile);
for(int i=0;i<numSeqs;i++){
if (m->control_pressed) { break; }
- groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
+ groupFile << seqNameVector[i] << '\t' << fileGroup << endl;
}
groupFile.close();
outputNames.push_back(groupFileName);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
- string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.counts";
+ string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) +getOutputFileNameTag("counts");
ofstream otuCountsFile;
m->openOutputFile(otuCountsFileName, otuCountsFile);
vector<int> uniqueLengths;
int numFlowCells;
+ if (m->debug) { m->mothurOut("[DEBUG]: About to read flowgrams.\n"); }
int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
if (m->control_pressed) { break; }
vector<int> nSeqsBreaks;
vector<int> nOTUsBreaks;
+ if (m->debug) { m->mothurOut("[DEBUG]: numSeqs = " + toString(numSeqs) + " numOTUS = " + toString(numOTUs) + " about to alloc a dist vector with size = " + toString((numSeqs * numOTUs)) + ".\n"); }
+
dist.assign(numSeqs * numOTUs, 0);
change.assign(numOTUs, 1);
centroids.assign(numOTUs, -1);
nSeqsBreaks[1] = numSeqs;
nOTUsBreaks[1] = numOTUs;
+ if (m->debug) { m->mothurOut("[DEBUG]: done allocating memory, about to denoise.\n"); }
+
if (m->control_pressed) { break; }
double maxDelta = 0;
if (m->control_pressed) { break; }
vector<int> otuCounts(numOTUs, 0);
- for(int i=0;i<numSeqs;i++) { otuCounts[otuData[i]]++; }
+ for(int j=0;j<numSeqs;j++) { otuCounts[otuData[j]]++; }
calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
if ((large) && (g == 0)) { flowFileName = filenames[i]; theseFlowFileNames[0] = filenames[i]; }
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir = m->hasPath(flowFileName); }
- string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.qual";
- string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.fasta";
- string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.names";
- string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.counts";
- string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
- string groupFileName = fileRoot + "shhh.groups";
+ string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("qfile");
+ string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("fasta");
+ string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("name");
+ string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("counts");
+ string fileRoot = m->getRootName(m->getSimpleName(flowFileName));
+ //find_first_of returns string::size_type; keep that type so the npos test compares like with like
+ string::size_type pos = fileRoot.find_first_of('.');
+ string fileGroup = fileRoot;
+ //group label = the text after the first '.', excluding the final character of fileRoot
+ if (pos != string::npos) { fileGroup = fileRoot.substr(pos+1, (fileRoot.length()-1-(pos+1))); }
+ string groupFileName = thisOutputDir + fileRoot + getOutputFileNameTag("group");
writeQualities(numOTUs, numFlowCells, qualityFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); if (m->control_pressed) { break; }
writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, fastaFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);if (m->control_pressed) { break; }
writeNames(thisCompositeNamesFileName, numOTUs, nameFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU); if (m->control_pressed) { break; }
writeClusters(otuCountsFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI); if (m->control_pressed) { break; }
- writeGroups(groupFileName, fileRoot, numSeqs, seqNameVector); if (m->control_pressed) { break; }
+ writeGroups(groupFileName, fileGroup, numSeqs, seqNameVector); if (m->control_pressed) { break; }
if (large) {
if (g > 0) {
- m->appendFiles(qualityFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.qual"));
+ m->appendFiles(qualityFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("qfile")));
m->mothurRemove(qualityFileName);
- m->appendFiles(fastaFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.fasta"));
+ m->appendFiles(fastaFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("fasta")));
m->mothurRemove(fastaFileName);
- m->appendFiles(nameFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.names"));
+ m->appendFiles(nameFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("name")));
m->mothurRemove(nameFileName);
- m->appendFiles(otuCountsFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.counts"));
+ m->appendFiles(otuCountsFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("counts")));
m->mothurRemove(otuCountsFileName);
- m->appendFiles(groupFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.groups"));
+ m->appendFiles(groupFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("group")));
m->mothurRemove(groupFileName);
}
m->mothurRemove(theseFlowFileNames[g]);
thisNameMap.clear();
flowFile >> numFlowCells;
+ if (m->debug) { m->mothurOut("[DEBUG]: numFlowCells = " + toString(numFlowCells) + ".\n"); }
int index = 0;//pcluster
while(!flowFile.eof()){
if (m->control_pressed) { break; }
flowFile >> seqName >> currentNumFlowCells;
+
thisLengths.push_back(currentNumFlowCells);
thisSeqNameVector.push_back(seqName);
thisNameMap[seqName] = index++;//pcluster
-
+
+ if (m->debug) { m->mothurOut("[DEBUG]: seqName = " + seqName + " length = " + toString(currentNumFlowCells) + " index = " + toString(index) + "\n"); }
+
for(int i=0;i<numFlowCells;i++){
flowFile >> intensity;
if(intensity > 9.99) { intensity = 9.99; }
read->read(clusterNameMap);
ListVector* list = read->getListVector();
- SparseMatrix* matrix = read->getMatrix();
+ SparseDistanceMatrix* matrix = read->getDMatrix();
delete read;
delete clusterNameMap;
listFile >> label >> numOTUs;
+ if (m->debug) { m->mothurOut("[DEBUG]: Getting OTU Data...\n"); }
+
otuData.assign(numSeqs, 0);
cumNumSeqs.assign(numOTUs, 0);
nSeqsPerOTU.assign(numOTUs, 0);
for(int i=0;i<numOTUs;i++){
if (m->control_pressed) { break; }
+ if (m->debug) { m->mothurOut("[DEBUG]: processing OTU " + toString(i) + ".\n"); }
listFile >> singleOTU;
vector<string> setParameters();
string getCommandName() { return "shhh.flows"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Shhh.flows"; }
string getDescription() { return "shhh.flows"; }
exit(1);
}
}
+//**********************************************************************************************************************
+string ShhhSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        //a type that is not registered in outputTypes is reported and gets an empty tag
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        //registered types map onto the fixed suffixes below; inputName is not consulted
+        if (type == "fasta") { return "shhh_seqs.fasta"; }
+        if (type == "name")  { return "shhh_seqs.names"; }
+        if (type == "map")   { return "shhh_seqs.map";   }
+
+        //registered, but no suffix is defined for it: report and set control_pressed
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhhSeqsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+
//**********************************************************************************************************************
ShhhSeqsCommand::ShhhSeqsCommand(){
if (abort == true) { if (calledHelp) { return 0; } return 2; }
if (outputDir == "") { outputDir = m->hasPath(fastafile); }//if user entered a file with a path then preserve it
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "shhh.fasta";
- string nameFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "shhh.names";
- string mapFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "shhh.map";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta");
+ string nameFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("name");
+ string mapFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("map");
if (groupfile != "") {
//Parse sequences by group
vector<string> setParameters();
string getCommandName() { return "shhh.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Shhh.seqs"; }
string getDescription() { return "shhh.seqs"; }
/***********************************************************************/
-SingleLinkage::SingleLinkage(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c, string s) :
+SingleLinkage::SingleLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s) :
Cluster(rav, lv, dm, c, s)
{}
/***********************************************************************/
//This function clusters based on the single linkage method.
-void SingleLinkage::update(double& cutOFF){
+void SingleLinkage::update(double& cutOFF){
try {
- getRowColCells();
+ smallCol = dMatrix->getSmallestCell(smallRow);
+ nColCells = dMatrix->seqVec[smallCol].size();
+ nRowCells = dMatrix->seqVec[smallRow].size();
vector<bool> deleted(nRowCells, false);
int rowInd;
// The vector has to be traversed in reverse order to preserve the index
// for faster removal in removeCell()
for (int i=nRowCells-1;i>=0;i--) {
- if ((rowCells[i]->row == smallRow) && (rowCells[i]->column == smallCol)) {
- rowInd = i; // The index of the smallest distance cell in rowCells
- } else {
- if (rowCells[i]->row == smallRow) {
- search = rowCells[i]->column;
- } else {
- search = rowCells[i]->row;
- }
-
- for (int j=0;j<nColCells;j++) {
- if (!((colCells[j]->row == smallRow) && (colCells[j]->column == smallCol))) {
- if (colCells[j]->row == search || colCells[j]->column == search) {
- changed = updateDistance(colCells[j], rowCells[i]);
- // If the cell's distance changed and it had the same distance as
- // the smallest distance, invalidate the mins vector in SparseMatrix
- if (changed) {
- if (colCells[j]->vectorMap != NULL) {
- *(colCells[j]->vectorMap) = NULL;
- colCells[j]->vectorMap = NULL;
- }
- }
- removeCell(rowCells[i], i , -1);
- deleted[i] = true;
- break;
- }
- }
- }
- if (!deleted[i]) {
- // Assign the cell to the new cluster
- // remove the old cell from seqVec and add the cell
- // with the new row and column assignment again
- removeCell(rowCells[i], i , -1, false);
- if (search < smallCol){
- rowCells[i]->row = smallCol;
- rowCells[i]->column = search;
- } else {
- rowCells[i]->row = search;
- rowCells[i]->column = smallCol;
- }
- seqVec[rowCells[i]->row].push_back(rowCells[i]);
- seqVec[rowCells[i]->column].push_back(rowCells[i]);
- }
- }
+ if (dMatrix->seqVec[smallRow][i].index == smallCol) {
+ rowInd = i; // The index of the smallest distance cell in rowCells
+ } else {
+ search = dMatrix->seqVec[smallRow][i].index;
+
+ for (int j=0;j<nColCells;j++) {
+ if (dMatrix->seqVec[smallCol][j].index != smallRow) { //if you are not the small cell
+ if (dMatrix->seqVec[smallCol][j].index == search) {
+ changed = updateDistance(dMatrix->seqVec[smallCol][j], dMatrix->seqVec[smallRow][i]);
+ dMatrix->updateCellCompliment(smallCol, j);
+ dMatrix->rmCell(smallRow, i);
+ deleted[i] = true;
+ break;
+ }
+ }
+ }
+ if (!deleted[i]) {
+ // Assign the cell to the new cluster
+ // remove the old cell from seqVec and add the cell
+ // with the new row and column assignment again
+ float distance = dMatrix->seqVec[smallRow][i].dist;
+ dMatrix->rmCell(smallRow, i);
+ if (search < smallCol){
+ PDistCell value(smallCol, distance);
+ dMatrix->addCell(search, value);
+ } else {
+ PDistCell value(search, distance);
+ dMatrix->addCell(smallCol, value);
+ }
+ sort(dMatrix->seqVec[smallCol].begin(), dMatrix->seqVec[smallCol].end(), compareIndexes);
+ sort(dMatrix->seqVec[search].begin(), dMatrix->seqVec[search].end(), compareIndexes);
+ }
+ }
}
clusterBins();
clusterNames();
// remove also the cell with the smallest distance
- removeCell(rowCells[rowInd], -1 , -1);
+ dMatrix->rmCell(smallRow, rowInd);
}
catch(exception& e) {
m->errorOut(e, "SingleLinkage", "update");
/***********************************************************************/
//This function updates the distance based on the nearest neighbor method.
-bool SingleLinkage::updateDistance(MatData& colCell, MatData& rowCell) {
+bool SingleLinkage::updateDistance(PDistCell& colCell, PDistCell& rowCell) {
try {
bool changed = false;
- if (colCell->dist > rowCell->dist) {
- colCell->dist = rowCell->dist;
+ if (colCell.dist > rowCell.dist) {
+ colCell.dist = rowCell.dist;
}
return(changed);
}
}
}
+//**********************************************************************************************************************
+string SortSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Builds the tag that is appended to an input file's root name to form the output file name.
+    // type      - output type being written (fasta, taxonomy, name, group, flow or qfile)
+    // inputName - input file whose extension is carried over to the output
+    // Returns "sorted" + the extension of inputName, or "" after logging an error for any other type.
+    try {
+        //is this a type this command creates
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        //every supported type shares the same naming rule
+        const bool hasTag = (type == "fasta") || (type == "taxonomy") || (type == "name")
+                         || (type == "group") || (type == "flow")     || (type == "qfile");
+        if (!hasTag) {
+            m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+            m->control_pressed = true;
+            return "";
+        }
+
+        return "sorted" + m->getExtension(inputName);
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SortSeqsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
SortSeqsCommand::SortSeqsCommand(){
if (abort == true) { if (calledHelp) { return 0; } return 2; }
//read through the correct file and output lines you want to keep
- if (accnosfile != "") { readAccnos(); }
+ if (accnosfile != "") {
+ vector<string> temp;
+ m->readAccnos(accnosfile, temp);
+ for (int i = 0; i < temp.size(); i++) { names[temp[i]] = i; }
+ m->mothurOut("\nUsing " + accnosfile + " to determine the order. It contains " + toString(temp.size()) + " representative sequences.\n");
+ }
+
if (fastafile != "") { readFasta(); }
if (flowfile != "") { readFlow(); }
if (qualfile != "") { readQual(); }
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "sorted" + m->getExtension(fastafile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(outputFileName);
ofstream out;
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(flowfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowfile)) + "sorted" + m->getExtension(flowfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowfile)) + getOutputFileNameTag("flow", flowfile);
outputTypes["flow"].push_back(outputFileName); outputNames.push_back(outputFileName);
ofstream out;
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(qualfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "sorted" + m->getExtension(qualfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + getOutputFileNameTag("qfile", qualfile);
outputTypes["qfile"].push_back(outputFileName); outputNames.push_back(outputFileName);
ofstream out;
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "sorted" + m->getExtension(namefile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
ofstream out;
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
- outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
+ outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
ofstream out;
}
}
//**********************************************************************************************************************
-int SortSeqsCommand::readAccnos(){
- try {
-
- ifstream in;
- m->openInputFile(accnosfile, in);
- string name;
- int count = 0;
-
- while(!in.eof()){
-
- if (m->control_pressed) { break; }
-
- in >> name; m->gobble(in);
-
- if (name != "") {
- names[name] = count;
- count++;
- }
- }
- in.close();
-
- m->mothurOut("\nUsing " + accnosfile + " to determine the order. It contains " + toString(count) + " representative sequences.\n");
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "SortSeqsCommand", "readAccnos");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
vector<string> setParameters();
string getCommandName() { return "sort.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Sort.seqs"; }
string getDescription() { return "puts sequences from a fasta, name, group, quality, flow or taxonomy file in the same order"; }
int readFlow();
int readName();
int readGroup();
- int readAccnos();
int readTax();
int readQual();
--- /dev/null
+//
+// sparsedistancematrix.cpp
+// Mothur
+//
+// Created by Sarah Westcott on 7/16/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "sparsedistancematrix.h"
+
+
+/***********************************************************************/
+
+//Starts with no entries, an unsorted matrix and both distance trackers at the 1e6 sentinel.
+SparseDistanceMatrix::SparseDistanceMatrix() :
+    numNodes(0),
+    sorted(false),
+    smallDist(1e6),
+    aboveCutoff(1e6),
+    m(MothurOut::getInstance())
+{}
+
+/***********************************************************************/
+
+//Returns numNodes, the running entry count that addCell increases and rmCell decreases.
+int SparseDistanceMatrix::getNNodes(){
+ return numNodes;
+}
+/***********************************************************************/
+
+//Empties every row of seqVec and then removes the rows themselves.
+void SparseDistanceMatrix::clear(){
+    for (vector<vector<PDistCell> >::iterator rowIt = seqVec.begin(); rowIt != seqVec.end(); ++rowIt) {
+        rowIt->clear();
+    }
+    seqVec.clear();
+    //NOTE(review): numNodes, smallDist and sorted keep their old values here - confirm a cleared matrix is never refilled and reused
+}
+
+/***********************************************************************/
+
+//Returns smallDist: addCell lowers it when handed a smaller distance and
+//getSmallestCell recomputes it from scratch on every call.
+float SparseDistanceMatrix::getSmallDist(){
+ return smallDist;
+}
+/***********************************************************************/
+
+//Copies the distance stored at seqVec[row][col] onto its mirrored entry, i.e. the entry
+//in the other sequence's row whose index points back at row.
+//  row - row of the cell whose distance was just changed
+//  col - position of that cell inside seqVec[row] (a position, not a sequence index)
+//Returns 0 when the mirrored entry was updated, 1 when no mirrored entry exists.
+int SparseDistanceMatrix::updateCellCompliment(ull row, ull col){
+    try {
+
+        ull vrow = seqVec[row][col].index;
+
+        //find the columns entry for this cell as well
+        for (ull i = 0; i < seqVec[vrow].size(); i++) {
+            if (seqVec[vrow][i].index == row) {
+                seqVec[vrow][i].dist = seqVec[row][col].dist;
+                return 0;
+            }
+        }
+
+        //without this check position 0 of the other row would be overwritten, corrupting an unrelated distance
+        m->mothurOut("[ERROR]: updateCellCompliment could not find the matching cell, no distance was updated.\n");
+        return 1;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SparseDistanceMatrix", "updateCellCompliment");
+        exit(1);
+    }
+}
+/***********************************************************************/
+
+//Removes the cell at seqVec[row][col] together with its mirrored entry in the other
+//sequence's row, and lowers numNodes by one for every entry actually erased.
+//  row - row holding the cell to remove
+//  col - position of that cell inside seqVec[row] (a position, not a sequence index)
+//Always returns 0.
+int SparseDistanceMatrix::rmCell(ull row, ull col){
+    try {
+        ull vrow = seqVec[row][col].index;
+        ull vcol = 0;
+        bool foundMirror = false;
+
+        //find the columns entry for this cell as well
+        for (ull i = 0; i < seqVec[vrow].size(); i++) {
+            if (seqVec[vrow][i].index == row) { vcol = i; foundMirror = true; break; }
+        }
+
+        //only erase the mirrored entry if it really exists; erasing position 0 blindly would
+        //drop an unrelated distance, or be undefined behaviour on an empty row
+        if (foundMirror) {
+            seqVec[vrow].erase(seqVec[vrow].begin()+vcol);
+            numNodes--;
+        }else {
+            m->mothurOut("[ERROR]: rmCell could not find the matching cell, only one entry was removed.\n");
+        }
+
+        seqVec[row].erase(seqVec[row].begin()+col);
+        numNodes--;
+
+        return(0);
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SparseDistanceMatrix", "rmCell");
+        exit(1);
+    }
+}
+/***********************************************************************/
+//Stores one distance twice: cell goes into seqVec[row], and a mirrored entry pointing
+//back at row goes into seqVec[cell.index]. numNodes grows by 2 and smallDist is lowered
+//when the new distance is smaller.
+//NOTE(review): both rows are indexed without a range check - presumably resize() has been called first; confirm.
+void SparseDistanceMatrix::addCell(ull row, PDistCell cell){
+    try {
+        numNodes = numNodes + 2;
+        if (smallDist > cell.dist) { smallDist = cell.dist; }
+
+        seqVec[row].push_back(cell);
+        seqVec[cell.index].push_back(PDistCell(row, cell.dist));
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SparseDistanceMatrix", "addCell");
+        exit(1);
+    }
+}
+/***********************************************************************/
+
+//Scans the matrix for the smallest stored distance.
+//  row - output: set to the row of the chosen cell
+//Returns the column (sequence index) of the chosen cell. smallDist is reset to the 1e6
+//sentinel and then lowered to the smallest distance found. Sorts the rows on first use.
+//When several cells tie, the first one met in scan order is used.
+//If no cell qualifies, an error is logged, m->control_pressed is set, row is set to 0 and 0 is returned.
+ull SparseDistanceMatrix::getSmallestCell(ull& row){
+    try {
+        if (!sorted) { sortSeqVec(); sorted = true; }
+
+        vector<PDistCellMin> mins;
+        smallDist = 1e6;
+
+        for (int i = 0; i < seqVec.size(); i++) {
+            for (int j = 0; j < seqVec[i].size(); j++) {
+
+                //already checked everyone else in row, stop looking
+                if (!(i < seqVec[i][j].index)) { break; }
+
+                float dist = seqVec[i][j].dist;
+
+                if(dist < smallDist){ //found a new smallest distance
+                    mins.clear();
+                    smallDist = dist;
+                    PDistCellMin temp(i, seqVec[i][j].index);
+                    mins.push_back(temp);
+                }
+                else if(dist == smallDist){ //if a subsequent distance is the same as mins distance add the new cell to the mins vector
+                    PDistCellMin temp(i, seqVec[i][j].index);
+                    mins.push_back(temp);
+                }
+            }
+        }
+
+        //random_shuffle(mins.begin(), mins.end()); //randomize the order of the cells in the mins vector
+
+        //an empty matrix (or one holding only distances above the sentinel) leaves mins empty,
+        //and reading mins[0] would then be undefined behaviour
+        if (mins.empty()) {
+            m->mothurOut("[ERROR]: getSmallestCell found no cells in the distance matrix.\n"); m->control_pressed = true;
+            row = 0;
+            return 0;
+        }
+
+        row = mins[0].row;
+        ull col = mins[0].col;
+
+        return col;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SparseDistanceMatrix", "getSmallestCell");
+        exit(1);
+    }
+}
+/***********************************************************************/
+
+//Sorts every row of seqVec with compareIndexes. Always returns 0.
+int SparseDistanceMatrix::sortSeqVec(){
+    try {
+
+        //saves time in getSmallestCell, by making it so you dont search the repeats
+        for (int i = 0; i < seqVec.size(); i++) { sort(seqVec[i].begin(), seqVec[i].end(), compareIndexes); }
+
+        return 0;
+    }
+    catch(exception& e) {
+        //report this class and function, so a failure here is not blamed on getSmallestCell
+        m->errorOut(e, "SparseDistanceMatrix", "sortSeqVec");
+        exit(1);
+    }
+}
+/***********************************************************************/
+
--- /dev/null
+#ifndef Mothur_sparsedistancematrix_h
+#define Mothur_sparsedistancematrix_h
+
+//
+// sparsedistancematrix.h
+// Mothur
+//
+// Created by Sarah Westcott on 7/16/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "mothur.h"
+#include "mothurout.h"
+
+
+class ListVector;
+
+
+/* For each distance in a sparse matrix we have a row, a column and a distance.
+ The PDistCell consists of the column and the distance.
+ We know the row from the row of the seqVec matrix in which the distance is stored.
+ SeqVec is square and each row is sorted so the column values are ascending, to save time in the search for the smallest distance. */
+
+/***********************************************************************/
+//Row / column pair naming one cell; getSmallestCell collects these for the cells tied at the smallest distance.
+struct PDistCellMin{
+ ull row;
+ ull col;
+ //PDistCell* cell;
+ //initializers listed in declaration order (row, then col) so they read the way they run and avoid -Wreorder
+ PDistCellMin(ull r, ull c) : row(r), col(c) {}
+};
+/***********************************************************************/
+
+
+
+//Sparse distance matrix kept as one vector of PDistCell per row (seqVec).
+//addCell stores every distance twice, once in each of the two rows involved,
+//and rmCell / updateCellCompliment keep the two copies in step.
+class SparseDistanceMatrix {
+
+public:
+ SparseDistanceMatrix();
+ ~SparseDistanceMatrix(){ clear(); }
+ int getNNodes(); //running entry count, raised by addCell and lowered by rmCell
+ ull getSmallestCell(ull& index); //Return the cell with the smallest distance: the column is returned, the row is written to index
+ float getSmallDist();
+
+ int rmCell(ull, ull); //(row, position within that row): removes the cell and its mirrored entry
+ int updateCellCompliment(ull, ull); //(row, position within that row): copies the cell's distance to its mirrored entry
+ void resize(ull n) { seqVec.resize(n); } //sets the number of rows
+ void clear();
+ void addCell(ull, PDistCell); //(row, cell): stores the cell and a mirrored entry in row cell.index
+ vector<vector<PDistCell> > seqVec; //public: read and sorted directly by code outside this class
+
+private:
+ PDistCell smallCell; //The cell with the smallest distance
+ int numNodes;
+
+ bool sorted; //set once getSmallestCell has run sortSeqVec
+ int sortSeqVec();
+ float smallDist, aboveCutoff;
+
+ MothurOut* m;
+};
+
+/***********************************************************************/
+
+
+
+#endif
exit(1);
}
}
-
+//**********************************************************************************************************************
+string SplitAbundCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Maps an output type to the file extension this command writes for it.
+    // type      - output type (fasta, list, name, group or accnos)
+    // inputName - not used by this command
+    // Returns the extension without a leading dot, or "" after logging an error for any other type.
+    try {
+        //is this a type this command creates
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        //output type -> extension
+        static const char* const tags[][2] = {
+            { "fasta",  "fasta"  },
+            { "list",   "list"   },
+            { "name",   "names"  },
+            { "group",  "groups" },
+            { "accnos", "accnos" }
+        };
+        const int numTags = sizeof(tags) / sizeof(tags[0]);
+
+        for (int i = 0; i < numTags; i++) {
+            if (type == tags[i][0]) { return tags[i][1]; }
+        }
+
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SplitAbundCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
SplitAbundCommand::SplitAbundCommand(){
try {
ofstream aout;
ofstream rout;
- string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "rare.list";
+ string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "rare." + getOutputFileNameTag("list");
m->openOutputFile(rare, rout);
outputNames.push_back(rare); outputTypes["list"].push_back(rare);
- string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "abund.list";
+ string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "abund." + getOutputFileNameTag("list");
m->openOutputFile(abund, aout);
outputNames.push_back(abund); outputTypes["list"].push_back(abund);
temp2 = new ofstream;
filehandles[Groups[i]+".abund"] = temp2;
- m->openOutputFile(fileroot + Groups[i] + tag + ".rare.list", *(filehandles[Groups[i]+".rare"]));
- m->openOutputFile(fileroot + Groups[i] + tag + ".abund.list", *(filehandles[Groups[i]+".abund"]));
- outputNames.push_back(fileroot + Groups[i] + tag + ".rare.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".rare.list");
- outputNames.push_back(fileroot + Groups[i] + tag + ".abund.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".abund.list");
+ string rareGroupFileName = fileroot + Groups[i] + tag + ".rare." + getOutputFileNameTag("list");
+ string abundGroupFileName = fileroot + Groups[i] + tag + ".abund." + getOutputFileNameTag("list");
+ m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
+ m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
+ outputNames.push_back(rareGroupFileName); outputTypes["list"].push_back(rareGroupFileName);
+ outputNames.push_back(abundGroupFileName); outputTypes["list"].push_back(abundGroupFileName);
}
map<string, string> groupVector;
ofstream aout;
ofstream rout;
- string rare = outputDir + m->getRootName(m->getSimpleName(namefile)) + "rare.names";
+ string rare = outputDir + m->getRootName(m->getSimpleName(namefile)) + "rare." + getOutputFileNameTag("name");
m->openOutputFile(rare, rout);
outputNames.push_back(rare); outputTypes["name"].push_back(rare);
- string abund = outputDir + m->getRootName(m->getSimpleName(namefile)) + "abund.names";
+ string abund = outputDir + m->getRootName(m->getSimpleName(namefile)) + "abund." + getOutputFileNameTag("name");
m->openOutputFile(abund, aout);
outputNames.push_back(abund); outputTypes["name"].push_back(abund);
temp2 = new ofstream;
filehandles[Groups[i]+".abund"] = temp2;
- m->openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
- m->openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
+ string rareGroupFileName = fileroot + Groups[i] + ".rare." + getOutputFileNameTag("name");
+ string abundGroupFileName = fileroot + Groups[i] + ".abund." + getOutputFileNameTag("name");
+ m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
+ m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
}
for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
(*(filehandles[it3->first])).close();
- outputNames.push_back(fileroot + it3->first + ".names"); outputTypes["name"].push_back(fileroot + it3->first + ".names");
+ outputNames.push_back(fileroot + it3->first + "." + getOutputFileNameTag("name")); outputTypes["name"].push_back(fileroot + it3->first + "." + getOutputFileNameTag("name"));
delete it3->second;
}
}
ofstream rout;
- string rare = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "rare.accnos";
+ string rare = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "rare." + getOutputFileNameTag("accnos");
m->openOutputFile(rare, rout);
outputNames.push_back(rare); outputTypes["accnos"].push_back(rare);
}
rout.close();
- string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "abund.accnos";
+ string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "abund." + getOutputFileNameTag("accnos");
m->openOutputFile(abund, aout);
outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
temp2 = new ofstream;
filehandles[Groups[i]+".abund"] = temp2;
- m->openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
- m->openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
+ m->openOutputFile(fileroot + tag + Groups[i] + ".rare." + getOutputFileNameTag("accnos"), *(filehandles[Groups[i]+".rare"]));
+ m->openOutputFile(fileroot + tag + Groups[i] + ".abund." + getOutputFileNameTag("accnos"), *(filehandles[Groups[i]+".abund"]));
}
//write rare
//close files
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
(*(filehandles[it3->first])).close();
- outputNames.push_back(fileroot + tag + it3->first + ".accnos"); outputTypes["accnos"].push_back(fileroot + tag + it3->first + ".accnos");
+ outputNames.push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("accnos")); outputTypes["accnos"].push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("accnos"));
delete it3->second;
}
}
ofstream aout;
ofstream rout;
- string rare = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "rare.groups";
+ string rare = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "rare." + getOutputFileNameTag("group");
m->openOutputFile(rare, rout);
outputNames.push_back(rare); outputTypes["group"].push_back(rare);
- string abund = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "abund.groups";
+ string abund = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "abund." + getOutputFileNameTag("group");
+;
m->openOutputFile(abund, aout);
outputNames.push_back(abund); outputTypes["group"].push_back(abund);
temp2 = new ofstream;
filehandles[Groups[i]+".abund"] = temp2;
- m->openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
- m->openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
+ m->openOutputFile(fileroot + tag + Groups[i] + ".rare." + getOutputFileNameTag("group"), *(filehandles[Groups[i]+".rare"]));
+ m->openOutputFile(fileroot + tag + Groups[i] + ".abund." + getOutputFileNameTag("group"), *(filehandles[Groups[i]+".abund"]));
}
for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
(*(filehandles[it3->first])).close();
- outputNames.push_back(fileroot + tag + it3->first + ".groups"); outputTypes["group"].push_back(fileroot + tag + it3->first + ".groups");
+ outputNames.push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("group")); outputTypes["group"].push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("group"));
delete it3->second;
}
}
ofstream aout;
ofstream rout;
- string rare = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "rare.fasta";
+ string rare = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "rare." + getOutputFileNameTag("fasta");
m->openOutputFile(rare, rout);
outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
- string abund = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "abund.fasta";
+ string abund = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "abund." + getOutputFileNameTag("fasta");
m->openOutputFile(abund, aout);
outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
temp2 = new ofstream;
filehandles[Groups[i]+".abund"] = temp2;
- m->openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
- m->openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
+ m->openOutputFile(fileroot + tag + Groups[i] + ".rare." + getOutputFileNameTag("fasta"), *(filehandles[Groups[i]+".rare"]));
+ m->openOutputFile(fileroot + tag + Groups[i] + ".abund." + getOutputFileNameTag("fasta"), *(filehandles[Groups[i]+".abund"]));
}
//open input file
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
(*(filehandles[it3->first])).close();
- outputNames.push_back(fileroot + tag + it3->first + ".fasta"); outputTypes["fasta"].push_back(fileroot + tag + it3->first + ".fasta");
+ outputNames.push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("fasta")); outputTypes["fasta"].push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("fasta"));
delete it3->second;
}
}
vector<string> setParameters();
string getCommandName() { return "split.abund"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Split.abund"; }
string getDescription() { return "split a list, name, group or fasta file based on abundance"; }
}
}
//**********************************************************************************************************************
+string SplitGroupCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Maps an output type to the file extension this command writes for it.
+    // type      - output type (fasta or name)
+    // inputName - not used by this command
+    // Returns "fasta" or "names", or "" after logging an error for any other type.
+    try {
+        //is this a type this command creates
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        if (type == "fasta") { return "fasta"; }
+        if (type == "name")  { return "names"; }
+
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SplitGroupCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
SplitGroupCommand::SplitGroupCommand(){
try {
abort = true; calledHelp = true;
m->mothurOut("Processing group: " + Groups[i]); m->mothurOutEndLine();
- string newFasta = fastafileRoot + Groups[i] + ".fasta";
- string newName = namefileRoot + Groups[i] + ".names";
+ string newFasta = fastafileRoot + Groups[i] + "." + getOutputFileNameTag("fasta");
+ string newName = namefileRoot + Groups[i] + "." + getOutputFileNameTag("name");
parser->getSeqs(Groups[i], newFasta, false);
outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta);
vector<string> setParameters();
string getCommandName() { return "split.group"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Split.group"; }
string getDescription() { return "split a name or fasta file by group"; }
//build tree from users taxonomy file
PhyloTree* phylo = new PhyloTree();
- ifstream in;
- m->openInputFile(taxFile, in);
-
- //read in users taxonomy file and add sequences to tree
- string seqname, tax;
- while(!in.eof()){
- in >> seqname >> tax; m->gobble(in);
- phylo->addSeqToTree(seqname, tax);
- }
- in.close();
+ map<string, string> temp;
+ m->readTax(taxFile, temp);
+
+ for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end();) {
+ phylo->addSeqToTree(itTemp->first, itTemp->second);
+ temp.erase(itTemp++);
+ }
phylo->assignHeirarchyIDs(0);
//subsampling may have created some otus with no sequences in them
eliminateZeroOTUS(thislookup);
-
+
if (m->control_pressed) { return m->currentBinLabels; }
//save mothurOut's binLabels to restore for next label
exit(1);
}
}
+//**********************************************************************************************************************
+string SubSampleCommand::getOutputFileNameTag(string type, string inputName=""){
+    // Builds the tag that is appended to an input file's root name to form the output file name.
+    // type      - output type (fasta, sabund, name, group, list, rabund or shared)
+    // inputName - input file whose extension is carried over to the output
+    // Returns "subsample" + the extension of inputName, or "" after logging an error for any other type.
+    try {
+        string outputFileName = "";
+
+        //is this a type this command creates
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+        }
+        else {
+            //every supported type shares the same naming rule
+            const bool hasTag = (type == "fasta")  || (type == "sabund") || (type == "name") || (type == "group")
+                             || (type == "list")   || (type == "rabund") || (type == "shared");
+
+            if (hasTag) {
+                outputFileName = "subsample" + m->getExtension(inputName);
+            }else {
+                m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+                m->control_pressed = true;
+            }
+        }
+        return outputFileName;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SubSampleCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+
//**********************************************************************************************************************
SubSampleCommand::SubSampleCommand(){
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "subsample" + m->getExtension(fastafile);
-
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
ofstream out;
m->openOutputFile(outputFileName, out);
if (namefile != "") {
m->mothurOut("Deconvoluting subsampled fasta file... "); m->mothurOutEndLine();
+ string outputNameFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
//use unique.seqs to create new name and fastafile
string inputString = "fasta=" + outputFileName;
m->mothurOut("/******************************************/"); m->mothurOutEndLine();
delete uniqueCommand;
m->mothurCalling = false;
- outputTypes["name"].push_back(filenames["name"][0]); outputNames.push_back(filenames["name"][0]);
- m->mothurRemove(outputFileName);
- outputFileName = filenames["fasta"][0];
-
+ m->renameFile(filenames["name"][0], outputNameFileName);
+ m->renameFile(filenames["fasta"][0], outputFileName);
+
+ outputTypes["name"].push_back(outputNameFileName); outputNames.push_back(outputNameFileName);
+
m->mothurOut("/******************************************/"); m->mothurOutEndLine();
m->mothurOut("Done."); m->mothurOutEndLine();
string groupOutputDir = outputDir;
if (outputDir == "") { groupOutputDir += m->hasPath(groupfile); }
- string groupOutputFileName = groupOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "subsample" + m->getExtension(groupfile);
+ string groupOutputFileName = groupOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
ofstream outGroup;
m->openOutputFile(groupOutputFileName, outGroup);
int SubSampleCommand::readNames() {
try {
- ifstream in;
- m->openInputFile(namefile, in);
-
- string thisname, repnames;
- map<string, vector<string> >::iterator it;
-
- while(!in.eof()){
-
- if (m->control_pressed) { in.close(); return 0; }
-
- in >> thisname; m->gobble(in); //read from first column
- in >> repnames; //read from second column
-
- it = nameMap.find(thisname);
- if (it == nameMap.end()) {
-
- vector<string> splitRepNames;
- m->splitAtComma(repnames, splitRepNames);
-
- nameMap[thisname] = splitRepNames;
- for (int i = 0; i < splitRepNames.size(); i++) { names.push_back(splitRepNames[i]); }
-
- }else{ m->mothurOut(thisname + " is already in namesfile. I will use first definition."); m->mothurOutEndLine(); }
-
- m->gobble(in);
- }
- in.close();
-
+ nameMap.clear();
+ m->readNames(namefile, nameMap);
+
+ //save names of all sequences
+ map<string, vector<string> >::iterator it;
+ for (it = nameMap.begin(); it != nameMap.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { names.push_back((it->second)[i]); } }
+
return 0;
}
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + thislookup[0]->getLabel() + ".subsample" + m->getExtension(sharedfile);
-
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + thislookup[0]->getLabel() + getOutputFileNameTag("shared", sharedfile);
SubSample sample;
vector<string> subsampledLabels = sample.getSample(thislookup, size);
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "subsample" + m->getExtension(listfile);
-
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
ofstream out;
m->openOutputFile(outputFileName, out);
outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(rabundfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(rabundfile)) + "subsample" + m->getExtension(rabundfile);
-
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(rabundfile)) + getOutputFileNameTag("rabund", rabundfile);
ofstream out;
m->openOutputFile(outputFileName, out);
outputTypes["rabund"].push_back(outputFileName); outputNames.push_back(outputFileName);
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(sabundfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sabundfile)) + "subsample" + m->getExtension(sabundfile);
-
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sabundfile)) + getOutputFileNameTag("sabund", sabundfile);
ofstream out;
m->openOutputFile(outputFileName, out);
outputTypes["sabund"].push_back(outputFileName); outputNames.push_back(outputFileName);
vector<string> setParameters();
string getCommandName() { return "sub.sample"; }
string getCommandCategory() { return "General"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Sub.sample"; }
string getDescription() { return "get a sampling of sequences from a list, shared, rabund, sabund or fasta file"; }
exit(1);
}
}
-
+//**********************************************************************************************************************
+string SummaryCommand::getOutputFileNameTag(string type, string inputName=""){ // maps an output type name to the filename tag used for it; inputName is not read in this body
+ try {
+ string outputFileName = ""; // stays empty on both error paths below
+ map<string, vector<string> >::iterator it;
+ //returns the tag for the requested type, or an empty string when the type is rejected
+ //only types that have an entry in outputTypes are accepted
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } // unknown type: logged only, control_pressed is left untouched
+ else {
+ if (type == "summary") { outputFileName = "summary"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type has an outputTypes entry but no tag here: log and set control_pressed
+ }
+ return outputFileName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SummaryCommand", "getOutputFileNameTag"); // report the exception with class and method name
+ exit(1); // then end the process with status 1
+ }
+}
//**********************************************************************************************************************
SummaryCommand::SummaryCommand(){
try {
numLines = 0;
numCols = 0;
- string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "summary";
- string fileNameAve = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "ave";
- string fileNameSTD = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "std";
- outputNames.push_back(fileNameRoot); outputTypes["summary"].push_back(fileNameRoot);
+ string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + getOutputFileNameTag("summary");
+ string fileNameAve = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "ave-std." + getOutputFileNameTag("summary");
+ outputNames.push_back(fileNameRoot); outputTypes["summary"].push_back(fileNameRoot);
-
-
if (inputFileNames.size() > 1) {
m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
groupIndex[fileNameRoot] = groups[p];
m->openOutputFile(fileNameRoot, outputFileHandle);
outputFileHandle << "label";
- ofstream outAve, outSTD;
+ ofstream outAve;
if (subsample) {
m->openOutputFile(fileNameAve, outAve);
- m->openOutputFile(fileNameSTD, outSTD);
- outputNames.push_back(fileNameAve); outputTypes["ave"].push_back(fileNameAve);
- outputNames.push_back(fileNameSTD); outputTypes["std"].push_back(fileNameSTD);
- outAve << "label"; outSTD << "label";
+ outputNames.push_back(fileNameAve); outputTypes["summary"].push_back(fileNameAve);
+ outAve << "label\tmethod";
outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
- outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
if (inputFileNames.size() > 1) {
groupIndex[fileNameAve] = groups[p];
- groupIndex[fileNameSTD] = groups[p];
}
}
for(int i=0;i<sumCalculators.size();i++){
if(sumCalculators[i]->getCols() == 1){
outputFileHandle << '\t' << sumCalculators[i]->getName();
- if (subsample) { outAve << '\t' << sumCalculators[i]->getName(); outSTD << '\t' << sumCalculators[i]->getName(); }
+ if (subsample) { outAve << '\t' << sumCalculators[i]->getName(); }
numCols++;
}
else{
outputFileHandle << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";
- if (subsample) { outAve << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; outSTD << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; }
+ if (subsample) { outAve << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; }
numCols += 3;
}
}
outputFileHandle << endl;
- if (subsample) { outSTD << endl; outAve << endl; }
+ if (subsample) { outAve << endl; }
//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
set<string> processedLabels;
- if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete sabund; delete input; return 0; }
+ if (m->control_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete sabund; delete input; return 0; }
while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
- if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete sabund; delete input; return 0; }
+ if (m->control_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete sabund; delete input; return 0; }
if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
processedLabels.insert(sabund->getLabel());
userLabels.erase(sabund->getLabel());
- process(sabund, outputFileHandle, outAve, outSTD);
+ process(sabund, outputFileHandle, outAve);
- if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete sabund; delete input; return 0; }
+ if (m->control_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete sabund; delete input; return 0; }
numLines++;
}
processedLabels.insert(sabund->getLabel());
userLabels.erase(sabund->getLabel());
- process(sabund, outputFileHandle, outAve, outSTD);
+ process(sabund, outputFileHandle, outAve);
- if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete sabund; delete input; return 0; }
+ if (m->control_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete sabund; delete input; return 0; }
numLines++;
//restore real lastlabel to save below
sabund = input->getSAbundVector();
}
- if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete input; return 0; }
+ if (m->control_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete input; return 0; }
//output error messages about any remaining user labels
set<string>::iterator it;
sabund = input->getSAbundVector(lastLabel);
m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
- process(sabund, outputFileHandle, outAve, outSTD);
+ process(sabund, outputFileHandle, outAve);
- if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete sabund; delete input; return 0; }
+ if (m->control_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete sabund; delete input; return 0; }
numLines++;
delete sabund;
}
outputFileHandle.close();
- if (subsample) { outAve.close(); outSTD.close(); }
+ if (subsample) { outAve.close(); }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete input; return 0; }
}
}
//**********************************************************************************************************************
-int SummaryCommand::process(SAbundVector*& sabund, ofstream& outputFileHandle, ofstream& outAve, ofstream& outStd) {
+int SummaryCommand::process(SAbundVector*& sabund, ofstream& outputFileHandle, ofstream& outAve) {
try {
//calculator -> data -> values
outputFileHandle << endl;
if (subsample) {
- outAve << sabund->getLabel() << '\t'; outStd << sabund->getLabel() << '\t';
+ outAve << sabund->getLabel() << '\t' << "ave\t";
//find ave and std for this label and output
//will need to modify the createGroupSummary to combine results and not mess with the .summary file.
}
}
+ outAve << endl << sabund->getLabel() << '\t' << "std\t";
for (int i = 0; i < stdDev.size(); i++) { //finds average.
for (int j = 0; j < stdDev[i].size(); j++) {
stdDev[i][j] /= (float) iters;
stdDev[i][j] = sqrt(stdDev[i][j]);
- outStd << stdDev[i][j] << '\t';
+ outAve << stdDev[i][j] << '\t';
}
}
- outAve << endl; outStd << endl;
+ outAve << endl;
}
return 0;
//open each groups summary file
vector<string> newComboNames;
- string newLabel = "";
+
map<string, map<string, vector<string> > > files;
+ map<string, string> filesTypesLabels;
+ map<string, int> filesTypesNumLines;
for (int i=0; i<outputNames.size(); i++) {
- string extension = m->getExtension(outputNames[i]);
- string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
- m->mothurRemove(combineFileName); //remove old file
-
vector<string> thisFilesLines;
ifstream temp;
m->openInputFile(outputNames[i], temp);
//read through first line - labels
- string tempLabel;
- if (i == 0) { //we want to save the labels to output below
- for (int j = 0; j < numCols+1; j++) {
- temp >> tempLabel;
-
- if (j == 1) { newLabel += "group\t" + tempLabel + '\t';
- }else{ newLabel += tempLabel + '\t'; }
- }
- }else{ for (int j = 0; j < numCols+1; j++) { temp >> tempLabel; } }
+ string labelsLine = m->getline(temp);
+ vector<string> theseLabels = m->splitWhiteSpace(labelsLine);
+
+ string newLabel = "";
+ for (int j = 0; j < theseLabels.size(); j++) {
+ if (j == 1) { newLabel += "group\t" + theseLabels[j] + '\t';
+ }else{ newLabel += theseLabels[j] + '\t'; }
+ }
m->gobble(temp);
+ int stop = numLines;
+ if (theseLabels.size() != numCols+1) { stop = numLines*2; }
//for each label
- for (int k = 0; k < numLines; k++) {
+ for (int k = 0; k < stop; k++) {
string thisLine = "";
string tempLabel;
- for (int j = 0; j < numCols+1; j++) {
+ for (int j = 0; j < theseLabels.size(); j++) {
temp >> tempLabel;
//save for later
m->gobble(temp);
}
+ string extension = m->getExtension(outputNames[i]);
+ if (theseLabels.size() != numCols+1) { extension = ".ave-std" + extension; }
+ string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
+ m->mothurRemove(combineFileName); //remove old file
+ filesTypesLabels[extension] = newLabel;
+ filesTypesNumLines[extension] = stop;
+
map<string, map<string, vector<string> > >::iterator itFiles = files.find(extension);
if (itFiles != files.end()) { //add new files info to existing type
files[extension][outputNames[i]] = thisFilesLines;
m->openOutputFile(combineFileName, out);
//output label line to new file
- out << newLabel << endl;
+ out << filesTypesLabels[extension] << endl;
//for each label
- for (int k = 0; k < numLines; k++) {
+ for (int k = 0; k < filesTypesNumLines[extension]; k++) {
//grab summary data for each group
for (map<string, vector<string> >::iterator itType = thisType.begin(); itType != thisType.end(); itType++) {
vector<string> setParameters();
string getCommandName() { return "summary.single"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Summary.single"; }
string getDescription() { return "generate summary file that has the calculator value for each line in the OTU data"; }
vector<string> parseSharedFile(string);
vector<string> createGroupSummaryFile(int, int, vector<string>&, map<string, string>);
- int process(SAbundVector*&, ofstream&, ofstream&, ofstream&);
+ int process(SAbundVector*&, ofstream&, ofstream&);
};
exit(1);
}
}
-
+//**********************************************************************************************************************
+string SummaryQualCommand::getOutputFileNameTag(string type, string inputName=""){ // maps an output type name to the filename tag used for it; inputName is not read in this body
+ try {
+ string outputFileName = ""; // stays empty on both error paths below
+ map<string, vector<string> >::iterator it;
+ //returns the tag for the requested type, or an empty string when the type is rejected
+ //only types that have an entry in outputTypes are accepted
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } // unknown type: logged only, control_pressed is left untouched
+ else {
+ if (type == "summary") { outputFileName = "qual.summary"; } // the only type with a tag defined in this method
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type has an outputTypes entry but no tag here: log and set control_pressed
+ }
+ return outputFileName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SummaryQualCommand", "getOutputFileNameTag"); // report the exception with class and method name
+ exit(1); // then end the process with status 1
+ }
+}
//**********************************************************************************************************************
SummaryQualCommand::SummaryQualCommand(){
try {
if (m->control_pressed) { return 0; }
//print summary file
- string summaryFile = outputDir + m->getRootName(m->getSimpleName(qualfile)) + "qual.summary";
+ string summaryFile = outputDir + m->getRootName(m->getSimpleName(qualfile)) + getOutputFileNameTag("summary");
printQual(summaryFile, position, averageQ, scores);
if (m->control_pressed) { m->mothurRemove(summaryFile); return 0; }
vector<string> setParameters();
string getCommandName() { return "summary.qual"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Summary.qual"; }
string getDescription() { return "summarize the quality of a set of sequences"; }
}
}
//**********************************************************************************************************************
+string SummarySharedCommand::getOutputFileNameTag(string type, string inputName=""){ // maps an output type name to the filename tag used for it; inputName is not read in this body
+ try {
+ string outputFileName = ""; // stays empty on both error paths below
+ map<string, vector<string> >::iterator it;
+ //returns the tag for the requested type, or an empty string when the type is rejected
+ //only types that have an entry in outputTypes are accepted
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } // unknown type: logged only, control_pressed is left untouched
+ else {
+ if (type == "summary") { outputFileName = "shared.summary"; }
+ else if (type == "phylip") { outputFileName = "dist"; } // second type with a tag defined in this method
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type has an outputTypes entry but no tag here: log and set control_pressed
+ }
+ return outputFileName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SummarySharedCommand", "getOutputFileNameTag"); // report the exception with class and method name
+ exit(1); // then end the process with status 1
+ }
+}
+//**********************************************************************************************************************
SummarySharedCommand::SummarySharedCommand(){
try {
abort = true; calledHelp = true;
setParameters();
vector<string> tempOutNames;
outputTypes["summary"] = tempOutNames;
+ outputTypes["phylip"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
//initialize outputTypes
vector<string> tempOutNames;
outputTypes["summary"] = tempOutNames;
+ outputTypes["phylip"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
if (abort == true) { if (calledHelp) { return 0; } return 2; }
ofstream outputFileHandle, outAll;
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "shared.summary";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + getOutputFileNameTag("summary");
//if the users entered no valid calculators don't execute command
if (sumCalculators.size() == 0) { return 0; }
outputFileHandle.close();
//create file and put column headers for multiple groups file
- string outAllFileName = ((m->getRootName(sharedfile)) + "sharedmultiple.summary");
+ string outAllFileName = ((m->getRootName(sharedfile)) + "multiple." + getOutputFileNameTag("summary"));
if (mult == true) {
m->openOutputFile(outAllFileName, outAll);
outputNames.push_back(outAllFileName);
//clean up memory
for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
thisItersLookup.clear();
- for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
}else {
if (createPhylip) {
for (int i = 0; i < calcDists.size(); i++) {
matrix[column][row] = dist;
}
- string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".dist";
+ string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + "." + getOutputFileNameTag("phylip");
outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
ofstream outDist;
m->openOutputFile(distFileName, outDist);
}
}
}
+ for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
}
if (iters != 1) {
stdmatrix[column][row] = stdDist;
}
- string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".ave.dist";
+ string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".ave." + getOutputFileNameTag("phylip");
outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
ofstream outAve;
m->openOutputFile(distFileName, outAve);
outAve.close();
- distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".std.dist";
+ distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".std." + getOutputFileNameTag("phylip");
outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
ofstream outSTD;
m->openOutputFile(distFileName, outSTD);
vector<string> setParameters();
string getCommandName() { return "summary.shared"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Summary.shared"; }
string getDescription() { return "generate a summary file containing calculator values for each line in the OTU data and for all possible comparisons between groups"; }
exit(1);
}
}
-
+//**********************************************************************************************************************
+string SummaryTaxCommand::getOutputFileNameTag(string type, string inputName=""){ // maps an output type name to the filename tag used for it; inputName is not read in this body
+ try {
+ string outputFileName = ""; // stays empty on both error paths below
+ map<string, vector<string> >::iterator it;
+ //returns the tag for the requested type, or an empty string when the type is rejected
+ //only types that have an entry in outputTypes are accepted
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } // unknown type: logged only, control_pressed is left untouched
+ else {
+ if (type == "summary") { outputFileName = "tax.summary"; } // the only type with a tag defined in this method
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type has an outputTypes entry but no tag here: log and set control_pressed
+ }
+ return outputFileName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SummaryTaxCommand", "getOutputFileNameTag"); // report the exception with class and method name
+ exit(1); // then end the process with status 1
+ }
+}
//**********************************************************************************************************************
SummaryTaxCommand::SummaryTaxCommand(){
try {
//print summary file
ofstream outTaxTree;
- string summaryFile = outputDir + m->getRootName(m->getSimpleName(taxfile)) + "tax.summary";
+ string summaryFile = outputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("summary");
m->openOutputFile(summaryFile, outTaxTree);
taxaSum->print(outTaxTree);
outTaxTree.close();
vector<string> setParameters();
string getCommandName() { return "summary.tax"; }
string getCommandCategory() { return "Phylotype Analysis"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Summary.tax"; }
string getDescription() { return "summarize the taxonomies of a set of sequences"; }
vector<string> setParameters();
string getCommandName() { return "system"; }
string getCommandCategory() { return "General"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string) { return ""; } // returns an empty tag for every type and input name; both arguments are ignored
+ string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/System"; }
string getDescription() { return "execute system commands from within mothur"; }
}
}
//**********************************************************************************************************************
+string TreeGroupCommand::getOutputFileNameTag(string type, string inputName=""){ // maps an output type name to the filename tag used for it; inputName is not read in this body
+ try {
+ string outputFileName = ""; // stays empty on both error paths below
+ map<string, vector<string> >::iterator it;
+ //returns the tag for the requested type, or an empty string when the type is rejected
+ //only types that have an entry in outputTypes are accepted
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } // unknown type: logged only, control_pressed is left untouched
+ else {
+ if (type == "tree") { outputFileName = "tre"; } // the only type with a tag defined in this method
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type has an outputTypes entry but no tag here: log and set control_pressed
+ }
+ return outputFileName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TreeGroupCommand", "getOutputFileNameTag"); // report the exception with class and method name
+ exit(1); // then end the process with status 1
+ }
+}
+//**********************************************************************************************************************
TreeGroupCommand::TreeGroupCommand(){
try {
abort = true; calledHelp = true;
TreeGroupCommand::~TreeGroupCommand(){
if (abort == false) {
if (format == "sharedfile") { delete input; }
- else { delete readMatrix; delete matrix; delete list; }
+ else { delete list; }
delete tmap;
}
}else{
//read in dist file
filename = inputfile;
-
+
+ ReadMatrix* readMatrix;
if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }
else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
readMatrix->read(nameMap);
list = readMatrix->getListVector();
- matrix = readMatrix->getMatrix();
+ SparseDistanceMatrix* dMatrix = readMatrix->getDMatrix();
//make treemap
tmap = new TreeMap();
if (m->control_pressed) { return 0; }
- vector< vector<double> > matrix = makeSimsDist();
+ vector< vector<double> > matrix = makeSimsDist(dMatrix);
+ delete readMatrix;
+ delete dMatrix;
if (m->control_pressed) { return 0; }
//create a new filename
- string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "tre";
+ string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + getOutputFileNameTag("tree");
outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
Tree* newTree = createTree(matrix);
}
}
/***********************************************************/
-vector< vector<double> > TreeGroupCommand::makeSimsDist() {
+vector< vector<double> > TreeGroupCommand::makeSimsDist(SparseDistanceMatrix* matrix) {
try {
numGroups = list->size();
//go through sparse matrix and fill sims
//go through each cell in the sparsematrix
- for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
- //similairity = -(distance-1)
- simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0);
- simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0);
-
- if (m->control_pressed) { return simMatrix; }
+ for (int i = 0; i < matrix->seqVec.size(); i++) {
+ for (int j = 0; j < matrix->seqVec[i].size(); j++) {
+
+ //already checked everyone else in row
+ if (i < matrix->seqVec[i][j].index) {
+ simMatrix[i][matrix->seqVec[i][j].index] = -(matrix->seqVec[i][j].dist -1.0);
+ simMatrix[matrix->seqVec[i][j].index][i] = -(matrix->seqVec[i][j].dist -1.0);
+ if (m->control_pressed) { return simMatrix; }
+ }
+ }
}
return simMatrix;
}
//create a new filename
- string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".ave.tre";
+ string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".ave." + getOutputFileNameTag("tree");
outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
//creates tree from similarity matrix and write out file
if (m->control_pressed) { break; }
//create a new filename
- string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".all.tre";
+ string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".all." + getOutputFileNameTag("tree");
outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
ofstream outAll;
Tree* conTree = consensus.getTree(trees);
//create a new filename
- string conFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".cons.tre";
+ string conFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".cons." + getOutputFileNameTag("tree");
outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
ofstream outTree;
m->openOutputFile(conFile, outTree);
}
//create a new filename
- string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre";
+ string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + getOutputFileNameTag("tree");
outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
//creates tree from similarity matrix and write out file
vector<string> setParameters();
string getCommandName() { return "tree.shared"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Tree.shared"; }
string getDescription() { return "generate a tree file that describes the dissimilarity among groups"; }
Tree* createTree(vector< vector<double> >&);
void printSims(ostream&, vector< vector<double> >&);
int makeSimsShared();
- vector< vector<double> > makeSimsDist();
+ vector< vector<double> > makeSimsDist(SparseDistanceMatrix*);
int writeTree(string, Tree*);
int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
- ReadMatrix* readMatrix;
- SparseMatrix* matrix;
NameAssignment* nameMap;
ListVector* list;
TreeMap* tmap;
TreeMap::~TreeMap(){}
/************************************************************/
int TreeMap::readMap(string gf) {
-
- groupFileName = gf;
- m->openInputFile(gf, fileHandle);
-
- string seqName, seqGroup;
- int error = 0;
-
- while(fileHandle){
- fileHandle >> seqName; m->gobble(fileHandle); //read from first column
- fileHandle >> seqGroup; //read from second column
+ try {
+ groupFileName = gf;
+ m->openInputFile(gf, fileHandle);
- if (m->control_pressed) { fileHandle.close(); return 1; }
-
- setNamesOfGroups(seqGroup);
+ string seqName, seqGroup;
+ int error = 0;
+
+ string rest = "";
+ char buffer[4096];
+ bool pairDone = false;
+ bool columnOne = true;
- map<string, GroupIndex>::iterator itCheck = treemap.find(seqName);
- if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
- else {
- namesOfSeqs.push_back(seqName);
- treemap[seqName].groupname = seqGroup; //store data in map
+ while (!fileHandle.eof()) {
+ if (m->control_pressed) { fileHandle.close(); return 1; }
- it2 = seqsPerGroup.find(seqGroup);
- if (it2 == seqsPerGroup.end()) { //if it's a new group
- seqsPerGroup[seqGroup] = 1;
- }else {//it's a group we already have
- seqsPerGroup[seqGroup]++;
- }
+ fileHandle.read(buffer, 4096);
+ vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { seqName = pieces[i]; columnOne=false; }
+ else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ setNamesOfGroups(seqGroup);
+
+ map<string, GroupIndex>::iterator itCheck = treemap.find(seqName);
+ if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
+ else {
+ namesOfSeqs.push_back(seqName);
+ treemap[seqName].groupname = seqGroup; //store data in map
+
+ it2 = seqsPerGroup.find(seqGroup);
+ if (it2 == seqsPerGroup.end()) { //if it's a new group
+ seqsPerGroup[seqGroup] = 1;
+ }else {//it's a group we already have
+ seqsPerGroup[seqGroup]++;
+ }
+ }
+ pairDone = false;
+ }
+ }
}
+ fileHandle.close();
- m->gobble(fileHandle);
+ return error;
}
- fileHandle.close();
-
- return error;
+ catch(exception& e) {
+ m->errorOut(e, "TreeMap", "readMap");
+ exit(1);
+ }
}
/************************************************************/
int TreeMap::readMap() {
- string seqName, seqGroup;
- int error = 0;
-
- while(fileHandle){
- fileHandle >> seqName; m->gobble(fileHandle); //read from first column
- fileHandle >> seqGroup; //read from second column
-
- if (m->control_pressed) { fileHandle.close(); return 1; }
-
- setNamesOfGroups(seqGroup);
-
- map<string, GroupIndex>::iterator itCheck = treemap.find(seqName);
- if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
- else {
- namesOfSeqs.push_back(seqName);
- treemap[seqName].groupname = seqGroup; //store data in map
-
- it2 = seqsPerGroup.find(seqGroup);
- if (it2 == seqsPerGroup.end()) { //if it's a new group
- seqsPerGroup[seqGroup] = 1;
- }else {//it's a group we already have
- seqsPerGroup[seqGroup]++;
- }
- }
-
- m->gobble(fileHandle);
- }
- fileHandle.close();
-
-
- return error;
+ try {
+ string seqName, seqGroup;
+ int error = 0;
+
+ string rest = "";
+ char buffer[4096];
+ bool pairDone = false;
+ bool columnOne = true;
+
+ while (!fileHandle.eof()) {
+ if (m->control_pressed) { fileHandle.close(); return 1; }
+
+ fileHandle.read(buffer, 4096);
+ vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { seqName = pieces[i]; columnOne=false; }
+ else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ setNamesOfGroups(seqGroup);
+
+ map<string, GroupIndex>::iterator itCheck = treemap.find(seqName);
+ if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
+ else {
+ namesOfSeqs.push_back(seqName);
+ treemap[seqName].groupname = seqGroup; //store data in map
+
+ it2 = seqsPerGroup.find(seqGroup);
+ if (it2 == seqsPerGroup.end()) { //if it's a new group
+ seqsPerGroup[seqGroup] = 1;
+ }else {//it's a group we already have
+ seqsPerGroup[seqGroup]++;
+ }
+ }
+ pairDone = false;
+ }
+ }
+ }
+ fileHandle.close();
+
+ return error;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TreeMap", "readMap");
+ exit(1);
+ }
}
/************************************************************/
void TreeMap::addSeq(string seqName, string seqGroup) {
exit(1);
}
}
-
+//**********************************************************************************************************************
+// Returns the file-name tag this command appends for the given output type
+// ("flow", "fasta" or "file"). An empty string is returned, after logging an error,
+// when the type is not registered in outputTypes or has no tag defined; the latter
+// case also sets m->control_pressed. inputName is not used here.
+string TrimFlowsCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        string tag = "";
+
+        //only types registered in outputTypes can have a tag
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return tag;
+        }
+
+        if (type == "flow") {
+            tag = "flow";
+        }else if (type == "fasta") {
+            tag = "flow.fasta";
+        }else if (type == "file") {
+            tag = "flow.files";
+        }else {
+            //registered type without a tag: report it and flag the run to stop
+            m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+            m->control_pressed = true;
+        }
+
+        return tag;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrimFlowsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
TrimFlowsCommand::TrimFlowsCommand(){
vector<string> tempOutNames;
outputTypes["flow"] = tempOutNames;
outputTypes["fasta"] = tempOutNames;
+ outputTypes["file"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "TrimFlowsCommand", "TrimFlowsCommand");
vector<string> tempOutNames;
outputTypes["flow"] = tempOutNames;
outputTypes["fasta"] = tempOutNames;
+ outputTypes["file"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- string trimFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "trim.flow";
+ string trimFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "trim." + getOutputFileNameTag("flow");
outputNames.push_back(trimFlowFileName); outputTypes["flow"].push_back(trimFlowFileName);
- string scrapFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "scrap.flow";
+ string scrapFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "scrap." + getOutputFileNameTag("flow");;
outputNames.push_back(scrapFlowFileName); outputTypes["flow"].push_back(scrapFlowFileName);
- string fastaFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.fasta";
+ string fastaFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("fasta");
if(fasta){
outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName);
}
if(allFiles){
set<string> namesAlreadyProcessed;
- flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files";
+ flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("file");
m->openOutputFile(flowFilesFileName, output);
for(int i=0;i<barcodePrimerComboFileNames.size();i++){
output.close();
}
else{
- flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files";
+ flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("file");
m->openOutputFile(flowFilesFileName, output);
output << m->getFullPathName(trimFlowFileName) << endl;
vector<string> setParameters();
string getCommandName() { return "trim.flows"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Trim.flows"; }
string getDescription() { return "trim.flows"; }
exit(1);
}
}
+//**********************************************************************************************************************
+// Returns the file-name tag this command appends for the given output type
+// ("qfile", "fasta", "group" or "name"). An empty string is returned, after logging
+// an error, when the type is not registered in outputTypes or has no tag defined; the
+// latter case also sets m->control_pressed. inputName is not used here.
+string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        string tag = "";
+
+        //only types registered in outputTypes can have a tag
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return tag;
+        }
+
+        if (type == "qfile") {
+            tag = "qual";
+        }else if (type == "fasta") {
+            tag = "fasta";
+        }else if (type == "group") {
+            tag = "groups";
+        }else if (type == "name") {
+            tag = "names";
+        }else {
+            //registered type without a tag: report it and flag the run to stop
+            m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+            m->control_pressed = true;
+        }
+
+        return tag;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrimSeqsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
vector<vector<string> > qualFileNames;
vector<vector<string> > nameFileNames;
- string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
+ string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("fasta");
outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
- string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
+ string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("fasta");
outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
- string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
- string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
+ string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("qfile");
+ string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("qfile");
if (qFileName != "") {
outputNames.push_back(trimQualFile);
outputTypes["qfile"].push_back(scrapQualFile);
}
- string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
- string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
+ string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim." + getOutputFileNameTag("name");
+ string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap." + getOutputFileNameTag("name");
if (nameFile != "") {
m->readNames(nameFile, nameMap);
if(oligoFile != ""){
createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
if (createGroup) {
- outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
+ outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group");
outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
}
}
m->openInputFile(it->first, in);
ofstream out;
- string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
+ string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + getOutputFileNameTag("group");
outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
m->openOutputFile(thisGroupName, out);
Sequence currSeq(inFASTA); m->gobble(inFASTA);
//cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
+
QualityScores currQual;
if(qFileName != ""){
currQual = QualityScores(qFile); m->gobble(qFile);
+ if ((m->debug)&&(count>15800)) { m->mothurOut("[DEBUG]: " + toString(count) + " fasta = " + currSeq.getName() + '\n'); m->mothurOut("[DEBUG]: " + toString(getpid()) + '\n'); }
}
string origSeq = currSeq.getUnaligned();
currQual.printQScores(trimQualFile);
}
+
if(nameFile != ""){
map<string, string>::iterator itName = nameMap.find(currSeq.getName());
if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; }
outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
+ int numRedundants = 0;
if (nameFile != "") {
map<string, string>::iterator itName = nameMap.find(currSeq.getName());
if (itName != nameMap.end()) {
vector<string> thisSeqsNames;
m->splitAtChar(itName->second, thisSeqsNames, ',');
+ numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
}
}
map<string, int>::iterator it = groupCounts.find(thisGroup);
- if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; }
- else { groupCounts[it->first]++; }
+ if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; }
+ else { groupCounts[it->first] += (1 + numRedundants); }
}
}
tempNameFileNames,
lines[process],
qLines[process]);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
//pass groupCounts to parent
if(createGroup){
}
for (int i = 0; i < (fastaFilePos.size()-1); i++) {
+ if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)])); }
}
vector<string> setParameters();
string getCommandName() { return "trim.seqs"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Trim.seqs"; }
string getDescription() { return "provides the preprocessing features needed to screen and sort pyrosequences"; }
outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
+ int numRedundants = 0;
if (pDataArray->nameFile != "") {
map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
if (itName != pDataArray->nameMap.end()) {
vector<string> thisSeqsNames;
pDataArray->m->splitAtChar(itName->second, thisSeqsNames, ',');
+ numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
}
}
map<string, int>::iterator it = pDataArray->groupCounts.find(thisGroup);
- if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[thisGroup] = 1; }
- else { pDataArray->groupCounts[it->first]++; }
+ if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[thisGroup] = 1 + numRedundants; }
+ else { pDataArray->groupCounts[it->first] += (1 + numRedundants); }
}
}
CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
- CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
+ CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
exit(1);
}
}
+//**********************************************************************************************************************
+// Returns the file-name tag this command appends for the given output type
+// ("unweighted", "uwsummary", "phylip", "column" or "tree"). An empty string is
+// returned, after logging an error, when the type is not registered in outputTypes or
+// has no tag defined; the latter case also sets m->control_pressed. inputName is not
+// used here.
+string UnifracUnweightedCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        string tag = "";
+
+        //only types registered in outputTypes can have a tag
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return tag;
+        }
+
+        if (type == "unweighted") {
+            tag = "unweighted";
+        }else if (type == "uwsummary") {
+            tag = "uwsummary";
+        }else if ((type == "phylip") || (type == "column")) {
+            //both distance formats share one tag
+            tag = "dist";
+        }else if (type == "tree") {
+            tag = "tre";
+        }else {
+            //registered type without a tag: report it and flag the run to stop
+            m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+            m->control_pressed = true;
+        }
+
+        return tag;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "UnifracUnweightedCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+
//**********************************************************************************************************************
UnifracUnweightedCommand::UnifracUnweightedCommand(){
try {
string temp = validParameter.validFile(parameters, "distance", false);
if (temp == "not found") { phylip = false; outputForm = ""; }
else{
+ if (temp=="phylip") { temp = "lt"; }
if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
}
map<string, string> unique2Dup = reader->getNameMap();
delete reader;
- sumFile = outputDir + m->getSimpleName(treefile) + ".uwsummary";
+ sumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("uwsummary");
outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
m->openOutputFile(sumFile, outSum);
counter = 0;
if (random) {
- output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted", itersString);
- outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted");
- outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted");
+ output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("unweighted"), itersString);
+ outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("unweighted"));
+ outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("unweighted"));
}
}
}
- string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.ave.dist";
- outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);
-
+ string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.ave." + getOutputFileNameTag("phylip");
+ if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); }
+ else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); }
ofstream out;
m->openOutputFile(aveFileName, out);
- string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.std.dist";
- outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName);
-
+ string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.std." + getOutputFileNameTag("phylip");
+ if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
+ else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
ofstream outStd;
m->openOutputFile(stdFileName, outStd);
Tree* conTree = con.getTree(newTrees);
//create a new filename
- string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons.tre";
+ string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons." + getOutputFileNameTag("tree");
outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
ofstream outTree;
m->openOutputFile(conFile, outTree);
vector<Tree*> trees;
//create a new filename
- string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all.tre";
+ string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all." + getOutputFileNameTag("tree");
outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
ofstream outAll;
try {
string phylipFileName;
if ((outputForm == "lt") || (outputForm == "square")) {
- phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.phylip.dist";
+ phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.phylip." + getOutputFileNameTag("phylip");
outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
}else { //column
- phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.column.dist";
+ phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.column." + getOutputFileNameTag("column");
outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
}
vector<string> setParameters();
string getCommandName() { return "unifrac.unweighted"; }
string getCommandCategory() { return "Hypothesis Testing"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "Lozupone C, Knight R (2005). UniFrac: a new phylogenetic method for comparing microbial communities. Appl Environ Microbiol 71: 8228-35. \nhttp://www.mothur.org/wiki/Unifrac.unweighted"; }
string getDescription() { return "generic tests that describes whether two or more communities have the same structure"; }
CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
- CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
+ CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
}
}
//**********************************************************************************************************************
+// Returns the file-name tag this command appends for the given output type
+// ("weighted", "wsummary", "phylip", "column" or "tree"). An empty string is
+// returned, after logging an error, when the type is not registered in outputTypes or
+// has no tag defined; the latter case also sets m->control_pressed. inputName is not
+// used here.
+string UnifracWeightedCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        string tag = "";
+
+        //only types registered in outputTypes can have a tag
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return tag;
+        }
+
+        if (type == "weighted") {
+            tag = "weighted";
+        }else if (type == "wsummary") {
+            tag = "wsummary";
+        }else if ((type == "phylip") || (type == "column")) {
+            //both distance formats share one tag
+            tag = "dist";
+        }else if (type == "tree") {
+            tag = "tre";
+        }else {
+            //registered type without a tag: report it and flag the run to stop
+            m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+            m->control_pressed = true;
+        }
+
+        return tag;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "UnifracWeightedCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
UnifracWeightedCommand::UnifracWeightedCommand(){
try {
abort = true; calledHelp = true;
string temp = validParameter.validFile(parameters, "distance", false);
if (temp == "not found") { phylip = false; outputForm = ""; }
else{
+ if (temp=="phylip") { temp = "lt"; }
if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
}
if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
- sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
+ //the tag has no leading '.', so supply the separator that the old ".wsummary" suffix carried
+ sumFile = outputDir + m->getSimpleName(treefile) + "." + getOutputFileNameTag("wsummary");
m->openOutputFile(sumFile, outSum);
outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
if (random) {
- output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted", itersString);
- outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
- outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
+ output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted"), itersString);
+ outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted"));
+ outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted"));
}
userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
}
}
- string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.ave.dist";
- outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);
-
+ string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.ave." + getOutputFileNameTag("phylip");
+ if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); }
+ else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); }
ofstream out;
m->openOutputFile(aveFileName, out);
- string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.std.dist";
- outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName);
-
+ string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.std." + getOutputFileNameTag("phylip");
+ if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
+ else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
ofstream outStd;
m->openOutputFile(stdFileName, outStd);
Tree* conTree = con.getTree(newTrees);
//create a new filename
- string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons.tre";
+ string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons." + getOutputFileNameTag("tree");
outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
ofstream outTree;
m->openOutputFile(conFile, outTree);
vector<Tree*> trees;
//create a new filename
- string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all.tre";
+ string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all." + getOutputFileNameTag("tree");
outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
ofstream outAll;
string phylipFileName;
if ((outputForm == "lt") || (outputForm == "square")) {
- phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip.dist";
+ phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip." + getOutputFileNameTag("phylip");
outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
}else { //column
- phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column.dist";
+ phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column." + getOutputFileNameTag("column");
outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
}
vector<string> setParameters();
string getCommandName() { return "unifrac.weighted"; }
string getCommandCategory() { return "Hypothesis Testing"; }
- string getHelpString();
+ string getOutputFileNameTag(string, string);
+ string getHelpString();
string getCitation() { return "Lozupone CA, Hamady M, Kelley ST, Knight R (2007). Quantitative and qualitative beta diversity measures lead to different insights into factors that structure microbial communities. Appl Environ Microbiol 73: 1576-85. \nhttp://www.mothur.org/wiki/Unifrac.weighted"; }
string getDescription() { return "generic tests that describes whether two or more communities have the same structure"; }
exit(1);
}
}
+//**********************************************************************************************************************
+
+// Returns the file-name tag this command appends for the given output type (only
+// "svg" is defined). An empty string is returned, after logging an error, when the
+// type is not registered in outputTypes or has no tag defined; the latter case also
+// sets m->control_pressed. inputName is not used here.
+string VennCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        string tag = "";
+
+        //only types registered in outputTypes can have a tag
+        if (outputTypes.find(type) == outputTypes.end()) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return tag;
+        }
+
+        if (type == "svg") {
+            tag = "svg";
+        }else {
+            //registered type without a tag: report it and flag the run to stop
+            m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+            m->control_pressed = true;
+        }
+
+        return tag;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "VennCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
VennCommand::VennCommand(){
}else if (Estimators[i] == "chao") {
vennCalculators.push_back(new Chao1());
}else if (Estimators[i] == "ace") {
- if(abund < 5)
- abund = 10;
+ if(abund < 5) { abund = 10; }
vennCalculators.push_back(new Ace(abund));
}
}
vector<string> setParameters();
string getCommandName() { return "venn"; }
string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Venn"; }
string getDescription() { return "generates a Venn diagram from data provided in a shared file"; }
/***********************************************************************/
-WeightedLinkage::WeightedLinkage(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c, string s) :
+WeightedLinkage::WeightedLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s) :
Cluster(rav, lv, dm, c, s)
{
saveRow = -1;
/***********************************************************************/
//This function updates the distance based on the average linkage method.
-bool WeightedLinkage::updateDistance(MatData& colCell, MatData& rowCell) {
+bool WeightedLinkage::updateDistance(PDistCell& colCell, PDistCell& rowCell) {
try {
if ((saveRow != smallRow) || (saveCol != smallCol)) {
// rowBin = rabund->get(smallRow);
saveCol = smallCol;
}
- colCell->dist = (colCell->dist + rowCell->dist) / 2.0;
+ colCell.dist = (colCell.dist + rowCell.dist) / 2.0;
return(true);
}