219C1DE41559BCCF004209F9 /* getcoremicrobiomecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 219C1DE31559BCCD004209F9 /* getcoremicrobiomecommand.cpp */; };
7E6BE10A12F710D8007ADDBE /* refchimeratest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */; };
8DD76FB00486AB0100D96B5E /* mothur.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = C6A0FF2C0290799A04C91782 /* mothur.1 */; };
+ A70056E6156A93D000924A2D /* getotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70056E5156A93D000924A2D /* getotulabelscommand.cpp */; };
+ A70056EB156AB6E500924A2D /* removeotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70056EA156AB6E500924A2D /* removeotulabelscommand.cpp */; };
A70332B712D3A13400761E33 /* makefile in Sources */ = {isa = PBXBuildFile; fileRef = A70332B512D3A13400761E33 /* makefile */; };
A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; };
A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; };
A79234D713C74BF6002B08E2 /* mothurfisher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79234D613C74BF6002B08E2 /* mothurfisher.cpp */; };
A795840D13F13CD900F201D5 /* countgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A795840C13F13CD900F201D5 /* countgroupscommand.cpp */; };
A799F5B91309A3E000AEEFA0 /* makefastqcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */; };
+ A7A0671A1562946F0095C8C5 /* listotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */; };
+ A7A0671F1562AC3E0095C8C5 /* makecontigscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */; };
A7A32DAA14DC43B00001D2E5 /* sortseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */; };
A7A3C8C914D041AD00B1BFBE /* otuassociationcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */; };
A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A61F2C130062E000E05B6B /* amovacommand.cpp */; };
7E6BE10812F710D8007ADDBE /* refchimeratest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = refchimeratest.h; sourceTree = "<group>"; };
7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = refchimeratest.cpp; sourceTree = "<group>"; };
7E78911B135F3E8600E725D2 /* eachgapdistignorens.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = eachgapdistignorens.h; sourceTree = "<group>"; };
- 8DD76FB20486AB0100D96B5E /* Mothur */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = Mothur; sourceTree = BUILT_PRODUCTS_DIR; };
+ 8DD76FB20486AB0100D96B5E /* mothur */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = mothur; sourceTree = BUILT_PRODUCTS_DIR; };
+ A70056E5156A93D000924A2D /* getotulabelscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getotulabelscommand.cpp; sourceTree = "<group>"; };
+ A70056E8156A93E300924A2D /* getotulabelscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getotulabelscommand.h; sourceTree = "<group>"; };
+ A70056E9156AB6D400924A2D /* removeotulabelscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removeotulabelscommand.h; sourceTree = "<group>"; };
+ A70056EA156AB6E500924A2D /* removeotulabelscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removeotulabelscommand.cpp; sourceTree = "<group>"; };
A70332B512D3A13400761E33 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = "<group>"; };
A713EBAA12DC7613000092AC /* readphylipvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readphylipvector.h; sourceTree = "<group>"; };
A713EBAB12DC7613000092AC /* readphylipvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readphylipvector.cpp; sourceTree = "<group>"; };
A795840C13F13CD900F201D5 /* countgroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = countgroupscommand.cpp; sourceTree = "<group>"; };
A799F5B71309A3E000AEEFA0 /* makefastqcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makefastqcommand.h; sourceTree = "<group>"; };
A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makefastqcommand.cpp; sourceTree = "<group>"; };
+ A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = listotulabelscommand.cpp; sourceTree = "<group>"; };
+ A7A0671C156294810095C8C5 /* listotulabelscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = listotulabelscommand.h; sourceTree = "<group>"; };
+ A7A0671D1562AC230095C8C5 /* makecontigscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makecontigscommand.h; sourceTree = "<group>"; };
+ A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makecontigscommand.cpp; sourceTree = "<group>"; };
A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sortseqscommand.cpp; sourceTree = "<group>"; };
A7A32DAC14DC43D10001D2E5 /* sortseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sortseqscommand.h; sourceTree = "<group>"; };
A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = otuassociationcommand.cpp; sourceTree = "<group>"; };
1AB674ADFE9D54B511CA2CBB /* Products */ = {
isa = PBXGroup;
children = (
- 8DD76FB20486AB0100D96B5E /* Mothur */,
+ 8DD76FB20486AB0100D96B5E /* mothur */,
);
name = Products;
sourceTree = "<group>";
A7E9B6F412D37EC400DA6239 /* getgroupscommand.cpp */,
A7E9B6F712D37EC400DA6239 /* getlabelcommand.h */,
A7E9B6F612D37EC400DA6239 /* getlabelcommand.cpp */,
+ A70056E8156A93E300924A2D /* getotulabelscommand.h */,
+ A70056E5156A93D000924A2D /* getotulabelscommand.cpp */,
A7E9B6F912D37EC400DA6239 /* getlineagecommand.h */,
A7E9B6F812D37EC400DA6239 /* getlineagecommand.cpp */,
A7E9B6FB12D37EC400DA6239 /* getlistcountcommand.h */,
A7E9B72B12D37EC400DA6239 /* indicatorcommand.cpp */,
A7E9B73C12D37EC400DA6239 /* libshuffcommand.h */,
A7E9B73B12D37EC400DA6239 /* libshuffcommand.cpp */,
+ A7A0671C156294810095C8C5 /* listotulabelscommand.h */,
+ A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */,
A7E9B73E12D37EC400DA6239 /* listseqscommand.h */,
A7E9B73D12D37EC400DA6239 /* listseqscommand.cpp */,
A7FA10001302E096003860FE /* mantelcommand.h */,
A7FA10011302E096003860FE /* mantelcommand.cpp */,
A724D2B4153C8600000A826F /* makebiomcommand.h */,
A724D2B6153C8628000A826F /* makebiomcommand.cpp */,
+ A7A0671D1562AC230095C8C5 /* makecontigscommand.h */,
+ A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */,
A799F5B71309A3E000AEEFA0 /* makefastqcommand.h */,
A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */,
A7E9B74412D37EC400DA6239 /* makegroupcommand.h */,
A7E9B7C512D37EC400DA6239 /* removelineagecommand.cpp */,
A7E9B7C812D37EC400DA6239 /* removeotuscommand.h */,
A7E9B7C712D37EC400DA6239 /* removeotuscommand.cpp */,
+ A70056E9156AB6D400924A2D /* removeotulabelscommand.h */,
+ A70056EA156AB6E500924A2D /* removeotulabelscommand.cpp */,
A727864212E9E28C00F86ABA /* removerarecommand.h */,
A727864312E9E28C00F86ABA /* removerarecommand.cpp */,
A7E9B7CA12D37EC400DA6239 /* removeseqscommand.h */,
name = Mothur;
productInstallPath = "$(HOME)/bin";
productName = mothur;
- productReference = 8DD76FB20486AB0100D96B5E /* Mothur */;
+ productReference = 8DD76FB20486AB0100D96B5E /* mothur */;
productType = "com.apple.product-type.tool";
};
/* End PBXNativeTarget section */
A724D2B7153C8628000A826F /* makebiomcommand.cpp in Sources */,
219C1DE01552C4BD004209F9 /* newcommandtemplate.cpp in Sources */,
219C1DE41559BCCF004209F9 /* getcoremicrobiomecommand.cpp in Sources */,
+ A7A0671A1562946F0095C8C5 /* listotulabelscommand.cpp in Sources */,
+ A7A0671F1562AC3E0095C8C5 /* makecontigscommand.cpp in Sources */,
+ A70056E6156A93D000924A2D /* getotulabelscommand.cpp in Sources */,
+ A70056EB156AB6E500924A2D /* removeotulabelscommand.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
- CONFIGURATION_TEMP_DIR = /Users/pschloss/Desktop/mothur;
COPY_PHASE_STRIP = NO;
DEPLOYMENT_LOCATION = YES;
GCC_DYNAMIC_NO_PIC = NO;
GCC_MODEL_TUNING = G5;
GCC_OPTIMIZATION_LEVEL = 0;
- INSTALL_PATH = /Users/pschloss/Desktop/mothur/build/Debug;
- OBJROOT = /Users/pschloss/Desktop/mothur;
- PRODUCT_NAME = Mothur;
+ PRODUCT_NAME = mothur;
SDKROOT = macosx10.6;
- SYMROOT = /Users/pschloss/Desktop/mothur;
+ SKIP_INSTALL = NO;
};
name = Debug;
};
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
- CONFIGURATION_TEMP_DIR = /Users/pschloss/Desktop/mothur;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
DEPLOYMENT_LOCATION = YES;
GCC_MODEL_TUNING = G5;
GCC_WARN_UNUSED_VALUE = YES;
- INSTALL_PATH = /Users/pschloss/Desktop/mothur/build/Debug;
- OBJROOT = /Users/pschloss/Desktop/mothur;
- PRODUCT_NAME = Mothur;
- SYMROOT = /Users/pschloss/Desktop/mothur;
+ PRODUCT_NAME = mothur;
+ SKIP_INSTALL = NO;
};
name = Release;
};
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"MOTHUR_FILES=\"\\\"../release\\\"\"",
- "VERSION=\"\\\"1.24.0\\\"\"",
- "RELEASE_DATE=\"\\\"3/12/2012\\\"\"",
+ "VERSION=\"\\\"1.25.0\\\"\"",
+ "RELEASE_DATE=\"\\\"5/01/2012\\\"\"",
);
"GCC_VERSION[arch=*]" = "";
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = "";
- INSTALL_PATH = "";
+ INSTALL_PATH = TARGET_BUILD_DIR;
MACH_O_TYPE = mh_execute;
ONLY_ACTIVE_ARCH = YES;
OTHER_CPLUSPLUSFLAGS = (
"-lreadline",
);
SDKROOT = macosx10.6;
+ SKIP_INSTALL = NO;
USER_HEADER_SEARCH_PATHS = "";
};
name = Debug;
GCC_WARN_UNUSED_PARAMETER = YES;
GCC_WARN_UNUSED_VALUE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
- INSTALL_PATH = "";
+ INSTALL_PATH = TARGET_BUILD_DIR;
MACH_O_TYPE = mh_execute;
OTHER_CPLUSPLUSFLAGS = (
"-DUSE_READLINE",
"-lreadline",
);
SDKROOT = macosx10.6;
+ SKIP_INSTALL = NO;
};
name = Release;
};
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "8DD76FA90486AB0100D96B5E"
- BuildableName = "Mothur"
+ BuildableName = "mothur"
BlueprintName = "Mothur"
ReferencedContainer = "container:Mothur.xcodeproj">
</BuildableReference>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "8DD76FA90486AB0100D96B5E"
- BuildableName = "Mothur"
+ BuildableName = "mothur"
BlueprintName = "Mothur"
ReferencedContainer = "container:Mothur.xcodeproj">
</BuildableReference>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "8DD76FA90486AB0100D96B5E"
- BuildableName = "Mothur"
+ BuildableName = "mothur"
BlueprintName = "Mothur"
ReferencedContainer = "container:Mothur.xcodeproj">
</BuildableReference>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "8DD76FA90486AB0100D96B5E"
- BuildableName = "Mothur"
+ BuildableName = "mothur"
BlueprintName = "Mothur"
ReferencedContainer = "container:Mothur.xcodeproj">
</BuildableReference>
#ifndef ALIGNCOMMAND_H
#define ALIGNCOMMAND_H
-//test
/*
* aligncommand.h
* Mothur
/**************************************************************************************************/
-Alignment::Alignment() { /* do nothing */ }
+Alignment::Alignment() { m = MothurOut::getInstance(); /* do nothing */ }
/**************************************************************************************************/
void Alignment::traceBack(){ // This traceback routine is used by the dynamic programming algorithms
try {
- // to fill the values of seqAaln and seqBaln
+ BBaseMap.clear();
+ ABaseMap.clear(); // to fill the values of seqAaln and seqBaln
seqAaln = "";
seqBaln = "";
int row = lB-1;
// matrix
if(currentCell.prevCell == 'x'){ seqAaln = seqBaln = "NOALIGNMENT"; }//If there's an 'x' in the bottom-
- else{ // right corner bail out because it means nothing got aligned
+ else{ // right corner bail out because it means nothing got aligned
+ int count = 0;
while(currentCell.prevCell != 'x'){ // while the previous cell isn't an 'x', keep going...
if(currentCell.prevCell == 'u'){ // if the pointer to the previous cell is 'u', go up in the
seqAaln = '-' + seqAaln; // matrix. this indicates that we need to insert a gap in
seqBaln = seqB[row] + seqBaln; // seqA and a base in seqB
+ BBaseMap[row] = count;
currentCell = alignment[--row][column];
}
else if(currentCell.prevCell == 'l'){ // if the pointer to the previous cell is 'l', go to the left
seqBaln = '-' + seqBaln; // in the matrix. this indicates that we need to insert a gap
seqAaln = seqA[column] + seqAaln; // in seqB and a base in seqA
+ ABaseMap[column] = count;
currentCell = alignment[row][--column];
}
else{
seqAaln = seqA[column] + seqAaln; // otherwise we need to go diagonally up and to the left,
seqBaln = seqB[row] + seqBaln; // here we add a base to both alignments
+ BBaseMap[row] = count;
+ ABaseMap[column] = count;
currentCell = alignment[--row][--column];
}
+ count++;
}
}
- pairwiseLength = seqAaln.length();
+
+ pairwiseLength = seqAaln.length();
seqAstart = 1; seqAend = 0;
seqBstart = 1; seqBend = 0;
-
+ //flip maps since we now know the total length
+ map<int, int> newAMap;
+ for (map<int, int>::iterator it = ABaseMap.begin(); it != ABaseMap.end(); it++) {
+ int spot = it->second;
+ newAMap[pairwiseLength-spot-1] = it->first-1;
+ }
+ ABaseMap = newAMap;
+ map<int, int> newBMap;
+ for (map<int, int>::iterator it = BBaseMap.begin(); it != BBaseMap.end(); it++) {
+ int spot = it->second;
+ newBMap[pairwiseLength-spot-1] = it->first-1;
+ }
+ BBaseMap = newBMap;
+
for(int i=0;i<seqAaln.length();i++){
if(seqAaln[i] != '-' && seqBaln[i] == '-') { seqAstart++; }
else if(seqAaln[i] == '-' && seqBaln[i] != '-') { seqBstart++; }
int Alignment::getTemplateStartPos(){
return seqBstart; // this is called to report the quality of the alignment
}
+/**************************************************************************************************/
+
+map<int, int> Alignment::getSeqAAlnBaseMap(){
+ return ABaseMap;
+}
+/**************************************************************************************************/
+map<int, int> Alignment::getSeqBAlnBaseMap(){
+ return BBaseMap;
+}
/**************************************************************************************************/
int Alignment::getTemplateEndPos(){
// float getAlignmentScore();
string getSeqAAln();
string getSeqBAln();
+ map<int, int> getSeqAAlnBaseMap();
+ map<int, int> getSeqBAlnBaseMap();
int getCandidateStartPos();
int getCandidateEndPos();
int getTemplateStartPos();
int pairwiseLength;
int nRows, nCols, lA, lB;
vector<vector<AlignmentCell> > alignment;
+ map<int, int> ABaseMap;
+ map<int, int> BBaseMap;
MothurOut* m;
};
path = m->getFullPathName(path);
if (m->debug) { m->mothurOut("[DEBUG]: mothur's path = " + path + "\n"); }
-
+
savedOutputDir = outputDir;
string catchAllCommandExe = "";
string catchAllTest = "";
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- catchAllCommandExe += "mono " + path + "CatchAllcmdL.exe ";
if (outputDir == "") { outputDir = "./"; } //force full pathname to be created for catchall, this is necessary because if catchall is in the path it will look for input file whereever the exe is and not the cwd.
catchAllTest = path + "CatchAllcmdL.exe";
#else
- catchAllCommandExe += "\"" + path + "CatchAllcmdW.exe\"" + " ";
if (outputDir == "") { outputDir = ".\\"; } //force full pathname to be created for catchall, this is necessary because if catchall is in the path it will look for input file whereever the exe is and not the cwd.
catchAllTest = path + "CatchAllcmdW.exe";
#endif
ifstream in;
catchAllTest = m->getFullPathName(catchAllTest);
int ableToOpen = m->openInputFile(catchAllTest, in, "no error"); in.close();
- if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + catchAllTest + " file does not exist. mothur requires the catchall executable to run the catchall command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
+ if(ableToOpen == 1) {
+ m->mothurOut(catchAllTest + " file does not exist. Checking path... \n");
+ //check to see if uchime is in the path??
+
+ string programName = "CatchAllcmdW.exe";
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ programName = "CatchAllcmdL.exe";
+#endif
+ string cLocation = m->findProgramPath(programName);
+
+ ifstream in2;
+ ableToOpen = m->openInputFile(cLocation, in2, "no error"); in2.close();
+
+ if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + cLocation + " file does not exist. mothur requires the catchall executable."); m->mothurOutEndLine(); return 0; }
+ else { m->mothurOut("Found catchall in your path, using " + cLocation + "\n"); catchAllTest = cLocation; }
+ }
+ catchAllTest = m->getFullPathName(catchAllTest);
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ catchAllCommandExe += "mono " + catchAllTest + " ";
+#else
+ catchAllCommandExe += "\"" + catchAllTest + "\" ";
+#endif
//prepare full output directory
outputDir = m->getFullPathName(outputDir);
- if (m->debug) { m->mothurOut("[DEBUG]: catchall location = " + catchAllCommandExe + ".\n [DEBUG]: outputDir = " + outputDir + ".\n"); }
+ if (m->debug) { m->mothurOut("[DEBUG]: catchall location = " + catchAllCommandExe + "\n[DEBUG]: outputDir = " + outputDir + "\n"); }
vector<string> inputFileNames;
if (sharedfile != "") { inputFileNames = parseSharedFile(sharedfile); }
string summaryfilename = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "catchall.summary";
summaryfilename = m->getFullPathName(summaryfilename);
- if (m->debug) { m->mothurOut("[DEBUG]: Input File = " + inputFileNames[p] + ".\n [DEBUG]: inputdata address = " + toString(&input) + ".\n [DEBUG]: sabund address = " + toString(&sabund) + ".\n"); }
+ if (m->debug) { m->mothurOut("[DEBUG]: Input File = " + inputFileNames[p] + ".\n[DEBUG]: inputdata address = " + toString(&input) + ".\n[DEBUG]: sabund address = " + toString(&sabund) + ".\n"); }
ofstream out;
m->openOutputFile(summaryfilename, out);
#endif
totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, trimFastaFileName);
+ m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine();
#ifdef USE_MPI
}
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait
if (parser != NULL) { delete parser; }
- m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
+ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
}
//set accnos file as new current accnosfile
ifstream in;
uchimeCommand = m->getFullPathName(uchimeCommand);
int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
- if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uchimeCommand + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
- }
+ if(ableToOpen == 1) {
+ m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
+ //check to see if uchime is in the path??
+
+ string uLocation = m->findProgramPath("uchime");
+
+
+ ifstream in2;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
+#else
+ ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
+#endif
+
+ if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
+ else { m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
+ }else { uchimeLocation = uchimeCommand; }
+
+ uchimeLocation = m->getFullPathName(uchimeLocation);
+ }
}
catch(exception& e) {
m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
vector<char*> cPara;
- string path = m->argv;
- string tempPath = path;
- for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
- path = path.substr(0, (tempPath.find_last_of('m')));
-
- string uchimeCommand = path;
+ string uchimeCommand = uchimeLocation;
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- uchimeCommand += "uchime ";
+ uchimeCommand += " ";
#else
- uchimeCommand += "uchime";
uchimeCommand = "\"" + uchimeCommand + "\"";
#endif
#else
commandString = "\"" + commandString + "\"";
#endif
+ if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
system(commandString.c_str());
//free memory
// Allocate memory for thread data.
string extension = toString(i) + ".temp";
- uchimeData* tempUchime = new uchimeData(outputFileName+extension, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0, i);
+ uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0, i);
tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
// Allocate memory for thread data.
string extension = toString(i) + ".temp";
- uchimeData* tempUchime = new uchimeData(outputFName+extension, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end, i);
+ uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end, i);
tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
int createProcesses(string, string, string, string, int&);
bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract;
- string fastafile, groupfile, templatefile, outputDir, namefile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract;
+ string fastafile, groupfile, templatefile, outputDir, namefile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, uchimeLocation;
int processors;
string namefile;
string groupfile;
string outputFName;
- string accnos, alns, filename, templatefile;
+ string accnos, alns, filename, templatefile, uchimeLocation;
MothurOut* m;
int start;
int end;
string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract;
uchimeData(){}
- uchimeData(string o, string t, string file, string f, string n, string g, string ac, string al, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
+ uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac, string al, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
fastafile = f;
namefile = n;
groupfile = g;
groups = gr;
count = 0;
numChimeras = 0;
+ uchimeLocation = uloc;
}
void setBooleans(bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract) {
useAbskew = Abskew;
vector<char*> cPara;
- string path = pDataArray->m->argv;
- string tempPath = path;
- for (int j = 0; j < path.length(); j++) { tempPath[j] = tolower(path[j]); }
- path = path.substr(0, (tempPath.find_last_of('m')));
-
- string uchimeCommand = path;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- uchimeCommand += "uchime ";
-#else
- uchimeCommand += "uchime";
- uchimeCommand = "\"" + uchimeCommand + "\"";
-#endif
+ string uchimeCommand = pDataArray->uchimeLocation;
+ uchimeCommand = "\"" + uchimeCommand + "\"";
char* tempUchime;
tempUchime= new char[uchimeCommand.length()+1];
//uchime_main(numArgs, uchimeParameters);
//cout << "commandString = " << commandString << endl;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-#else
commandString = "\"" + commandString + "\"";
-#endif
+
+ if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
+
system(commandString.c_str());
//free memory
vector<char*> cPara;
- char* tempUchime;
- tempUchime= new char[8];
- *tempUchime = '\0';
- strncat(tempUchime, "uchime ", 7);
- cPara.push_back(tempUchime);
+ string uchimeCommand = pDataArray->uchimeLocation;
+ uchimeCommand = "\"" + uchimeCommand + "\"";
+
+ char* tempUchime;
+ tempUchime= new char[uchimeCommand.length()+1];
+ *tempUchime = '\0';
+ strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
+ cPara.push_back(tempUchime);
char* tempIn = new char[8];
*tempIn = '\0'; strncat(tempIn, "--input", 7);
//uchime_main(numArgs, uchimeParameters);
//cout << "commandString = " << commandString << endl;
+ if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
system(commandString.c_str());
//free memory
m->mothurOutEndLine();
m->mothurOut("Reading in the " + file + " taxonomy...\t"); cout.flush();
-
+ if (m->debug) { m->mothurOut("[DEBUG]: Taxonomies read in...\n"); }
+
#ifdef USE_MPI
int pid, num, processors;
vector<unsigned long long> positions;
istringstream iss (tempBuf,istringstream::in);
iss >> name; m->gobble(iss);
iss >> taxInfo;
+ if (m->debug) { m->mothurOut("[DEBUG]: name = " + name + " tax = " + taxInfo + "\n"); }
taxonomy[name] = taxInfo;
phyloTree->addSeqToTree(name, taxInfo);
}
while (!inTax.eof()) {
inTax >> name; m->gobble(inTax);
inTax >> taxInfo;
-
+
+ if (m->debug) { m->mothurOut("[DEBUG]: name = '" + name + "' tax = '" + taxInfo + "'\n"); }
+
taxonomy[name] = taxInfo;
phyloTree->addSeqToTree(name, taxInfo);
}
inTax.close();
#endif
+
+
phyloTree->assignHeirarchyIDs(0);
}
}
+
+ //phylotree adds an extra unknown so we want to remove that
+ if (bestChild.name == "unknown") { bestChildSize--; }
//is this taxonomy above cutoff
int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
taxaSum = new PhyloSummary(groupfile);
}
+
//for each bin in the list vector
+ string snumBins = toString(processList->getNumBins());
for (int i = 0; i < processList->getNumBins(); i++) {
if (m->control_pressed) { break; }
if (m->control_pressed) { out.close(); return 0; }
//output to new names file
- out << (i+1) << '\t' << size << '\t' << conTax << endl;
+ string binLabel = "Otu";
+ string sbinNumber = toString(i+1);
+ if (sbinNumber.length() < snumBins.length()) {
+ int diff = snumBins.length() - sbinNumber.length();
+ for (int h = 0; h < diff; h++) { binLabel += "0"; }
+ }
+ binLabel += sbinNumber;
+
+ out << binLabel << '\t' << size << '\t' << conTax << endl;
string noConfidenceConTax = conTax;
m->removeConfidences(noConfidenceConTax);
}
}
-
}
catch(exception& e) {
m->errorOut(e, "ClassifySeqsCommand", "ClassifySeqsCommand");
int ClassifySeqsCommand::execute(){
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
-
+
if(method == "bayesian"){ classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip); }
else if(method == "knn"){ classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand()); }
else {
string RippedTaxName = m->getRootName(m->getSimpleName(baseTName));
RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
- RippedTaxName += ".";
-
+ if (RippedTaxName != "") { RippedTaxName += "."; }
+
if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "taxonomy";
string newaccnosFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "flip.accnos";
cutoff = c;
aboveCutoff = cutoff + 10000.0;
m = MothurOut::getInstance();
+ if(method == "furthest") { tag = "fn"; }
+ else if (method == "average") { tag = "an"; }
+ else if (method == "weighted") { tag = "wn"; }
+ else if (method == "nearest") { tag = "nn"; }
}
catch(exception& e) {
m->errorOut(e, "ClusterClassic", "ClusterClassic");
ifstream fileHandle;
m->openInputFile(filename, fileHandle);
- fileHandle >> nseqs >> name;
+ string numTest;
+ fileHandle >> numTest >> name;
+
+ if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+ else { convert(numTest, nseqs); }
+
matrixNames.push_back(name);
-#ifndef CLUSTER_H
-#define CLUSTER_H
+#ifndef CLUSTERCLASSIC_H
+#define CLUSTERCLASSIC_H
-#include "mothur.h"
#include "mothurout.h"
#include "listvector.hpp"
#include "rabundvector.hpp"
int getNSeqs() { return nseqs; }
ListVector* getListVector() { return list; }
RAbundVector* getRAbundVector() { return rabund; }
- string getTag();
+ string getTag() { return tag; }
void setMapWanted(bool m);
map<string, int> getSeqtoBin() { return seq2Bin; }
bool mapWanted, sim;
double cutoff, aboveCutoff;
map<string, int> seq2Bin;
- string method;
+ string method, tag;
MothurOut* m;
};
#include "clustersplitcommand.h"
-
//**********************************************************************************************************************
vector<string> ClusterSplitCommand::setParameters(){
try {
CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
+ CommandParameter pclassic("classic", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pclassic);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n";
helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=3, meaning use the first taxon in each list. \n";
helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM. The default value is false.\n";
+ helpString += "The classic parameter allows you to indicate that you want to run your files with cluster.classic. It is only valid with splitmethod=fasta. Default=f.\n";
#ifdef USE_MPI
helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
#endif
temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "F"; }
large = m->isTrue(temp);
-
+
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
m->mothurConvert(temp, processors);
else { splitmethod = temp; }
}
+ temp = validParameter.validFile(parameters, "classic", false); if (temp == "not found") { temp = "F"; }
+ classic = m->isTrue(temp);
+
+ if ((splitmethod != "fasta") && classic) { m->mothurOut("splitmethod must be fasta to use cluster.classic.\n"); abort=true; }
+
temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.25"; }
m->mothurConvert(temp, cutoff);
cutoff += (5 / (precision * 10.0));
SplitMatrix* split;
if (splitmethod == "distance") { split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large); }
else if (splitmethod == "classify") { split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large); }
- else if (splitmethod == "fasta") { split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir); }
+ else if (splitmethod == "fasta") { split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, classic, outputDir); }
else { m->mothurOut("Not a valid splitting method. Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0; }
split->split();
//cluster each distance file
for (int i = 0; i < distNames.size(); i++) {
- Cluster* cluster = NULL;
- SparseMatrix* matrix = NULL;
- ListVector* list = NULL;
- ListVector oldList;
- RAbundVector* rabund = NULL;
-
- if (m->control_pressed) { return listFileNames; }
-
string thisNamefile = distNames[i].begin()->second;
string thisDistFile = distNames[i].begin()->first;
-
- #ifdef USE_MPI
- int pid;
- MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-
- //output your files too
- if (pid != 0) {
- cout << endl << "Reading " << thisDistFile << endl;
- }
- #endif
-
- m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
-
- ReadMatrix* read = new ReadColumnMatrix(thisDistFile);
- read->setCutoff(cutoff);
-
- NameAssignment* nameMap = new NameAssignment(thisNamefile);
- nameMap->readMap();
- read->read(nameMap);
-
- if (m->control_pressed) { delete read; delete nameMap; return listFileNames; }
-
- list = read->getListVector();
- oldList = *list;
- matrix = read->getMatrix();
-
- delete read; read = NULL;
- delete nameMap; nameMap = NULL;
-
-
- #ifdef USE_MPI
- //output your files too
- if (pid != 0) {
- cout << endl << "Clustering " << thisDistFile << endl;
- }
- #endif
- m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
-
- rabund = new RAbundVector(list->getRAbundVector());
-
- //create cluster
- if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
- else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
- else if(method == "average"){ cluster = new AverageLinkage(rabund, list, matrix, cutoff, method); }
- tag = cluster->getTag();
-
- if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
- fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
-
- ofstream listFile;
- m->openOutputFile(fileroot+ tag + ".list", listFile);
-
- listFileNames.push_back(fileroot+ tag + ".list");
-
- float previousDist = 0.00000;
- float rndPreviousDist = 0.00000;
-
- oldList = *list;
-
- print_start = true;
- start = time(NULL);
- double saveCutoff = cutoff;
-
- while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
-
- if (m->control_pressed) { //clean up
- delete matrix; delete list; delete cluster; delete rabund;
- listFile.close();
- for (int i = 0; i < listFileNames.size(); i++) { m->mothurRemove(listFileNames[i]); }
- listFileNames.clear(); return listFileNames;
- }
-
- cluster->update(saveCutoff);
-
- float dist = matrix->getSmallDist();
- float rndDist;
- if (hard) {
- rndDist = m->ceilDist(dist, precision);
- }else{
- rndDist = m->roundDist(dist, precision);
- }
-
- if(previousDist <= 0.0000 && dist != previousDist){
- oldList.setLabel("unique");
- oldList.print(listFile);
- if (labels.count("unique") == 0) { labels.insert("unique"); }
- }
- else if(rndDist != rndPreviousDist){
- oldList.setLabel(toString(rndPreviousDist, length-1));
- oldList.print(listFile);
- if (labels.count(toString(rndPreviousDist, length-1)) == 0) { labels.insert(toString(rndPreviousDist, length-1)); }
- }
-
- previousDist = dist;
- rndPreviousDist = rndDist;
- oldList = *list;
- }
+ string listFileName = "";
+ if (classic) { listFileName = clusterClassicFile(thisDistFile, thisNamefile, labels, smallestCutoff); }
+ else { listFileName = clusterFile(thisDistFile, thisNamefile, labels, smallestCutoff); }
-
- if(previousDist <= 0.0000){
- oldList.setLabel("unique");
- oldList.print(listFile);
- if (labels.count("unique") == 0) { labels.insert("unique"); }
- }
- else if(rndPreviousDist<cutoff){
- oldList.setLabel(toString(rndPreviousDist, length-1));
- oldList.print(listFile);
- if (labels.count(toString(rndPreviousDist, length-1)) == 0) { labels.insert(toString(rndPreviousDist, length-1)); }
- }
-
- delete matrix; delete list; delete cluster; delete rabund;
- matrix = NULL; list = NULL; cluster = NULL; rabund = NULL;
- listFile.close();
-
if (m->control_pressed) { //clean up
for (int i = 0; i < listFileNames.size(); i++) { m->mothurRemove(listFileNames[i]); }
listFileNames.clear(); return listFileNames;
}
-
- m->mothurRemove(thisDistFile);
- m->mothurRemove(thisNamefile);
-
- if (saveCutoff != cutoff) {
- if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
- else { saveCutoff = m->roundDist(saveCutoff, precision); }
-
- m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();
- }
-
- if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff; }
- }
+
+ listFileNames.push_back(listFileName);
+ }
cutoff = smallestCutoff;
}
//**********************************************************************************************************************
+string ClusterSplitCommand::clusterClassicFile(string thisDistFile, string thisNamefile, set<string>& labels, double& smallestCutoff){
+ try {
+ string listFileName = "";
+
+ ListVector* list = NULL;
+ ListVector oldList;
+ RAbundVector* rabund = NULL;
+
+#ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+
+ //output your files too
+ if (pid != 0) {
+ cout << endl << "Reading " << thisDistFile << endl;
+ }
+#endif
+
+ m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
+
+ NameAssignment* nameMap = new NameAssignment(thisNamefile);
+ nameMap->readMap();
+
+ //reads phylip file storing data in 2D vector, also fills list and rabund
+ bool sim = false;
+ ClusterClassic* cluster = new ClusterClassic(cutoff, method, sim);
+ cluster->readPhylipFile(thisDistFile, nameMap);
+ tag = cluster->getTag();
+
+ if (m->control_pressed) { delete cluster; return 0; }
+
+ list = cluster->getListVector();
+ rabund = cluster->getRAbundVector();
+
+ if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
+ fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
+ listFileName = fileroot+ tag + ".list";
+
+ ofstream listFile;
+ m->openOutputFile(fileroot+ tag + ".list", listFile);
+
+ float previousDist = 0.00000;
+ float rndPreviousDist = 0.00000;
+ oldList = *list;
+
+#ifdef USE_MPI
+ //output your files too
+ if (pid != 0) {
+ cout << endl << "Clustering " << thisDistFile << endl;
+ }
+#endif
+
+ m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
+
+ while ((cluster->getSmallDist() < cutoff) && (cluster->getNSeqs() > 1)){
+ if (m->control_pressed) { delete cluster; delete list; delete rabund; listFile.close(); return listFileName; }
+
+ cluster->update(cutoff);
+
+ float dist = cluster->getSmallDist();
+ float rndDist;
+ if (hard) {
+ rndDist = m->ceilDist(dist, precision);
+ }else{
+ rndDist = m->roundDist(dist, precision);
+ }
+
+ if(previousDist <= 0.0000 && dist != previousDist){
+ oldList.setLabel("unique");
+ oldList.print(listFile);
+ if (labels.count("unique") == 0) { labels.insert("unique"); }
+ }
+ else if(rndDist != rndPreviousDist){
+ oldList.setLabel(toString(rndPreviousDist, length-1));
+ oldList.print(listFile);
+ if (labels.count(toString(rndPreviousDist, length-1)) == 0) { labels.insert(toString(rndPreviousDist, length-1)); }
+ }
+
+
+ previousDist = dist;
+ rndPreviousDist = rndDist;
+ oldList = *list;
+ }
+
+ if(previousDist <= 0.0000){
+ oldList.setLabel("unique");
+ oldList.print(listFile);
+ if (labels.count("unique") == 0) { labels.insert("unique"); }
+ }
+ else if(rndPreviousDist<cutoff){
+ oldList.setLabel(toString(rndPreviousDist, length-1));
+ oldList.print(listFile);
+ if (labels.count(toString(rndPreviousDist, length-1)) == 0) { labels.insert(toString(rndPreviousDist, length-1)); }
+ }
+
+
+ listFile.close();
+
+ delete cluster; delete nameMap; delete list; delete rabund;
+
+
+ return listFileName;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterSplitCommand", "clusterClassicFile");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+string ClusterSplitCommand::clusterFile(string thisDistFile, string thisNamefile, set<string>& labels, double& smallestCutoff){
+ try {
+ string listFileName = "";
+
+ Cluster* cluster = NULL;
+ SparseMatrix* matrix = NULL;
+ ListVector* list = NULL;
+ ListVector oldList;
+ RAbundVector* rabund = NULL;
+
+ if (m->control_pressed) { return listFileName; }
+
+#ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+
+ //output your files too
+ if (pid != 0) {
+ cout << endl << "Reading " << thisDistFile << endl;
+ }
+#endif
+
+ m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
+
+ ReadMatrix* read = new ReadColumnMatrix(thisDistFile);
+ read->setCutoff(cutoff);
+
+ NameAssignment* nameMap = new NameAssignment(thisNamefile);
+ nameMap->readMap();
+ read->read(nameMap);
+
+ if (m->control_pressed) { delete read; delete nameMap; return listFileName; }
+
+ list = read->getListVector();
+ oldList = *list;
+ matrix = read->getMatrix();
+
+ delete read; read = NULL;
+ delete nameMap; nameMap = NULL;
+
+
+#ifdef USE_MPI
+ //output your files too
+ if (pid != 0) {
+ cout << endl << "Clustering " << thisDistFile << endl;
+ }
+#endif
+
+ m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
+
+ rabund = new RAbundVector(list->getRAbundVector());
+
+ //create cluster
+ if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
+ else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
+ else if(method == "average"){ cluster = new AverageLinkage(rabund, list, matrix, cutoff, method); }
+ tag = cluster->getTag();
+
+ if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
+ fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
+
+ ofstream listFile;
+ m->openOutputFile(fileroot+ tag + ".list", listFile);
+
+ listFileName = fileroot+ tag + ".list";
+
+ float previousDist = 0.00000;
+ float rndPreviousDist = 0.00000;
+
+ oldList = *list;
+
+ print_start = true;
+ start = time(NULL);
+ double saveCutoff = cutoff;
+
+ while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
+
+ if (m->control_pressed) { //clean up
+ delete matrix; delete list; delete cluster; delete rabund;
+ listFile.close();
+ m->mothurRemove(listFileName);
+ return listFileName;
+ }
+
+ cluster->update(saveCutoff);
+
+ float dist = matrix->getSmallDist();
+ float rndDist;
+ if (hard) {
+ rndDist = m->ceilDist(dist, precision);
+ }else{
+ rndDist = m->roundDist(dist, precision);
+ }
+
+ if(previousDist <= 0.0000 && dist != previousDist){
+ oldList.setLabel("unique");
+ oldList.print(listFile);
+ if (labels.count("unique") == 0) { labels.insert("unique"); }
+ }
+ else if(rndDist != rndPreviousDist){
+ oldList.setLabel(toString(rndPreviousDist, length-1));
+ oldList.print(listFile);
+ if (labels.count(toString(rndPreviousDist, length-1)) == 0) { labels.insert(toString(rndPreviousDist, length-1)); }
+ }
+
+ previousDist = dist;
+ rndPreviousDist = rndDist;
+ oldList = *list;
+ }
+
+
+ if(previousDist <= 0.0000){
+ oldList.setLabel("unique");
+ oldList.print(listFile);
+ if (labels.count("unique") == 0) { labels.insert("unique"); }
+ }
+ else if(rndPreviousDist<cutoff){
+ oldList.setLabel(toString(rndPreviousDist, length-1));
+ oldList.print(listFile);
+ if (labels.count(toString(rndPreviousDist, length-1)) == 0) { labels.insert(toString(rndPreviousDist, length-1)); }
+ }
+
+ delete matrix; delete list; delete cluster; delete rabund;
+ matrix = NULL; list = NULL; cluster = NULL; rabund = NULL;
+ listFile.close();
+
+ if (m->control_pressed) { //clean up
+ m->mothurRemove(listFileName);
+ return listFileName;
+ }
+
+ m->mothurRemove(thisDistFile);
+ m->mothurRemove(thisNamefile);
+
+ if (saveCutoff != cutoff) {
+ if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
+ else { saveCutoff = m->roundDist(saveCutoff, precision); }
+
+ m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();
+ }
+
+ if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff; }
+
+ return listFileName;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterSplitCommand", "clusterFile");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
try{
#include "readmatrix.hpp"
#include "inputdata.h"
#include "clustercommand.h"
+#include "clusterclassic.h"
class ClusterSplitCommand : public Command {
string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, distfile, format, showabund, timing, splitmethod, taxFile, fastafile;
double cutoff, splitcutoff;
int precision, length, processors, taxLevelCutoff;
- bool print_start, abort, hard, large;
+ bool print_start, abort, hard, large, classic;
time_t start;
ofstream outList, outRabund, outSabund;
void printData(ListVector*);
vector<string> createProcesses(vector< map<string, string> >, set<string>&);
vector<string> cluster(vector< map<string, string> >, set<string>&);
+ string clusterFile(string, string, set<string>&, double&);
+ string clusterClassicFile(string, string, set<string>&, double&);
int mergeLists(vector<string>, map<float, int>, ListVector*);
map<float, int> completeListFile(vector<string>, string, set<string>&, ListVector*&);
int createMergedDistanceFile(vector< map<string, string> >);
#include "createdatabasecommand.h"
#include "makebiomcommand.h"
#include "getcoremicrobiomecommand.h"
+#include "listotulabelscommand.h"
+#include "getotulabelscommand.h"
+#include "removeotulabelscommand.h"
+#include "makecontigscommand.h"
/*******************************************************/
commands["pcr.seqs"] = "pcr.seqs";
commands["create.database"] = "create.database";
commands["make.biom"] = "make.biom";
- commands["get.coremicrobiome"] = "get.coremicrobiome";
+ commands["get.coremicrobiome"] = "get.coremicrobiome";
+ commands["list.otulabels"] = "list.otulabels";
+ commands["get.otulabels"] = "get.otulabels";
+ commands["remove.otulabels"] = "remove.otulabels";
+ commands["make.contigs"] = "make.contigs";
commands["quit"] = "MPIEnabled";
}
else if(commandName == "create.database") { command = new CreateDatabaseCommand(optionString); }
else if(commandName == "make.biom") { command = new MakeBiomCommand(optionString); }
else if(commandName == "get.coremicrobiome") { command = new GetCoreMicroBiomeCommand(optionString); }
+ else if(commandName == "list.otulabels") { command = new ListOtuLabelsCommand(optionString); }
+ else if(commandName == "get.otulabels") { command = new GetOtuLabelsCommand(optionString); }
+ else if(commandName == "remove.otulabels") { command = new RemoveOtuLabelsCommand(optionString); }
+ else if(commandName == "make.contigs") { command = new MakeContigsCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
else if(commandName == "create.database") { pipecommand = new CreateDatabaseCommand(optionString); }
else if(commandName == "make.biom") { pipecommand = new MakeBiomCommand(optionString); }
else if(commandName == "get.coremicrobiome") { pipecommand = new GetCoreMicroBiomeCommand(optionString); }
+ else if(commandName == "list.otulabels") { pipecommand = new ListOtuLabelsCommand(optionString); }
+ else if(commandName == "get.otulabels") { pipecommand = new GetOtuLabelsCommand(optionString); }
+ else if(commandName == "remove.otulabels") { pipecommand = new RemoveOtuLabelsCommand(optionString); }
+ else if(commandName == "make.contigs") { pipecommand = new MakeContigsCommand(optionString); }
else { pipecommand = new NoCommand(optionString); }
return pipecommand;
else if(commandName == "create.database") { shellcommand = new CreateDatabaseCommand(); }
else if(commandName == "make.biom") { shellcommand = new MakeBiomCommand(); }
else if(commandName == "get.coremicrobiome") { shellcommand = new GetCoreMicroBiomeCommand(); }
+ else if(commandName == "list.otulabels") { shellcommand = new ListOtuLabelsCommand(); }
+ else if(commandName == "get.otulabels") { shellcommand = new GetOtuLabelsCommand(); }
+ else if(commandName == "remove.otulabels") { shellcommand = new RemoveOtuLabelsCommand(); }
+ else if(commandName == "make.contigs") { shellcommand = new MakeContigsCommand(); }
else { shellcommand = new NoCommand(); }
return shellcommand;
m->openOutputFile(outputFileName, out);
outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
- out << "metric\tlabel\tScore\tpValue\n";
+ out << "metric\tlabel\tScore\tzScore\tstandardDeviation\n";
//as long as you are not at the end of the file or done wih the lines you want
while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
int CooccurrenceCommand::getCooccurrence(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
try {
int numOTUS = thisLookUp[0]->getNumBins();
+
+ if(numOTUS < 2) {
+ m->mothurOut("Not enough OTUs for co-occurrence analysis, skipping"); m->mothurOutEndLine();
+ return 0;
+ }
+
vector< vector<int> > co_matrix; co_matrix.resize(thisLookUp[0]->getNumBins());
for (int i = 0; i < thisLookUp[0]->getNumBins(); i++) { co_matrix[i].resize((thisLookUp.size()), 0); }
vector<int> columntotal; columntotal.resize(thisLookUp.size(), 0);
m->mothurOutEndLine(); m->mothurOut("average metric score: " + toString(nullMean)); m->mothurOutEndLine();
+ //calc_p_value is not a statistical p-value, it's just the average that are either > or < the initscore.
+ //All it does is show what is expected in a competitively structured community
+ //zscore is output so p-value can be looked up in a ztable
double pvalue = 0.0;
if (metric == "cscore" || metric == "checker") { pvalue = trial.calc_pvalue_greaterthan (stats, initscore); }
else{ pvalue = trial.calc_pvalue_lessthan (stats, initscore); }
+
+ double sd = trial.getSD(runs, stats, nullMean);
+
+ double zscore = trial.get_zscore(sd, nullMean, initscore);
- m->mothurOut("pvalue: " + toString(pvalue)); m->mothurOutEndLine();
- out << metric << '\t' << thisLookUp[0]->getLabel() << '\t' << nullMean << '\t' << pvalue << endl;
+ m->mothurOut("zscore: " + toString(zscore)); m->mothurOutEndLine();
+ m->mothurOut("standard deviation: " + toString(sd)); m->mothurOutEndLine();
+ out << metric << '\t' << thisLookUp[0]->getLabel() << '\t' << nullMean << '\t' << zscore << '\t' << sd << endl;
return 0;
}
}
}
/***********************************************************************/
-string Engine::findMothursPath(){
- try {
-
- string envPath = getenv("PATH");
- string mothurPath = "";
-
- //delimiting path char
- char delim;
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- delim = ':';
- #else
- delim = ';';
- #endif
-
- //break apart path variable by ':'
- vector<string> dirs;
- mout->splitAtChar(envPath, dirs, delim);
-
- //get path related to mothur
- for (int i = 0; i < dirs.size(); i++) {
- //to lower so we can find it
- string tempLower = "";
- for (int j = 0; j < dirs[i].length(); j++) { tempLower += tolower(dirs[i][j]); }
-
- //is this mothurs path?
- if (tempLower.find("mothur") != -1) { mothurPath = dirs[i]; break; }
- }
-
- if (mothurPath != "") {
- //add mothur so it looks like what argv would look like
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- mothurPath += "/mothur";
- #else
- mothurPath += "\\mothur";
- #endif
- }else {
- //okay mothur is not in the path, so the folder mothur is in must be in the path
- //lets find out which one
-
- //get path related to mothur
- for (int i = 0; i < dirs.size(); i++) {
-
- //is this mothurs path?
- ifstream in;
- string tempIn = dirs[i];
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- tempIn += "/mothur";
- #else
- tempIn += "\\mothur";
- #endif
- mout->openInputFile(tempIn, in, "");
-
- //if this file exists
- if (in) { in.close(); mothurPath = tempIn; break; }
- }
- }
-
- return mothurPath;
-
- }
- catch(exception& e) {
- mout->errorOut(e, "Engine", "findMothursPath");
- exit(1);
- }
-}
+
/***********************************************************************/
InteractEngine::InteractEngine(string path){
string temppath = path.substr(0, (path.find_last_of("othur")-5));
//this will happen if you set the path variable to contain mothur's exe location
- if (temppath == "") { path = findMothursPath(); }
+ if (temppath == "") { path = mout->findProgramPath("mothur"); }
mout->argv = path;
}
string temppath = path.substr(0, (path.find_last_of("othur")-5));
//this will happen if you set the path variable to contain mothur's exe location
- if (temppath == "") { path = findMothursPath(); }
+ if (temppath == "") { path = mout->findProgramPath("mothur"); }
mout->argv = path;
string temppath = path.substr(0, (path.find_last_of("othur")-5));
//this will happen if you set the path variable to contain mothur's exe location
- if (temppath == "") { path = findMothursPath(); }
+ if (temppath == "") { path = mout->findProgramPath("mothur"); }
mout->argv = path;
vector<string> options;
CommandFactory* cFactory;
MothurOut* mout;
- string findMothursPath();
};
--- /dev/null
+//
+// getotulabelscommand.cpp
+// Mothur
+//
+// Created by Sarah Westcott on 5/21/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "getotulabelscommand.h"
+
+//**********************************************************************************************************************
+vector<string> GetOtuLabelsCommand::setParameters(){
+ try {
+ CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos);
+ CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pconstaxonomy);
+ CommandParameter potucorr("otucorr", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(potucorr);
+ CommandParameter pcorraxes("corraxes", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pcorraxes);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetOtuLabelsCommand", "setParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string GetOtuLabelsCommand::getHelpString(){
+ try {
+ string helpString = "";
+ helpString += "The get.otulabels command can be used to select specific otus with the output from classify.otu, otu.association, or corr.axes.\n";
+ helpString += "The get.otulabels parameters are: constaxonomy, otucorr, corraxes, and accnos.\n";
+ helpString += "The constaxonomy parameter is input the results of the classify.otu command.\n";
+ helpString += "The otucorr parameter is input the results of the otu.association command.\n";
+ helpString += "The corraxes parameter is input the results of the corr.axes command.\n";
+ helpString += "The get.otulabels commmand should be in the following format: \n";
+ helpString += "get.otulabels(accnos=yourListOfOTULabels, corraxes=yourCorrAxesFile)\n";
+ return helpString;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetOtuLabelsCommand", "getHelpString");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+GetOtuLabelsCommand::GetOtuLabelsCommand(){
+ try {
+ abort = true; calledHelp = true;
+ setParameters();
+ vector<string> tempOutNames;
+ outputTypes["contaxonomy"] = tempOutNames;
+ outputTypes["otu.corr"] = tempOutNames;
+ outputTypes["corr.axes"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetOtuLabelsCommand", "GetOtuLabelsCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+GetOtuLabelsCommand::GetOtuLabelsCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+ else {
+ //valid paramters for this command
+ vector<string> myArray = setParameters();
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+ map<string,string>::iterator it;
+ //check to make sure all parameters are valid for command
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+
+ //edit file types below to include only the types you added as parameters
+
+ string path;
+ it = parameters.find("constaxonomy");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["constaxonomy"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("accnos");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["accnos"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("corraxes");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["corraxes"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("otucorr");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["otucorr"] = inputDir + it->second; }
+ }
+ }
+
+ vector<string> tempOutNames;
+ outputTypes["contaxonomy"] = tempOutNames;
+ outputTypes["otu.corr"] = tempOutNames;
+ outputTypes["corr.axes"] = tempOutNames;
+
+ //check for parameters
+ accnosfile = validParameter.validFile(parameters, "accnos", true);
+ if (accnosfile == "not open") { abort = true; }
+ else if (accnosfile == "not found") {
+ accnosfile = m->getAccnosFile();
+ if (accnosfile != "") { m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine();
+ abort = true;
+ }
+ }else { m->setAccnosFile(accnosfile); }
+
+ constaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true);
+ if (constaxonomyfile == "not open") { constaxonomyfile = ""; abort = true; }
+ else if (constaxonomyfile == "not found") { constaxonomyfile = ""; }
+
+ corraxesfile = validParameter.validFile(parameters, "corraxes", true);
+ if (corraxesfile == "not open") { corraxesfile = ""; abort = true; }
+ else if (corraxesfile == "not found") { corraxesfile = ""; }
+
+ otucorrfile = validParameter.validFile(parameters, "otucorr", true);
+ if (otucorrfile == "not open") { otucorrfile = ""; abort = true; }
+ else if (otucorrfile == "not found") { otucorrfile = ""; }
+
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
+
+ if ((constaxonomyfile == "") && (corraxesfile == "") && (otucorrfile == "")) { m->mothurOut("You must provide one of the following: constaxonomy, corraxes or otucorr."); m->mothurOutEndLine(); abort = true; }
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetOtuLabelsCommand", "GetOtuLabelsCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int GetOtuLabelsCommand::execute(){
+ try {
+
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ //get labels you want to keep
+ readAccnos();
+
+ if (m->control_pressed) { return 0; }
+
+ //read through the correct file and output lines you want to keep
+ if (constaxonomyfile != "") { readClassifyOtu(); }
+ if (corraxesfile != "") { readCorrAxes(); }
+ if (otucorrfile != "") { readOtuAssociation(); }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ //output files created by command
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetOtuLabelsCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int GetOtuLabelsCommand::readClassifyOtu(){
+ try {
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(constaxonomyfile); }
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(constaxonomyfile)) + "pick.taxonomy";
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+
+ ifstream in;
+ m->openInputFile(constaxonomyfile, in);
+
+ bool wroteSomething = false;
+ int selectedCount = 0;
+
+ //read headers
+ string headers = m->getline(in);
+ out << headers << endl;
+
+ while (!in.eof()) {
+
+ if (m->control_pressed) { break; }
+
+ string otu = ""; string tax = "unknown";
+ int size = 0;
+
+ in >> otu >> size >> tax; m->gobble(in);
+
+ if (labels.count(otu) != 0) {
+ wroteSomething = true;
+ selectedCount++;
+
+ out << otu << '\t' << size << '\t' << tax << endl;
+ }
+ }
+ in.close();
+ out.close();
+
+ if (wroteSomething == false) { m->mothurOut("Your file does not contain any labels from the .accnos file."); m->mothurOutEndLine(); }
+ outputNames.push_back(outputFileName); outputTypes["constaxonomy"].push_back(outputFileName);
+
+ m->mothurOut("Selected " + toString(selectedCount) + " otus from your constaxonomy file."); m->mothurOutEndLine();
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetOtuLabelsCommand", "readClassifyOtu");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int GetOtuLabelsCommand::readOtuAssociation(){
+ try {
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(otucorrfile); }
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(otucorrfile)) + "pick.corr";
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+
+ ifstream in;
+ m->openInputFile(otucorrfile, in);
+
+ bool wroteSomething = false;
+ int selectedCount = 0;
+
+ //read headers
+ string headers = m->getline(in);
+ out << headers << endl;
+
+ while (!in.eof()) {
+
+ if (m->control_pressed) { break; }
+
+ string otu1 = "";
+ string otu2 = "";
+ in >> otu1 >> otu2;
+ string line = m->getline(in); m->gobble(in);
+
+ if ((labels.count(otu1) != 0) && (labels.count(otu2) != 0)){
+ wroteSomething = true;
+ selectedCount++;
+
+ out << otu1 << '\t' << otu2 << '\t' << line << endl;
+ }
+ }
+ in.close();
+ out.close();
+
+ if (wroteSomething == false) { m->mothurOut("Your file does not contain any labels from the .accnos file."); m->mothurOutEndLine(); }
+ outputNames.push_back(outputFileName); outputTypes["otu.corr"].push_back(outputFileName);
+
+ m->mothurOut("Selected " + toString(selectedCount) + " lines from your otu.corr file."); m->mothurOutEndLine();
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetOtuLabelsCommand", "readOtuAssociation");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int GetOtuLabelsCommand::readCorrAxes(){
+ try {
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(corraxesfile); }
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(corraxesfile)) + "pick.axes";
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+
+
+ ifstream in;
+ m->openInputFile(corraxesfile, in);
+
+ bool wroteSomething = false;
+ int selectedCount = 0;
+
+ //read headers
+ string headers = m->getline(in);
+ out << headers << endl;
+
+ while (!in.eof()) {
+
+ if (m->control_pressed) { break; }
+
+ string otu = "";
+ in >> otu;
+ string line = m->getline(in); m->gobble(in);
+
+ if (labels.count(otu) != 0) {
+ wroteSomething = true;
+ selectedCount++;
+
+ out << otu << '\t' << line << endl;
+ }
+ }
+ in.close();
+ out.close();
+
+ if (wroteSomething == false) { m->mothurOut("Your file does not contain any labels from the .accnos file."); m->mothurOutEndLine(); }
+ outputNames.push_back(outputFileName); outputTypes["corr.axes"].push_back(outputFileName);
+
+ m->mothurOut("Selected " + toString(selectedCount) + " lines from your corr.axes file."); m->mothurOutEndLine();
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetOtuLabelsCommand", "readCorrAxes");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+int GetOtuLabelsCommand::readAccnos(){
+ try {
+
+ ifstream in;
+ m->openInputFile(accnosfile, in);
+ string name;
+
+ while(!in.eof()){
+ in >> name;
+
+ labels.insert(name);
+
+ m->gobble(in);
+ }
+ in.close();
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetOtuLabelsCommand", "readAccnos");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
--- /dev/null
+#ifndef Mothur_getotulabelscommand_h
+#define Mothur_getotulabelscommand_h
+
+//
+// getotulabelscommand.h
+// Mothur
+//
+// Created by Sarah Westcott on 5/21/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+
+#include "command.hpp"
+
+/**************************************************************************************************/
+
+class GetOtuLabelsCommand : public Command {
+public:
+ GetOtuLabelsCommand(string);
+ GetOtuLabelsCommand();
+ ~GetOtuLabelsCommand(){}
+
+ vector<string> setParameters();
+ string getCommandName() { return "get.otulabels"; }
+ string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getHelpString();
+ string getCitation() { return "http://www.mothur.org/wiki/Get.otulabels"; }
+ string getDescription() { return "Can be used with output from classify.otu, otu.association, or corr.axes to select specific otus."; }
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+private:
+ bool abort;
+ string outputDir, accnosfile, constaxonomyfile, otucorrfile, corraxesfile;
+ vector<string> outputNames;
+ set<string> labels;
+
+ int readClassifyOtu();
+ int readOtuAssociation();
+ int readCorrAxes();
+ int readAccnos();
+
+};
+
+/**************************************************************************************************/
+
+
+
+
+
+
+#endif
m->setAllGroups(namesOfGroups);
return error;
}
-
+/************************************************************/
+int GroupMap::readDesignMap(string filename) {
+ groupFileName = filename;
+ m->openInputFile(filename, fileHandle);
+ index = 0;
+ string seqName, seqGroup;
+ int error = 0;
+
+ while(fileHandle){
+ fileHandle >> seqName; m->gobble(fileHandle); //read from first column
+ fileHandle >> seqGroup; //read from second column
+
+ if (m->control_pressed) { fileHandle.close(); return 1; }
+
+ setNamesOfGroups(seqGroup);
+
+ it = groupmap.find(seqName);
+
+ if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 group named " + seqName + ", group names must be unique. Please correct."); m->mothurOutEndLine(); }
+ else {
+ groupmap[seqName] = seqGroup; //store data in map
+ seqsPerGroup[seqGroup]++; //increment number of seqs in that group
+ }
+ m->gobble(fileHandle);
+ }
+ fileHandle.close();
+ m->setAllGroups(namesOfGroups);
+ return error;
+}
/************************************************************/
int GroupMap::getNumGroups() { return namesOfGroups.size(); }
/************************************************************/
class GroupMap {
public:
- GroupMap() {};
+ GroupMap() { m = MothurOut::getInstance(); }
GroupMap(string);
~GroupMap();
int readMap();
int readDesignMap();
+ int readDesignMap(string);
int getNumGroups();
bool isValidGroup(string); //return true if string is a valid group
string getGroup(string);
util.setGroups(Groups, nameGroups);
designMap->setNamesOfGroups(nameGroups);
- //loop through the Groups and fill Globaldata's Groups with the design file info
vector<string> namesSeqs = designMap->getNamesSeqs(Groups);
m->setGroups(namesSeqs);
}
--- /dev/null
+//
+// listotucommand.cpp
+// Mothur
+//
+// Created by Sarah Westcott on 5/15/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "listotulabelscommand.h"
+#include "inputdata.h"
+
+//**********************************************************************************************************************
+vector<string> ListOtuLabelsCommand::setParameters(){
+ try {
+ CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(pshared);
+ CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(prelabund);
+ CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
+ CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
+ //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ListOtuLabelsCommand", "setParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string ListOtuLabelsCommand::getHelpString(){
+ try {
+ string helpString = "";
+ helpString += "The list.otulabels lists otu labels from shared or relabund file. The results can be used by the get.otulabels to select specific otus with the output from classify.otu, otu.association, or corr.axes.\n";
+ helpString += "The list.otulabels parameters are: shared, relabund, label and groups.\n";
+ helpString += "The label parameter is used to analyze specific labels in your input.\n";
+ helpString += "The groups parameter allows you to specify which of the groups you would like analyzed.\n";
+ helpString += "The list.otulabels commmand should be in the following format: \n";
+ helpString += "list.otulabels(shared=yourSharedFile, groups=yourGroup1-yourGroup2)\n";
+ return helpString;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ListOtuLabelsCommand", "getHelpString");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+ListOtuLabelsCommand::ListOtuLabelsCommand(){
+ try {
+ abort = true; calledHelp = true;
+ setParameters();
+ vector<string> tempOutNames;
+ outputTypes["otulabels"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ListOtuLabelsCommand", "ListOtuLabelsCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+ListOtuLabelsCommand::ListOtuLabelsCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+ allLines = 1;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+ else {
+ //valid paramters for this command
+ vector<string> myArray = setParameters();
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+ map<string,string>::iterator it;
+ //check to make sure all parameters are valid for command
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+
+ //edit file types below to include only the types you added as parameters
+
+ string path;
+ it = parameters.find("relabund");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["relabund"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("shared");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["shared"] = inputDir + it->second; }
+ }
+ }
+
+ vector<string> tempOutNames;
+ outputTypes["otulabels"] = tempOutNames;
+
+ //check for parameters
+ sharedfile = validParameter.validFile(parameters, "shared", true);
+ if (sharedfile == "not open") { abort = true; }
+ else if (sharedfile == "not found") { sharedfile = ""; }
+ else { inputFileName = sharedfile; format = "sharedfile"; m->setSharedFile(sharedfile); }
+
+ relabundfile = validParameter.validFile(parameters, "relabund", true);
+ if (relabundfile == "not open") { abort = true; }
+ else if (relabundfile == "not found") { relabundfile = ""; }
+ else { inputFileName = relabundfile; format = "relabund"; m->setRelAbundFile(relabundfile); }
+
+ if ((relabundfile == "") && (sharedfile == "")) {
+ //is there are current file available for either of these?
+ //give priority to shared, then relabund
+ sharedfile = m->getSharedFile();
+ if (sharedfile != "") { inputFileName = sharedfile; format="sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+ else {
+ relabundfile = m->getRelAbundFile();
+ if (relabundfile != "") { inputFileName = relabundfile; format="relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("No valid current files. You must provide a shared or relabund."); m->mothurOutEndLine();
+ abort = true;
+ }
+ }
+ }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ outputDir = m->hasPath(inputFileName); //if user entered a file with a path then preserve it
+ }
+
+ string groups = validParameter.validFile(parameters, "groups", false);
+ if (groups == "not found") { groups = ""; }
+ else { m->splitAtDash(groups, Groups); }
+ m->setGroups(Groups);
+
+ string label = validParameter.validFile(parameters, "label", false);
+ if (label == "not found") { label = ""; }
+ else {
+ if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
+ else { allLines = 1; }
+ }
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ListOtuLabelsCommand", "ListOtuLabelsCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int ListOtuLabelsCommand::execute(){
+ try {
+
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ InputData input(inputFileName, format);
+
+ if (format == "relabund") {
+ vector<SharedRAbundFloatVector*> lookup = input.getSharedRAbundFloatVectors();
+ string lastLabel = lookup[0]->getLabel();
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+
+ if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ createList(lookup);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+ }
+
+ if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = lookup[0]->getLabel();
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ lookup = input.getSharedRAbundFloatVectors(lastLabel);
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ createList(lookup);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+
+ //restore real lastlabel to save below
+ lookup[0]->setLabel(saveLabel);
+ }
+
+ lastLabel = lookup[0]->getLabel();
+ //prevent memory leak
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ //get next line to process
+ lookup = input.getSharedRAbundFloatVectors();
+ }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
+ lookup = input.getSharedRAbundFloatVectors(lastLabel);
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ createList(lookup);
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ }
+ }else {
+
+ vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+ string lastLabel = lookup[0]->getLabel();
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+
+ if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ createList(lookup);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+ }
+
+ if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = lookup[0]->getLabel();
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ createList(lookup);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+
+ //restore real lastlabel to save below
+ lookup[0]->setLabel(saveLabel);
+ }
+
+ lastLabel = lookup[0]->getLabel();
+ //prevent memory leak
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ //get next line to process
+ lookup = input.getSharedRAbundVectors();
+ }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ createList(lookup);
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ }
+ }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ //output files created by command
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ListOtuLabelsCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int ListOtuLabelsCommand::createList(vector<SharedRAbundVector*>& lookup){
+ try {
+
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + ".otu.labels";
+ outputNames.push_back(outputFileName); outputTypes["otulabels"].push_back(outputFileName);
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+
+ for (int i = 0; i < m->currentBinLabels.size(); i++) { out << m->currentBinLabels[i] << endl; }
+
+ out.close();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ListOtuLabelsCommand", "createTable");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+int ListOtuLabelsCommand::createList(vector<SharedRAbundFloatVector*>& lookup){
+ try {
+
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + ".otu.labels";
+ outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+
+ for (int i = 0; i < m->currentBinLabels.size(); i++) { out << m->currentBinLabels[i] << endl; }
+
+ out.close();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ListOtuLabelsCommand", "createTable");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
--- /dev/null
+#ifndef Mothur_listotucommand_h
+#define Mothur_listotucommand_h
+
+//
+// listotucommand.h
+// Mothur
+//
+// Created by Sarah Westcott on 5/15/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+
+#include "command.hpp"
+#include "sharedrabundvector.h"
+
+/**************************************************************************************************/
+
+class ListOtuLabelsCommand : public Command {
+public:
+ ListOtuLabelsCommand(string);
+ ListOtuLabelsCommand();
+ ~ListOtuLabelsCommand(){}
+
+ vector<string> setParameters();
+ string getCommandName() { return "list.otulabels"; }
+ string getCommandCategory() { return "OTU-Based Approaches"; }
+ //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden
+ string getHelpString();
+ string getCitation() { return "http://www.mothur.org/wiki/List.otulabels"; }
+ string getDescription() { return "lists otu labels from shared or relabund file. Can be used by get.otulabels with output from classify.otu, otu.association, or corr.axes to select specific otus."; }
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+private:
+ bool abort, allLines;
+ string outputDir, sharedfile, relabundfile, label, inputFileName, format;
+ vector<string> outputNames;
+ vector<string> Groups;
+ set<string> labels;
+
+ int createList(vector<SharedRAbundFloatVector*>&);
+ int createList(vector<SharedRAbundVector*>&);
+
+};
+
+/**************************************************************************************************/
+
+
+
+
+
+
+#endif
for (it = parameters.begin(); it != parameters.end(); it++) {
if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
}
-
+
//initialize outputTypes
vector<string> tempOutNames;
outputTypes["biom"] = tempOutNames;
scores.push_back(confidenceScore);
}else{ scores.push_back("null"); }
}
- }
+ }else{ scores.push_back("null"); }
//strip "" if they are there
pos = taxon.find("\"");
--- /dev/null
+//
+// makecontigscommand.cpp
+// Mothur
+//
+// Created by Sarah Westcott on 5/15/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "makecontigscommand.h"
+
+//**********************************************************************************************************************
+vector<string> MakeContigsCommand::setParameters(){
+ try {
+ CommandParameter pfasta("ffastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
+ CommandParameter prfasta("rfastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(prfasta);
+ CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "",false,false); parameters.push_back(palign);
+ CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
+ CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
+ CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
+ CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
+ CommandParameter pthreshold("threshold", "Number", "", "40", "", "", "",false,false); parameters.push_back(pthreshold);
+ CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeContigsCommand", "setParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string MakeContigsCommand::getHelpString(){
+ try {
+ string helpString = "";
+ helpString += "The make.contigs command reads a forward fastq file and a reverse fastq file and outputs new fasta and quality files.\n";
+ helpString += "The make.contigs command parameters are ffastq, rfastq, align, match, mismatch, gapopen, gapextend and processors.\n";
+ helpString += "The ffastq and rfastq parameter is required.\n";
+ helpString += "The align parameter allows you to specify the alignment method to use. Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n";
+ helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
+ helpString += "The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n";
+ helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
+ helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n";
+ helpString += "The threshold parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=40.\n";
+ helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
+ helpString += "The make.contigs command should be in the following format: \n";
+ helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
+ helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
+ return helpString;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeContigsCommand", "getHelpString");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+MakeContigsCommand::MakeContigsCommand(){
+ try {
+ abort = true; calledHelp = true;
+ setParameters();
+ vector<string> tempOutNames;
+ outputTypes["fasta"] = tempOutNames;
+ outputTypes["qfile"] = tempOutNames;
+ outputTypes["mismatch"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+MakeContigsCommand::MakeContigsCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+ else {
+ vector<string> myArray = setParameters();
+
+ OptionParser parser(option);
+ map<string, string> parameters = parser.getParameters();
+
+ ValidParameters validParameter("pairwise.seqs");
+ map<string, string>::iterator it;
+
+ //check to make sure all parameters are valid for command
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ //initialize outputTypes
+ vector<string> tempOutNames;
+ outputTypes["fasta"] = tempOutNames;
+ outputTypes["qfile"] = tempOutNames;
+ outputTypes["mismatch"] = tempOutNames;
+
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+ string path;
+ it = parameters.find("ffastq");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["ffastq"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("rfastq");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["rfastq"] = inputDir + it->second; }
+ }
+ }
+
+ ffastqfile = validParameter.validFile(parameters, "ffastq", true);
+ if (ffastqfile == "not open") { ffastqfile = ""; abort = true; }
+ else if (ffastqfile == "not found") { ffastqfile = ""; abort=true; m->mothurOut("The ffastq parameter is required.\n"); }
+
+ rfastqfile = validParameter.validFile(parameters, "rfastq", true);
+ if (rfastqfile == "not open") { rfastqfile = ""; abort = true; }
+ else if (rfastqfile == "not found") { rfastqfile = ""; abort=true; m->mothurOut("The rfastq parameter is required.\n"); }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(ffastqfile); }
+
+
+ //check for optional parameter and set defaults
+ // ...at some point should added some additional type checking...
+ string temp;
+ temp = validParameter.validFile(parameters, "match", false); if (temp == "not found"){ temp = "1.0"; }
+ m->mothurConvert(temp, match);
+
+ temp = validParameter.validFile(parameters, "mismatch", false); if (temp == "not found"){ temp = "-1.0"; }
+ m->mothurConvert(temp, misMatch);
+ if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; }
+
+ temp = validParameter.validFile(parameters, "gapopen", false); if (temp == "not found"){ temp = "-2.0"; }
+ m->mothurConvert(temp, gapOpen);
+ if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; }
+
+ temp = validParameter.validFile(parameters, "gapextend", false); if (temp == "not found"){ temp = "-1.0"; }
+ m->mothurConvert(temp, gapExtend);
+ if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
+
+ temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found"){ temp = "40"; }
+ m->mothurConvert(temp, threshold);
+ if ((threshold < 0) || (threshold > 40)) { m->mothurOut("[ERROR]: threshold must be between 0 and 40.\n"); abort=true; }
+
+ temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
+ m->setProcessors(temp);
+ m->mothurConvert(temp, processors);
+
+ align = validParameter.validFile(parameters, "align", false); if (align == "not found"){ align = "needleman"; }
+ if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int MakeContigsCommand::execute(){
+ try {
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ //read ffastq and rfastq files creating fasta and qual files.
+ //this function will create a forward and reverse, fasta and qual files for each processor.
+ //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual
+ int numReads = 0;
+ int start = time(NULL);
+ longestBase = 1000;
+ m->mothurOut("Reading fastq data...\n");
+ vector< vector<string> > files = readFastqFiles(numReads);
+ m->mothurOut("Done.\n");
+
+ if (m->control_pressed) { return 0; }
+
+ string outFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.fasta";
+ string outQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.qual";
+ string outMisMatchFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.mismatches";
+ outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
+ outputNames.push_back(outQualFile); outputTypes["qfile"].push_back(outQualFile);
+ outputNames.push_back(outMisMatchFile); outputTypes["mismatch"].push_back(outMisMatchFile);
+
+ m->mothurOut("Making contigs...\n");
+ createProcesses(files, outFastaFile, outQualFile, outMisMatchFile);
+ m->mothurOut("Done.\n");
+
+ //remove temp fasta and qual files
+ for (int i = 0; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
+
+ string currentFasta = "";
+ itTypes = outputTypes.find("fasta");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
+ }
+
+ string currentQual = "";
+ itTypes = outputTypes.find("qfile");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { currentQual = (itTypes->second)[0]; m->setQualFile(currentQual); }
+ }
+
+ //output files created by command
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeContigsCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputQual, string outputMisMatches) {
+ try {
+ int num = 0;
+ vector<int> processIDS;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ int process = 0;
+
+ //loop through and create all the processes you want
+ while (process != processors-1) {
+ int pid = fork();
+
+ if (pid > 0) {
+ processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
+ process++;
+ }else if (pid == 0){
+ num = driver(files[process], outputFasta + toString(getpid()) + ".temp", outputQual + toString(getpid()) + ".temp", outputMisMatches + toString(getpid()) + ".temp");
+
+ //pass numSeqs to parent
+ ofstream out;
+ string tempFile = outputFasta + toString(getpid()) + ".num.temp";
+ m->openOutputFile(tempFile, out);
+ out << num << endl;
+ out.close();
+
+ exit(0);
+ }else {
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
+ }
+ }
+
+ //do my part
+ num = driver(files[processors-1], outputFasta, outputQual, outputMisMatches);
+
+ //force parent to wait until all the processes are done
+ for (int i=0;i<processIDS.size();i++) {
+ int temp = processIDS[i];
+ wait(&temp);
+ }
+
+ for (int i = 0; i < processIDS.size(); i++) {
+ ifstream in;
+ string tempFile = outputFasta + toString(processIDS[i]) + ".num.temp";
+ m->openInputFile(tempFile, in);
+ if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+ in.close(); m->mothurRemove(tempFile);
+ }
+ #else
+
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+ //Windows version shared memory, so be careful when passing variables through the contigsData struct.
+ //Above fork() will clone, so memory is separate, but that's not the case with windows,
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ vector<contigsData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ //Create processor worker threads.
+ for( int i=0; i<processors-1; i++ ){
+ string extension = toString(i) + ".temp";
+
+ contigsData* tempcontig = new contigsData(files[i], (outputFasta + extension), (outputQual + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, threshold, i);
+ pDataArray.push_back(tempcontig);
+ processIDS.push_back(i);
+
+ hThreadArray[i] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
+ }
+
+ num = driver(files[processors-1], outputFasta, outputQual, outputMisMatches);
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ num += pDataArray[i]->count;
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+
+ #endif
+
+ for (int i = 0; i < processIDS.size(); i++) {
+ m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta);
+ m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp"));
+
+ m->appendFiles((outputQual + toString(processIDS[i]) + ".temp"), outputQual);
+ m->mothurRemove((outputQual + toString(processIDS[i]) + ".temp"));
+
+ m->appendFiles((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
+ m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
+ }
+
+ return num;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeContigsCommand", "createProcesses");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputQual, string outputMisMatches){
+ try {
+
+ Alignment* alignment;
+ if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); }
+ else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); }
+
+ int num = 0;
+ string thisffastafile = files[0];
+ string thisfqualfile = files[1];
+ string thisrfastafile = files[2];
+ string thisrqualfile = files[3];
+
+ if (m->debug) { m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
+
+ ifstream inFFasta, inRFasta, inFQual, inRQual;
+ m->openInputFile(thisffastafile, inFFasta);
+ m->openInputFile(thisfqualfile, inFQual);
+ m->openInputFile(thisrfastafile, inRFasta);
+ m->openInputFile(thisrqualfile, inRQual);
+
+ ofstream outFasta, outQual, outMisMatch;
+ m->openOutputFile(outputFasta, outFasta);
+ m->openOutputFile(outputQual, outQual);
+ m->openOutputFile(outputMisMatches, outMisMatch);
+ outMisMatch << "Name\tLength\tMisMatches\n";
+
+ while ((!inFQual.eof()) && (!inFFasta.eof()) && (!inRFasta.eof()) && (!inRQual.eof())) {
+
+ if (m->control_pressed) { break; }
+
+ //read seqs and quality info
+ Sequence fSeq(inFFasta); m->gobble(inFFasta);
+ Sequence rSeq(inRFasta); m->gobble(inRFasta);
+ QualityScores fQual(inFQual); m->gobble(inFQual);
+ QualityScores rQual(inRQual); m->gobble(inRQual);
+
+ //flip the reverse reads
+ rSeq.reverseComplement();
+ rQual.flipQScores();
+
+ //pairwise align
+ alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
+ map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
+ map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
+ fSeq.setAligned(alignment->getSeqAAln());
+ rSeq.setAligned(alignment->getSeqBAln());
+ int length = fSeq.getAligned().length();
+
+ //traverse alignments merging into one contiguous seq
+ string contig = "";
+ vector<int> contigScores;
+ int numMismatches = 0;
+ string seq1 = fSeq.getAligned();
+ string seq2 = rSeq.getAligned();
+ vector<int> scores1 = fQual.getQualityScores();
+ vector<int> scores2 = rQual.getQualityScores();
+
+ // if (num < 5) { cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
+ int overlapStart = fSeq.getStartPos();
+ int seq2Start = rSeq.getStartPos();
+ //bigger of the 2 starting positions is the location of the overlapping start
+ if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
+ overlapStart = seq2Start;
+ for (int i = 0; i < overlapStart; i++) {
+ contig += seq1[i];
+ contigScores.push_back(scores1[ABaseMap[i]]);
+ }
+ }else { //seq1 starts later so take from 0 to overlapStart from seq2
+ for (int i = 0; i < overlapStart; i++) {
+ contig += seq2[i];
+ contigScores.push_back(scores2[BBaseMap[i]]);
+ }
+ }
+
+ int seq1End = fSeq.getEndPos();
+ int seq2End = rSeq.getEndPos();
+ int overlapEnd = seq1End;
+ if (seq2End < overlapEnd) { overlapEnd = seq2End; } //smallest end position is where overlapping ends
+
+ for (int i = overlapStart; i < overlapEnd; i++) {
+ if (seq1[i] == seq2[i]) { //match, add base and choose highest score
+ contig += seq1[i];
+ contigScores.push_back(scores1[ABaseMap[i]]);
+ if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; }
+ }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below threshold. In that case eliminate base
+ if (scores2[BBaseMap[i]] < threshold) { } //
+ else {
+ contig += seq2[i];
+ contigScores.push_back(scores2[BBaseMap[i]]);
+ }
+ }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below threshold. In that case eliminate base
+ if (scores1[ABaseMap[i]] < threshold) { } //
+ else {
+ contig += seq1[i];
+ contigScores.push_back(scores1[ABaseMap[i]]);
+ }
+ }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
+ char c = seq1[i];
+ contigScores.push_back(scores1[ABaseMap[i]]);
+ if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; c = seq2[i]; }
+ contig += c;
+ numMismatches++;
+ }else { //should never get here
+ m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
+ }
+ }
+
+ if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
+ for (int i = overlapEnd; i < length; i++) {
+ contig += seq2[i];
+ contigScores.push_back(scores2[BBaseMap[i]]);
+ }
+ }else { //seq2 ends before seq1 so take from overlap to length from seq1
+ for (int i = overlapEnd; i < length; i++) {
+ contig += seq1[i];
+ contigScores.push_back(scores1[ABaseMap[i]]);
+ }
+
+ }
+ //if (num < 5) { cout << overlapStart << '\t' << overlapEnd << endl << seq1 << endl << seq2 << endl<< contig << endl; }
+ //output
+ outFasta << ">" << fSeq.getName() << endl << contig << endl;
+ outQual << ">" << fSeq.getName() << endl;
+ for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
+ outQual << endl;
+ outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
+
+ num++;
+
+ //report progress
+ if((num) % 1000 == 0){ m->mothurOut(toString(num)); m->mothurOutEndLine(); }
+ }
+
+ //report progress
+ if((num) % 1000 != 0){ m->mothurOut(toString(num)); m->mothurOutEndLine(); }
+
+ inFFasta.close();
+ inFQual.close();
+ inRFasta.close();
+ inRQual.close();
+ outFasta.close();
+ outQual.close();
+ outMisMatch.close();
+ delete alignment;
+
+ if (m->control_pressed) { m->mothurRemove(outputQual); m->mothurRemove(outputFasta); m->mothurRemove(outputMisMatches);}
+
+ return num;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeContigsCommand", "driver");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector< vector<string> > MakeContigsCommand::readFastqFiles(int& count){
+ try {
+ vector< vector<string> > files;
+
+ //maps processors number to file pointer
+ map<int, vector<ofstream*> > tempfiles; //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
+ map<int, vector<ofstream*> >::iterator it;
+
+ //create files to write to
+ for (int i = 0; i < processors; i++) {
+ vector<ofstream*> temp;
+ ofstream* outFF = new ofstream; temp.push_back(outFF);
+ ofstream* outFQ = new ofstream; temp.push_back(outFQ);
+ ofstream* outRF = new ofstream; temp.push_back(outRF);
+ ofstream* outRQ = new ofstream; temp.push_back(outRQ);
+ tempfiles[i] = temp;
+
+ vector<string> names;
+ string ffastafilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "ffasta.temp";
+ string rfastafilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rfasta.temp";
+ string fqualfilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "fqual.temp";
+ string rqualfilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rqual.temp";
+ names.push_back(ffastafilename); names.push_back(fqualfilename);
+ names.push_back(rfastafilename); names.push_back(rqualfilename);
+ files.push_back(names);
+
+ m->openOutputFile(ffastafilename, *outFF);
+ m->openOutputFile(rfastafilename, *outRF);
+ m->openOutputFile(fqualfilename, *outFQ);
+ m->openOutputFile(rqualfilename, *outRQ);
+ }
+
+ if (m->control_pressed) {
+ //close files, delete ofstreams
+ for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } }
+ //remove files
+ for (int i = 0; i < files.size(); i++) {
+ for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
+ }
+ }
+
+ ifstream inForward;
+ m->openInputFile(ffastqfile, inForward);
+
+ ifstream inReverse;
+ m->openInputFile(rfastqfile, inReverse);
+
+ count = 0;
+ while ((!inForward.eof()) && (!inReverse.eof())) {
+
+ if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
+
+ //get a read from forward and reverse fastq files
+ fastqRead fread = readFastq(inForward);
+ fastqRead rread = readFastq(inReverse);
+ checkReads(fread, rread);
+
+ if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
+
+ //if the reads are okay write to output files
+ int process = count % processors;
+
+ *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
+ *(tempfiles[process][1]) << ">" << fread.name << endl;
+ for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
+ *(tempfiles[process][1]) << endl;
+ *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
+ *(tempfiles[process][3]) << ">" << rread.name << endl;
+ for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
+ *(tempfiles[process][3]) << endl;
+
+ count++;
+
+ //report progress
+ if((count) % 10000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
+
+ }
+ //report progress
+ if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
+
+
+
+ //close files, delete ofstreams
+ for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } }
+ inForward.close();
+ inReverse.close();
+
+ //adjust for really large processors or really small files
+ if (count < processors) {
+ for (int i = count; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
+ files.resize(count);
+ processors = count;
+ }
+
+ return files;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+fastqRead MakeContigsCommand::readFastq(ifstream& in){
+ try {
+ fastqRead read;
+
+ //read sequence name
+ string name = m->getline(in); m->gobble(in);
+ if (name == "") { m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+ else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+ else { name = name.substr(1); }
+
+ //read sequence
+ string sequence = m->getline(in); m->gobble(in);
+ if (sequence == "") { m->mothurOut("[ERROR]: missing sequence for " + name); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+
+ //read sequence name
+ string name2 = m->getline(in); m->gobble(in);
+ if (name2 == "") { m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+ else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+ else { name2 = name2.substr(1); }
+
+ //read quality scores
+ string quality = m->getline(in); m->gobble(in);
+ if (quality == "") { m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+
+ //sanity check sequence length and number of quality scores match
+ if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; return read; } }
+ if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+
+ vector<int> qualScores;
+ int controlChar = int('@');
+ for (int i = 0; i < quality.length(); i++) {
+ int temp = int(quality[i]);
+ temp -= controlChar;
+
+ qualScores.push_back(temp);
+ }
+
+ read.name = name;
+ read.sequence = sequence;
+ read.scores = qualScores;
+
+ return read;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeContigsCommand", "readFastq");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse){
+ try {
+ bool good = true;
+
+ //fix names
+ if ((forward.name.length() > 2) && (reverse.name.length() > 2)) {
+ forward.name = forward.name.substr(0, forward.name.length()-2);
+ reverse.name = reverse.name.substr(0, reverse.name.length()-2);
+ }else { good = false; m->control_pressed = true; }
+
+ //do names match
+ if (forward.name != reverse.name) {
+ m->mothurOut("[ERROR]: read " + forward.name + " from " + ffastqfile + ", but read " + reverse.name + " from " + rfastqfile + ".\n");
+ good = false; m->control_pressed = true;
+ }
+
+ //do sequence lengths match
+ if (forward.sequence.length() != reverse.sequence.length()) {
+ m->mothurOut("[ERROR]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffastqfile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfastqfile + ".\n");
+ good = false; m->control_pressed = true;
+ }
+
+ //do number of qual scores match
+ if (forward.scores.size() != reverse.scores.size()) {
+ m->mothurOut("[ERROR]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffastqfile + ", but read " + toString(reverse.scores.size()) + " quality scores from " + rfastqfile + ".\n");
+ good = false; m->control_pressed = true;
+ }
+
+ return good;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeContigsCommand", "readFastq");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+
+
+
--- /dev/null
+#ifndef Mothur_makecontigscommand_h
+#define Mothur_makecontigscommand_h
+
+//
+// makecontigscommand.h
+// Mothur
+//
+// Created by Sarah Westcott on 5/15/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "command.hpp"
+#include "sequence.hpp"
+#include "qualityscores.h"
+#include "alignment.hpp"
+#include "gotohoverlap.hpp"
+#include "needlemanoverlap.hpp"
+#include "blastalign.hpp"
+#include "noalign.hpp"
+
+
+struct fastqRead {
+ vector<int> scores;
+ string name;
+ string sequence;
+
+ fastqRead() { name = ""; sequence = ""; scores.clear(); };
+ fastqRead(string n, string s, vector<int> sc) : name(n), sequence(s), scores(sc) {};
+ ~fastqRead() {};
+};
+
+/**************************************************************************************************/
+
+class MakeContigsCommand : public Command {
+public:
+ MakeContigsCommand(string);
+ MakeContigsCommand();
+ ~MakeContigsCommand(){}
+
+ vector<string> setParameters();
+ string getCommandName() { return "make.contigs"; }
+ string getCommandCategory() { return "Sequence Processing"; }
+ //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden
+ string getHelpString();
+ string getCitation() { return "http://www.mothur.org/wiki/Make.contigs"; }
+ string getDescription() { return "description"; }
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+private:
+ bool abort;
+ string outputDir, ffastqfile, rfastqfile, align;
+ float match, misMatch, gapOpen, gapExtend;
+ int processors, longestBase, threshold;
+ vector<string> outputNames;
+
+ fastqRead readFastq(ifstream&);
+ vector< vector<string> > readFastqFiles(int&);
+ bool checkReads(fastqRead&, fastqRead&);
+ int createProcesses(vector< vector<string> >, string, string, string);
+ int driver(vector<string>, string, string, string);
+};
+
+/**************************************************************************************************/
+
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct contigsData {
+ string outputFasta;
+ string outputQual;
+ string outputMisMatches;
+ string align;
+ vector<string> files;
+ MothurOut* m;
+ float match, misMatch, gapOpen, gapExtend;
+ int count, threshold, threadID;
+
+ contigsData(){}
+ contigsData(vector<string> f, string of, string oq, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int tid) {
+ files = f;
+ outputFasta = of;
+ outputQual = oq;
+ outputMisMatches = om;
+ m = mout;
+ match = ma;
+ misMatch = misMa;
+ gapOpen = gapO;
+ gapExtend = gapE;
+ threshold = thr;
+ align = al;
+ count = 0;
+ threadID = tid;
+ }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
+ contigsData* pDataArray;
+ pDataArray = (contigsData*)lpParam;
+
+ try {
+ int longestBase = 1000;
+ Alignment* alignment;
+ if(pDataArray->align == "gotoh") { alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, longestBase); }
+ else if(pDataArray->align == "needleman") { alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, longestBase); }
+
+ int num = 0;
+ string thisffastafile = pDataArray->files[0];
+ string thisfqualfile = pDataArray->files[1];
+ string thisrfastafile = pDataArray->files[2];
+ string thisrqualfile = pDataArray->files[3];
+
+ if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
+
+ ifstream inFFasta, inRFasta, inFQual, inRQual;
+ pDataArray->m->openInputFile(thisffastafile, inFFasta);
+ pDataArray->m->openInputFile(thisfqualfile, inFQual);
+ pDataArray->m->openInputFile(thisrfastafile, inRFasta);
+ pDataArray->m->openInputFile(thisrqualfile, inRQual);
+
+ ofstream outFasta, outQual, outMisMatch;
+ pDataArray->m->openOutputFile(pDataArray->outputFasta, outFasta);
+ pDataArray->m->openOutputFile(pDataArray->outputQual, outQual);
+ pDataArray->m->openOutputFile(pDataArray->outputMisMatches, outMisMatch);
+ outMisMatch << "Name\tLength\tMisMatches\n";
+
+ while ((!inFQual.eof()) && (!inFFasta.eof()) && (!inRFasta.eof()) && (!inRQual.eof())) {
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ //read seqs and quality info
+ Sequence fSeq(inFFasta); pDataArray->m->gobble(inFFasta);
+ Sequence rSeq(inRFasta); pDataArray->m->gobble(inRFasta);
+ QualityScores fQual(inFQual); pDataArray->m->gobble(inFQual);
+ QualityScores rQual(inRQual); pDataArray->m->gobble(inRQual);
+
+ //flip the reverse reads
+ rSeq.reverseComplement();
+ rQual.flipQScores();
+
+ //pairwise align
+ alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
+ map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
+ map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
+ fSeq.setAligned(alignment->getSeqAAln());
+ rSeq.setAligned(alignment->getSeqBAln());
+ int length = fSeq.getAligned().length();
+
+ //traverse alignments merging into one contiguous seq
+ string contig = "";
+ vector<int> contigScores;
+ int numMismatches = 0;
+ string seq1 = fSeq.getAligned();
+ string seq2 = rSeq.getAligned();
+
+ vector<int> scores1 = fQual.getQualityScores();
+ vector<int> scores2 = rQual.getQualityScores();
+
+ for (int i = 0; i < length; i++) {
+ if (seq1[i] == seq2[i]) { //match, add base and choose highest score
+ contig += seq1[i];
+ contigScores.push_back(scores1[ABaseMap[i]]);
+ if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; }
+ }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below threshold. In that case eliminate base
+ if (scores2[BBaseMap[i]] >= pDataArray->threshold)) {
+ contig += seq2[i];
+ contigScores.push_back(scores2[BBaseMap[i]]);
+ }
+ }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below threshold. In that case eliminate base
+ if (scores1[ABaseMap[i]] >= pDataArray->threshold) {
+ contig += seq1[i];
+ contigScores.push_back(scores1[ABaseMap[i]]);
+ }
+ }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
+ char c = seq1[i];
+ contigScores.push_back(scores1[ABaseMap[i]]);
+ if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; c = seq2[i]; }
+ contig += c;
+ numMismatches++;
+ }else { //should never get here
+ pDataArray->m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
+ }
+ }
+
+ //output
+ outFasta << ">" << fSeq.getName() << endl << contig << endl;
+ outQual << ">" << fSeq.getName() << endl;
+ for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
+ outQual << endl;
+ outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
+
+ num++;
+
+ //report progress
+ if((num) % 1000 == 0){ pDataArray->m->mothurOut(toString(num)); pDataArray->m->mothurOutEndLine(); }
+ }
+
+ //report progress
+ if((num) % 1000 != 0){ pDataArray->m->mothurOut(toString(num)); pDataArray->m->mothurOutEndLine(); }
+
+ inFFasta.close();
+ inFQual.close();
+ inRFasta.close();
+ inRQual.close();
+ outFasta.close();
+ outQual.close();
+ outMisMatch.close();
+ delete alignment;
+
+ if (pDataArray->m->control_pressed) { pDataArray->m->mothurRemove(pDataArray->outputQual); pDataArray->m->mothurRemove(pDataArray->outputFasta); pDataArray->m->mothurRemove(pDataArray->outputMisMatches);}
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ pDataArray->m->errorOut(e, "AlignCommand", "MyContigsThreadFunction");
+ exit(1);
+ }
+}
+#endif
+
+
+#endif
try {
string qualScores;
- int controlChar = int('!');
+ int controlChar = int('@');
for (int i = 0; i < qual.size(); i++) {
int temp = qual[i] + controlChar;
CYGWIN_BUILD ?= no
USECOMPRESSION ?= no
MOTHUR_FILES="\"Enter_your_default_path_here\""
-RELEASE_DATE = "\"4/30/2012\""
-VERSION = "\"1.25.0\""
+RELEASE_DATE = "\"5/14/2012\""
+VERSION = "\"1.25.1\""
FORTAN_COMPILER = gfortran
FORTRAN_FLAGS =
out << simMatrix.size() << endl;
if (output == "lt") {
- for (int m = 0; m < simMatrix.size(); m++) {
- out << lookup[m]->getGroup() << '\t';
- for (int n = 0; n < m; n++) {
- out << simMatrix[m][n] << '\t';
+ for (int b = 0; b < simMatrix.size(); b++) {
+ out << lookup[b]->getGroup() << '\t';
+ for (int n = 0; n < b; n++) {
+ out << simMatrix[b][n] << '\t';
}
out << endl;
}
}else{
- for (int m = 0; m < simMatrix.size(); m++) {
- out << lookup[m]->getGroup() << '\t';
- for (int n = 0; n < simMatrix[m].size(); n++) {
- out << simMatrix[m][n] << '\t';
+ for (int b = 0; b < simMatrix.size(); m++) {
+ out << lookup[b]->getGroup() << '\t';
+ for (int n = 0; n < simMatrix[b].size(); n++) {
+ out << simMatrix[b][n] << '\t';
}
out << endl;
}
exit(1);
}
}
+/***********************************************************************/
+string MothurOut::findProgramPath(string programName){
+ try {
+
+ string envPath = getenv("PATH");
+ string pPath = "";
+
+ //delimiting path char
+ char delim;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ delim = ':';
+#else
+ delim = ';';
+#endif
+
+ //break apart path variable by ':'
+ vector<string> dirs;
+ splitAtChar(envPath, dirs, delim);
+
+ if (debug) { mothurOut("[DEBUG]: dir's in path: \n"); }
+
+ //get path related to mothur
+ for (int i = 0; i < dirs.size(); i++) {
+
+ if (debug) { mothurOut("[DEBUG]: " + dirs[i] + "\n"); }
+
+ //to lower so we can find it
+ string tempLower = "";
+ for (int j = 0; j < dirs[i].length(); j++) { tempLower += tolower(dirs[i][j]); }
+
+ //is this mothurs path?
+ if (tempLower.find(programName) != -1) { pPath = dirs[i]; break; }
+ }
+
+ if (debug) { mothurOut("[DEBUG]: programPath = " + pPath + "\n"); }
+
+ if (pPath != "") {
+ //add programName so it looks like what argv would look like
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ pPath += "/" + programName;
+#else
+ pPath += "\\" + programName;
+#endif
+ }else {
+ //okay programName is not in the path, so the folder programName is in must be in the path
+ //lets find out which one
+
+ //get path related to the program
+ for (int i = 0; i < dirs.size(); i++) {
+
+ if (debug) { mothurOut("[DEBUG]: looking in " + dirs[i] + " for " + programName + " \n"); }
+
+ //is this the programs path?
+ ifstream in;
+ string tempIn = dirs[i];
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ tempIn += "/" + programName;
+#else
+ tempIn += "\\" + programName;
+#endif
+ openInputFile(tempIn, in, "");
+
+ //if this file exists
+ if (in) { in.close(); pPath = tempIn; if (debug) { mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } break; }
+ }
+ }
+
+ return pPath;
+
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "findProgramPath");
+ exit(1);
+ }
+}
/*********************************************************************************************/
void MothurOut::setFileName(string filename) {
try {
int appendFiles(string, string);
int renameFile(string, string); //oldname, newname
string getFullPathName(string);
+ string findProgramPath(string programName);
string hasPath(string);
string getExtension(string);
string getPathName(string);
try {
CommandParameter pshared("shared", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pshared);
CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(prelabund);
+ CommandParameter pmetadata("metadata", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pmetadata);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter pmethod("method", "Multiple", "pearson-spearman-kendall", "pearson", "", "", "",false,false); parameters.push_back(pmethod);
try {
string helpString = "";
helpString += "The otu.association command reads a shared or relabund file and calculates the correlation coefficients between otus.\n";
- helpString += "The otu.association command parameters are shared, relabund, groups, method and label. The shared or relabund parameter is required.\n";
+ helpString += "If you provide a metadata file, mothur will calculate te correlation bewteen the metadata and the otus.\n";
+ helpString += "The otu.association command parameters are shared, relabund, metadata, groups, method and label. The shared or relabund parameter is required.\n";
helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n";
helpString += "The label parameter allows you to select what distances level you would like used, and are also separated by dashes.\n";
helpString += "The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["relabund"] = inputDir + it->second; }
}
+
+ it = parameters.find("metadata");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["metadata"] = inputDir + it->second; }
+ }
}
else if (relabundfile == "not found") { relabundfile = ""; }
else { inputFileName = relabundfile; m->setRelAbundFile(relabundfile); }
+ metadatafile = validParameter.validFile(parameters, "metadata", true);
+ if (metadatafile == "not open") { abort = true; metadatafile = ""; }
+ else if (metadatafile == "not found") { metadatafile = ""; }
+
groups = validParameter.validFile(parameters, "groups", false);
if (groups == "not found") { groups = ""; pickedGroups = false; }
else {
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ if (metadatafile != "") { readMetadata(); }
//function are identical just different datatypes
if (sharedfile != "") { processShared(); }
InputData* input = new InputData(sharedfile, "sharedfile");
vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
string lastLabel = lookup[0]->getLabel();
+
+ if (metadatafile != "") {
+ getMetadata();
+ bool error = false;
+ if (metadata[0].size() != lookup.size()) { m->mothurOut("[ERROR]: You have selected to use " + toString(metadata[0].size()) + " data rows from the metadata file, but " + toString(lookup.size()) + " from the shared file.\n"); m->control_pressed = true; error=true;}
+ if (error) {
+ //maybe add extra info here?? compare groups in each file??
+ }
+ }
//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
set<string> processedLabels;
out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
//column headings
- out << "OTUA\tOTUB\t" << method << "Coef\tSignificance\n";
+ if (metadatafile == "") { out << "OTUA\tOTUB\t" << method << "Coef\tSignificance\n"; }
+ else { out << "OTUA\tMetadata\t" << method << "Coef\tSignificance\n"; }
+
vector< vector<double> > xy; xy.resize(lookup[0]->getNumBins());
for (int i = 0; i < lookup[0]->getNumBins(); i++) { for (int j = 0; j < lookup.size(); j++) { xy[i].push_back(lookup[j]->getAbundance(i)); } }
LinearAlgebra linear;
- for (int i = 0; i < xy.size(); i++) {
-
- for (int k = 0; k < i; k++) {
-
- if (m->control_pressed) { out.close(); return 0; }
+ if (metadatafile == "") {//compare otus
+ for (int i = 0; i < xy.size(); i++) {
+
+ for (int k = 0; k < i; k++) {
+
+ if (m->control_pressed) { out.close(); return 0; }
+
+ double coef = 0.0;
+ double sig = 0.0;
+ if (method == "spearman") { coef = linear.calcSpearman(xy[i], xy[k], sig); }
+ else if (method == "pearson") { coef = linear.calcPearson(xy[i], xy[k], sig); }
+ else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); }
+ else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
+
+ out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl;
+ }
+ }
+ }else { //compare otus to metadata
+ for (int i = 0; i < xy.size(); i++) {
+
+ for (int k = 0; k < metadata.size(); k++) {
+
+ if (m->control_pressed) { out.close(); return 0; }
+
+ double coef = 0.0;
+ double sig = 0.0;
+ if (method == "spearman") { coef = linear.calcSpearman(xy[i], metadata[k], sig); }
+ else if (method == "pearson") { coef = linear.calcPearson(xy[i], metadata[k], sig); }
+ else if (method == "kendall") { coef = linear.calcKendall(xy[i], metadata[k], sig); }
+ else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
+
+ out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl;
+ }
+ }
- double coef = 0.0;
- double sig = 0.0;
- if (method == "spearman") { coef = linear.calcSpearman(xy[i], xy[k], sig); }
- else if (method == "pearson") { coef = linear.calcPearson(xy[i], xy[k], sig); }
- else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); }
- else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
-
- out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl;
- }
- }
-
+ }
out.close();
InputData* input = new InputData(relabundfile, "relabund");
vector<SharedRAbundFloatVector*> lookup = input->getSharedRAbundFloatVectors();
string lastLabel = lookup[0]->getLabel();
+
+ if (metadatafile != "") {
+ getMetadata();
+ bool error = false;
+ if (metadata[0].size() != lookup.size()) { m->mothurOut("[ERROR]: You have selected to use " + toString(metadata[0].size()) + " data rows from the metadata file, but " + toString(lookup.size()) + " from the relabund file.\n"); m->control_pressed = true; error=true;}
+ if (error) {
+ //maybe add extra info here?? compare groups in each file??
+ }
+ }
+
+
//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
set<string> processedLabels;
out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
//column headings
- out << "OTUA\tOTUB\t" << method << "Coef\tSignificance\n";
+ if (metadatafile == "") { out << "OTUA\tOTUB\t" << method << "Coef\tSignificance\n"; }
+ else { out << "OTUA\tMetadata\t" << method << "Coef\tSignificance\n"; }
vector< vector<double> > xy; xy.resize(lookup[0]->getNumBins());
for (int i = 0; i < lookup[0]->getNumBins(); i++) { for (int j = 0; j < lookup.size(); j++) { xy[i].push_back(lookup[j]->getAbundance(i)); } }
LinearAlgebra linear;
- for (int i = 0; i < xy.size(); i++) {
-
- for (int k = 0; k < i; k++) {
-
- if (m->control_pressed) { out.close(); return 0; }
-
- double coef = 0.0;
- double sig = 0.0;
- if (method == "spearman") { coef = linear.calcSpearman(xy[i], xy[k], sig); }
- else if (method == "pearson") { coef = linear.calcPearson(xy[i], xy[k], sig); }
- else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); }
- else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
-
- out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl;
- }
- }
+ if (metadatafile == "") {//compare otus
+ for (int i = 0; i < xy.size(); i++) {
+
+ for (int k = 0; k < i; k++) {
+
+ if (m->control_pressed) { out.close(); return 0; }
+
+ double coef = 0.0;
+ double sig = 0.0;
+ if (method == "spearman") { coef = linear.calcSpearman(xy[i], xy[k], sig); }
+ else if (method == "pearson") { coef = linear.calcPearson(xy[i], xy[k], sig); }
+ else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); }
+ else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
+
+ out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl;
+ }
+ }
+ }else { //compare otus to metadata
+ for (int i = 0; i < xy.size(); i++) {
+
+ for (int k = 0; k < metadata.size(); k++) {
+
+ if (m->control_pressed) { out.close(); return 0; }
+
+ double coef = 0.0;
+ double sig = 0.0;
+ if (method == "spearman") { coef = linear.calcSpearman(xy[i], metadata[k], sig); }
+ else if (method == "pearson") { coef = linear.calcPearson(xy[i], metadata[k], sig); }
+ else if (method == "kendall") { coef = linear.calcKendall(xy[i], metadata[k], sig); }
+ else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
+
+ out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl;
+ }
+ }
+
+ }
out.close();
}
}
/*****************************************************************/
+int OTUAssociationCommand::readMetadata(){
+ try {
+ ifstream in;
+ m->openInputFile(metadatafile, in);
+
+ string headerLine = m->getline(in); m->gobble(in);
+ istringstream iss (headerLine,istringstream::in);
+
+ //read the first label, because it refers to the groups
+ string columnLabel;
+ iss >> columnLabel; m->gobble(iss);
+
+ //save names of columns you are reading
+ while (!iss.eof()) {
+ iss >> columnLabel; m->gobble(iss);
+ metadataLabels.push_back(columnLabel);
+ }
+ int count = metadataLabels.size();
+
+ //read rest of file
+ while (!in.eof()) {
+
+ if (m->control_pressed) { in.close(); return 0; }
+
+ string group = "";
+ in >> group; m->gobble(in);
+
+ SharedRAbundFloatVector* tempLookup = new SharedRAbundFloatVector();
+ tempLookup->setGroup(group);
+ tempLookup->setLabel("1");
+
+ for (int i = 0; i < count; i++) {
+ float temp = 0.0;
+ in >> temp;
+ tempLookup->push_back(temp, group);
+ }
+
+ metadataLookup.push_back(tempLookup);
+
+ m->gobble(in);
+ }
+ in.close();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "OTUAssociationCommand", "readMetadata");
+ exit(1);
+ }
+}
+/*****************************************************************/
+//eliminate groups user did not pick, remove zeroed out otus, fill metadata vector.
+int OTUAssociationCommand::getMetadata(){
+ try {
+
+ vector<string> mGroups = m->getGroups();
+
+ bool remove = false;
+ for (int i = 0; i < metadataLookup.size(); i++) {
+ //if this sharedrabund is not from a group the user wants then delete it.
+ if (!(m->inUsersGroups(metadataLookup[i]->getGroup(), mGroups))) {
+ delete metadataLookup[i]; metadataLookup[i] = NULL;
+ metadataLookup.erase(metadataLookup.begin()+i);
+ i--;
+ remove = true;
+ }
+ }
+
+ vector<SharedRAbundFloatVector*> newLookup;
+ for (int i = 0; i < metadataLookup.size(); i++) {
+ SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
+ temp->setLabel(metadataLookup[i]->getLabel());
+ temp->setGroup(metadataLookup[i]->getGroup());
+ newLookup.push_back(temp);
+ }
+
+ //for each bin
+ vector<string> newBinLabels;
+ for (int i = 0; i < metadataLookup[0]->getNumBins(); i++) {
+ if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
+
+ //look at each sharedRabund and make sure they are not all zero
+ bool allZero = true;
+ for (int j = 0; j < metadataLookup.size(); j++) {
+ if (metadataLookup[j]->getAbundance(i) != 0) { allZero = false; break; }
+ }
+
+ //if they are not all zero add this bin
+ if (!allZero) {
+ for (int j = 0; j < metadataLookup.size(); j++) {
+ newLookup[j]->push_back(metadataLookup[j]->getAbundance(i), metadataLookup[j]->getGroup());
+ }
+ newBinLabels.push_back(metadataLabels[i]);
+ }
+ }
+
+ metadataLabels = newBinLabels;
+
+ for (int j = 0; j < metadataLookup.size(); j++) { delete metadataLookup[j]; }
+ metadataLookup.clear();
+
+ metadata.resize(newLookup[0]->getNumBins());
+ for (int i = 0; i < newLookup[0]->getNumBins(); i++) { for (int j = 0; j < newLookup.size(); j++) { metadata[i].push_back(newLookup[j]->getAbundance(i)); } }
+
+ for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "OTUAssociationCommand", "getMetadata");
+ exit(1);
+ }
+}
+/*****************************************************************/
void help() { m->mothurOut(getHelpString()); }
private:
- string sharedfile, relabundfile, groups, label, inputFileName, outputDir, method;
+ string sharedfile, relabundfile, metadatafile, groups, label, inputFileName, outputDir, method;
bool abort, pickedGroups, allLines;
set<string> labels;
+ vector<SharedRAbundFloatVector*> metadataLookup;
+ vector< vector< double> > metadata;
- vector<string> outputNames, Groups;
+ vector<string> outputNames, Groups, metadataLabels;
int processShared();
int process(vector<SharedRAbundVector*>&);
int processRelabund();
int process(vector<SharedRAbundFloatVector*>&);
+ int readMetadata();
+ int getMetadata();
};
try {
vector<int> qualScores;
- int controlChar = int('!');
+ int controlChar = int('@');
for (int i = 0; i < qual.length(); i++) {
int temp = int(qual[i]);
map<string, int>::iterator itFind;
map<string, int> taxonomyFileNames = name2Taxonomy;
+ if (m->debug) { m->mothurOut("[DEBUG]: in error check. Numseqs in template = " + toString(templateFileNames.size()) + ". Numseqs in taxonomy = " + toString(taxonomyFileNames.size()) + ".\n"); }
+
for (int i = 0; i < templateFileNames.size(); i++) {
itFind = taxonomyFileNames.find(templateFileNames[i]);
if (itFind != taxonomyFileNames.end()) { //found it so erase it
taxonomyFileNames.erase(itFind);
}else {
- m->mothurOut(templateFileNames[i] + " is in your template file and is not in your taxonomy file. Please correct."); m->mothurOutEndLine();
+ m->mothurOut("'" +templateFileNames[i] + "' is in your template file and is not in your taxonomy file. Please correct."); m->mothurOutEndLine();
okay = false;
}
lookup->set(binNumber, abundance);
rank->set(abundance, rank->get(abundance)+1);
- if((i == 0) || (i+1) % increment == 0){
+ if((i == 0) || ((i+1) % increment == 0) || (ends.count(i+1) != 0)){
rcd->updateRankData(rank);
}
}
- if(numSeqs % increment != 0){
+ if((numSeqs % increment != 0) || (ends.count(numSeqs) != 0)){
rcd->updateRankData(rank);
}
class Rarefact {
public:
- Rarefact(OrderVector* o, vector<Display*> disp, int p) :
- numSeqs(o->getNumSeqs()), order(o), displays(disp), label(o->getLabel()), processors(p) { m = MothurOut::getInstance(); }
+ Rarefact(OrderVector* o, vector<Display*> disp, int p, set<int> en) :
+ numSeqs(o->getNumSeqs()), order(o), displays(disp), label(o->getLabel()), processors(p), ends(en) { m = MothurOut::getInstance(); }
Rarefact(vector<SharedRAbundVector*> shared, vector<Display*> disp) :
lookup(shared), displays(disp) { m = MothurOut::getInstance(); }
vector<Display*> displays;
int numSeqs, numGroupComb, processors;
string label;
+ set<int> ends;
void mergeVectors(SharedRAbundVector*, SharedRAbundVector*);
vector<SharedRAbundVector*> lookup;
MothurOut* m;
if (abort == true) { if (calledHelp) { return 0; } return 2; }
+ map<string, set<int> > labelToEnds;
if ((format != "sharedfile")) { inputFileNames.push_back(inputfile); }
- else { inputFileNames = parseSharedFile(sharedfile); format = "rabund"; }
-
+ else { inputFileNames = parseSharedFile(sharedfile, labelToEnds); format = "rabund"; }
+ for (map<string, set<int> >::iterator it = labelToEnds.begin(); it != labelToEnds.end(); it++) {
+ cout << it->first << endl;
+ for (set<int>::iterator its = (it->second).begin(); its != (it->second).end(); its++) {
+ cout << (*its) << endl;
+ }
+ }
if (m->control_pressed) { return 0; }
map<int, string> file2Group; //index in outputNames[i] -> group
if(allLines == 1 || labels.count(order->getLabel()) == 1){
m->mothurOut(order->getLabel()); m->mothurOutEndLine();
- rCurve = new Rarefact(order, rDisplays, processors);
+ map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
+ set<int> ends;
+ if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
+ rCurve = new Rarefact(order, rDisplays, processors, ends);
rCurve->getCurve(freq, nIters);
delete rCurve;
order = (input->getOrderVector(lastLabel));
m->mothurOut(order->getLabel()); m->mothurOutEndLine();
- rCurve = new Rarefact(order, rDisplays, processors);
+ map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
+ set<int> ends;
+ if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
+ rCurve = new Rarefact(order, rDisplays, processors, ends);
+
rCurve->getCurve(freq, nIters);
delete rCurve;
order = (input->getOrderVector(lastLabel));
m->mothurOut(order->getLabel()); m->mothurOutEndLine();
- rCurve = new Rarefact(order, rDisplays, processors);
+ map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
+ set<int> ends;
+ if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
+ rCurve = new Rarefact(order, rDisplays, processors, ends);
+
rCurve->getCurve(freq, nIters);
delete rCurve;
//find different types of files
map<string, map<string, string> > typesFiles;
+ map<string, vector< vector<string> > > fileLabels; //combofile name to labels. each label is a vector because it may be unique lci hci.
+ vector<string> groupNames;
for (int i = 0; i < outputNames.size(); i++) {
+
string extension = m->getExtension(outputNames[i]);
-
+ string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
+ m->mothurRemove(combineFileName); //remove old file
+
ifstream in;
m->openInputFile(outputNames[i], in);
string labels = m->getline(in);
- string newLine = labels.substr(0, labels.find_first_of('\t'));
-
- newLine += "\tGroup" + labels.substr(labels.find_first_of('\t'));
+ istringstream iss (labels,istringstream::in);
+ string newLabel = ""; vector<string> theseLabels;
+ while(!iss.eof()) { iss >> newLabel; m->gobble(iss); theseLabels.push_back(newLabel); }
+ vector< vector<string> > allLabels;
+ vector<string> thisSet; thisSet.push_back(theseLabels[0]); allLabels.push_back(thisSet); thisSet.clear(); //makes "numSampled" its own grouping
+ for (int j = 1; j < theseLabels.size()-1; j++) {
+ if (theseLabels[j+1] == "lci") {
+ thisSet.push_back(theseLabels[j]);
+ thisSet.push_back(theseLabels[j+1]);
+ thisSet.push_back(theseLabels[j+2]);
+ j++; j++;
+ }else{ //no lci or hci for this calc.
+ thisSet.push_back(theseLabels[j]);
+ }
+ allLabels.push_back(thisSet);
+ thisSet.clear();
+ }
+ fileLabels[combineFileName] = allLabels;
+
map<string, map<string, string> >::iterator itfind = typesFiles.find(extension);
if (itfind != typesFiles.end()) {
(itfind->second)[outputNames[i]] = file2Group[i];
temp[outputNames[i]] = file2Group[i];
typesFiles[extension] = temp;
}
-
- string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
-
- //print headers
- ofstream out;
- m->openOutputFile(combineFileName, out);
- out << newLine << endl;
- out.close();
-
+ if (!(m->inUsersGroups(file2Group[i], groupNames))) { groupNames.push_back(file2Group[i]); }
}
//for each type create a combo file
- map<int, int> lineToNumber;
+
for (map<string, map<string, string> >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) {
ofstream out;
string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first;
m->openOutputFileAppend(combineFileName, out);
newFileNames.push_back(combineFileName);
- map<string, string> thisTypesFiles = it->second;
-
+ map<string, string> thisTypesFiles = it->second; //it->second maps filename to group
+ set<int> numSampledSet;
+
//open each type summary file
- map<string, vector<string> > files; //maps file name to lines in file
+ map<string, map<int, vector< vector<string> > > > files; //maps file name to lines in file
int maxLines = 0;
- int numColumns = 0;
for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
string thisfilename = itFileNameGroup->first;
string group = itFileNameGroup->second;
-
+
ifstream temp;
m->openInputFile(thisfilename, temp);
//read through first line - labels
m->getline(temp); m->gobble(temp);
- vector<string> thisFilesLines;
-
- thisFilesLines.push_back(group);
- int count = 1;
+ map<int, vector< vector<string> > > thisFilesLines;
while (!temp.eof()){
-
- string thisLine = m->getline(temp);
-
- string numSampled = thisLine.substr(0, thisLine.find_first_of('\t'));
- int num = 0;
- convert(numSampled, num);
- numColumns = m->getNumChar(thisLine, '\t');
- lineToNumber[count] = num;
- count++;
-
- thisFilesLines.push_back(thisLine);
- m->gobble(temp);
+ int numSampled = 0;
+ temp >> numSampled; m->gobble(temp);
+
+ vector< vector<string> > theseReads;
+ vector<string> thisSet; thisSet.push_back(toString(numSampled)); theseReads.push_back(thisSet); thisSet.clear();
+ for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
+ vector<string> reads;
+ string next = "";
+ for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
+ temp >> next; m->gobble(temp);
+ reads.push_back(next);
+ }
+ theseReads.push_back(reads);
+ }
+ thisFilesLines[numSampled] = theseReads;
+ m->gobble(temp);
+
+ numSampledSet.insert(numSampled);
}
- files[thisfilename] = thisFilesLines;
+ files[group] = thisFilesLines;
//save longest file for below
if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); }
m->mothurRemove(thisfilename);
}
-
+ //output new labels line
+ out << fileLabels[combineFileName][0][0] << '\t';
+ for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
+ for (int n = 0; n < groupNames.size(); n++) { // for each group
+ for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
+ out << fileLabels[combineFileName][k][l] << '-' << groupNames[n] << '\t';
+ }
+ }
+ }
+ out << endl;
+
//for each label
- for (int k = 1; k < maxLines; k++) {
+ for (set<int>::iterator itNumSampled = numSampledSet.begin(); itNumSampled != numSampledSet.end(); itNumSampled++) {
- //grab data for each group
- for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
-
- string thisfilename = itFileNameGroup->first;
- map<int, int>::iterator itLine = lineToNumber.find(k);
- if (itLine != lineToNumber.end()) {
- string output = toString(itLine->second);
- if (k < files[thisfilename].size()) {
- string line = files[thisfilename][k];
- output = line.substr(0, line.find_first_of('\t'));
- output += '\t' + files[thisfilename][0] + '\t' + line.substr(line.find_first_of('\t'));
- }else{
- output += '\t' + files[thisfilename][0] + '\t';
- for (int h = 0; h < numColumns; h++) {
- output += "NA\t";
- }
- }
- out << output << endl;
- }else { m->mothurOut("[ERROR]: parsing results, cant find " + toString(k)); m->mothurOutEndLine(); }
- }
+ out << (*itNumSampled) << '\t';
+
+ if (m->control_pressed) { break; }
+
+ for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk
+ //grab data for each group
+ for (map<string, map<int, vector< vector<string> > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) {
+
+ string group = itFileNameGroup->first;
+
+ map<int, vector< vector<string> > >::iterator itLine = files[group].find(*itNumSampled);
+ if (itLine != files[group].end()) {
+ for (int l = 0; l < (itLine->second)[k].size(); l++) {
+ out << (itLine->second)[k][l] << '\t';
+
+ }
+ }else {
+ for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) {
+ out << "NA" << '\t';
+ }
+ }
+ }
+ }
+ out << endl;
}
-
out.close();
-
}
//return combine file name
}
}
//**********************************************************************************************************************
-vector<string> RareFactCommand::parseSharedFile(string filename) {
+vector<string> RareFactCommand::parseSharedFile(string filename, map<string, set<int> >& label2Ends) {
try {
vector<string> filenames;
m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
rav.print(*(filehandles[lookup[i]->getGroup()]));
(*(filehandles[lookup[i]->getGroup()])).close();
+ label2Ends[lookup[i]->getLabel()].insert(rav.getNumSeqs());
}
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
vector<string> groups;
string outputDir;
- vector<string> parseSharedFile(string);
+ vector<string> parseSharedFile(string, map<string, set<int> >&);
vector<string> createGroupFile(vector<string>&, map<int, string>);
};
#include "rarefactsharedcommand.h"
#include "sharedsobs.h"
#include "sharednseqs.h"
+#include "sharedutilities.h"
//**********************************************************************************************************************
vector<string> RareFactSharedCommand::setParameters(){
try {
CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
+ CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pdesign);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
CommandParameter pcalc("calc", "Multiple", "sharednseqs-sharedobserved", "sharedobserved", "", "", "",true,false); parameters.push_back(pcalc);
CommandParameter pjumble("jumble", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pjumble);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
+ CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
string helpString = "";
ValidCalculators validCalculator;
helpString += "The collect.shared command parameters are shared, label, freq, calc and groups. shared is required if there is no current sharedfile. \n";
- helpString += "The rarefaction.shared command parameters are shared, label, iters, groups, jumble and calc. shared is required if there is no current sharedfile. \n";
+ helpString += "The rarefaction.shared command parameters are shared, design, label, iters, groups, sets, jumble and calc. shared is required if there is no current sharedfile. \n";
+ helpString += "The design parameter allows you to assign your groups to sets. If provided mothur will run rarefaction.shared on a per set basis. \n";
+ helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
helpString += "The rarefaction command should be in the following format: \n";
helpString += "rarefaction.shared(label=yourLabel, iters=yourIters, calc=yourEstimators, jumble=yourJumble, groups=yourGroups).\n";
helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["shared"] = inputDir + it->second; }
}
+
+ it = parameters.find("design");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["design"] = inputDir + it->second; }
+ }
+
}
//get shared file
if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
}else { m->setSharedFile(sharedfile); }
+
+ designfile = validParameter.validFile(parameters, "design", true);
+ if (designfile == "not open") { abort = true; designfile = ""; }
+ else if (designfile == "not found") { designfile = ""; }
+ else { m->setDesignFile(designfile); }
//if the user changes the output directory command factory will send this info to us in the output parameter
m->splitAtDash(groups, Groups);
}
m->setGroups(Groups);
+
+ string sets = validParameter.validFile(parameters, "sets", false);
+ if (sets == "not found") { sets = ""; }
+ else {
+ m->splitAtDash(sets, Sets);
+ }
string temp;
temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
-
- string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+
+ GroupMap designMap;
+ if (designfile == "") { //fake out designMap to run with process
+ process(designMap, "");
+ }else {
+ designMap.readDesignMap(designfile);
+
+ //fill Sets - checks for "all" and for any typo groups
+ SharedUtil util;
+ vector<string> nameSets = designMap.getNamesOfGroups();
+ util.setGroups(Sets, nameSets);
+
+ for (int i = 0; i < Sets.size(); i++) {
+ process(designMap, Sets[i]);
+ }
+ }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RareFactSharedCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
+ try {
+ Rarefact* rCurve;
+ vector<Display*> rDisplays;
+
+ InputData input(sharedfile, "sharedfile");
+ lookup = input.getSharedRAbundVectors();
+ if (lookup.size() < 2) {
+ m->mothurOut("I cannot run the command without at least 2 valid groups.");
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ return 0;
+ }
+
+ string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+
+ vector<string> newGroups = m->getGroups();
+ if (thisSet != "") { //make groups only filled with groups from this set so that's all inputdata will read
+ vector<string> thisSets; thisSets.push_back(thisSet);
+ newGroups = designMap.getNamesSeqs(thisSets);
+ fileNameRoot += thisSet + ".";
+ }
+
+ vector<SharedRAbundVector*> subset;
+ if (thisSet == "") { subset.clear(); subset = lookup; }
+ else {//fill subset with this sets groups
+ subset.clear();
+ for (int i = 0; i < lookup.size(); i++) {
+ if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
+ subset.push_back(lookup[i]);
+ }
+ }
+ }
ValidCalculators validCalculator;
-
for (int i=0; i<Estimators.size(); i++) {
if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) {
if (Estimators[i] == "sharedobserved") {
}
//if the users entered no valid calculators don't execute command
- if (rDisplays.size() == 0) { return 0; }
-
- input = new InputData(sharedfile, "sharedfile");
- lookup = input->getSharedRAbundVectors();
- string lastLabel = lookup[0]->getLabel();
+ if (rDisplays.size() == 0) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
if (m->control_pressed) {
- m->clearGroups();
- delete input;
for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
return 0;
}
-
-
- if (lookup.size() < 2) {
- m->mothurOut("I cannot run the command without at least 2 valid groups.");
- for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
- return 0;
- }
-
+
+
//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ string lastLabel = subset[0]->getLabel();
set<string> processedLabels;
set<string> userLabels = labels;
-
+
//as long as you are not at the end of the file or done wih the lines you want
- while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+ while((subset[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
if (m->control_pressed) {
- m->clearGroups();
- delete input;
for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
return 0;
}
- if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
- m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- rCurve = new Rarefact(lookup, rDisplays);
+ if(allLines == 1 || labels.count(subset[0]->getLabel()) == 1){
+ m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
+ rCurve = new Rarefact(subset, rDisplays);
rCurve->getSharedCurve(freq, nIters);
delete rCurve;
-
- processedLabels.insert(lookup[0]->getLabel());
- userLabels.erase(lookup[0]->getLabel());
+
+ processedLabels.insert(subset[0]->getLabel());
+ userLabels.erase(subset[0]->getLabel());
}
- if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
- string saveLabel = lookup[0]->getLabel();
-
- for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
- lookup = input->getSharedRAbundVectors(lastLabel);
+ if ((m->anyLabelsToProcess(subset[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = subset[0]->getLabel();
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+
+ if (thisSet == "") { subset.clear(); subset = lookup; }
+ else {//fill subset with this sets groups
+ subset.clear();
+ for (int i = 0; i < lookup.size(); i++) {
+ if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
+ subset.push_back(lookup[i]);
+ }
+ }
+ }
- m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- rCurve = new Rarefact(lookup, rDisplays);
- rCurve->getSharedCurve(freq, nIters);
- delete rCurve;
-
- processedLabels.insert(lookup[0]->getLabel());
- userLabels.erase(lookup[0]->getLabel());
-
- //restore real lastlabel to save below
- lookup[0]->setLabel(saveLabel);
+ m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
+ rCurve = new Rarefact(subset, rDisplays);
+ rCurve->getSharedCurve(freq, nIters);
+ delete rCurve;
+
+ processedLabels.insert(subset[0]->getLabel());
+ userLabels.erase(subset[0]->getLabel());
+
+ //restore real lastlabel to save below
+ subset[0]->setLabel(saveLabel);
}
-
+
- lastLabel = lookup[0]->getLabel();
+ lastLabel = subset[0]->getLabel();
//get next line to process
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
- lookup = input->getSharedRAbundVectors();
+ lookup = input.getSharedRAbundVectors();
+
+ if (lookup[0] != NULL) {
+ if (thisSet == "") { subset.clear(); subset = lookup; }
+ else {//fill subset with this sets groups
+ subset.clear();
+ for (int i = 0; i < lookup.size(); i++) {
+ if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
+ subset.push_back(lookup[i]);
+ }
+ }
+ }
+ }else { subset.clear(); subset.push_back(NULL); }
+
}
if (m->control_pressed) {
- m->clearGroups();
- delete input;
for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
return 0;
}
if (m->control_pressed) {
- m->clearGroups();
- delete input;
for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
return 0;
//run last label if you need to
if (needToRun == true) {
for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
- lookup = input->getSharedRAbundVectors(lastLabel);
-
- m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- rCurve = new Rarefact(lookup, rDisplays);
+ lookup = input.getSharedRAbundVectors(lastLabel);
+
+ if (thisSet == "") { subset.clear(); subset = lookup; }
+ else {//fill subset with this sets groups
+ subset.clear();
+ for (int i = 0; i < lookup.size(); i++) {
+ if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
+ subset.push_back(lookup[i]);
+ }
+ }
+ }
+
+ m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
+ rCurve = new Rarefact(subset, rDisplays);
rCurve->getSharedCurve(freq, nIters);
delete rCurve;
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
}
for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
- m->clearGroups();
- delete input;
-
- if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
-
- m->mothurOutEndLine();
- m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
- m->mothurOutEndLine();
- return 0;
- }
+
+ return 0;
+ }
catch(exception& e) {
- m->errorOut(e, "RareFactSharedCommand", "execute");
+ m->errorOut(e, "RareFactSharedCommand", "process");
exit(1);
}
}
-
-
//**********************************************************************************************************************
private:
vector<SharedRAbundVector*> lookup;
- InputData* input;
- Rarefact* rCurve;
- vector<Display*> rDisplays;
int nIters;
string format;
float freq;
bool abort, allLines, jumble;
set<string> labels; //holds labels to be used
- string label, calc, groups, outputDir, sharedfile;
- vector<string> Estimators, Groups, outputNames;
+ string label, calc, groups, outputDir, sharedfile, designfile;
+ vector<string> Estimators, Groups, outputNames, Sets;
+
+ int process(GroupMap&, string);
};
--- /dev/null
+//
+// removeotulabels.cpp
+// Mothur
+//
+// Created by Sarah Westcott on 5/21/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "removeotulabelscommand.h"
+
+//**********************************************************************************************************************
+vector<string> RemoveOtuLabelsCommand::setParameters(){
+ try {
+ CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos);
+ CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pconstaxonomy);
+ CommandParameter potucorr("otucorr", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(potucorr);
+ CommandParameter pcorraxes("corraxes", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pcorraxes);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RemoveOtuLabelsCommand", "setParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string RemoveOtuLabelsCommand::getHelpString(){
+ try {
+ string helpString = "";
+ helpString += "The remove.otulabels command can be used to remove specific otus with the output from classify.otu, otu.association, or corr.axes.\n";
+ helpString += "The remove.otulabels parameters are: constaxonomy, otucorr, corraxes, and accnos.\n";
+ helpString += "The constaxonomy parameter is input the results of the classify.otu command.\n";
+ helpString += "The otucorr parameter is input the results of the otu.association command.\n";
+ helpString += "The corraxes parameter is input the results of the corr.axes command.\n";
+ helpString += "The remove.otulabels commmand should be in the following format: \n";
+ helpString += "remove.otulabels(accnos=yourListOfOTULabels, corraxes=yourCorrAxesFile)\n";
+ return helpString;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RemoveOtuLabelsCommand", "getHelpString");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+RemoveOtuLabelsCommand::RemoveOtuLabelsCommand(){
+ try {
+ abort = true; calledHelp = true;
+ setParameters();
+ vector<string> tempOutNames;
+ outputTypes["contaxonomy"] = tempOutNames;
+ outputTypes["otu.corr"] = tempOutNames;
+ outputTypes["corr.axes"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RemoveOtuLabelsCommand", "RemoveOtuLabelsCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+RemoveOtuLabelsCommand::RemoveOtuLabelsCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+ else {
+ //valid paramters for this command
+ vector<string> myArray = setParameters();
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+ map<string,string>::iterator it;
+ //check to make sure all parameters are valid for command
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+
+ //edit file types below to include only the types you added as parameters
+
+ string path;
+ it = parameters.find("constaxonomy");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["constaxonomy"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("accnos");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["accnos"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("corraxes");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["corraxes"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("otucorr");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["otucorr"] = inputDir + it->second; }
+ }
+ }
+
+ vector<string> tempOutNames;
+ outputTypes["contaxonomy"] = tempOutNames;
+ outputTypes["otu.corr"] = tempOutNames;
+ outputTypes["corr.axes"] = tempOutNames;
+
+ //check for parameters
+ accnosfile = validParameter.validFile(parameters, "accnos", true);
+ if (accnosfile == "not open") { abort = true; }
+ else if (accnosfile == "not found") {
+ accnosfile = m->getAccnosFile();
+ if (accnosfile != "") { m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine();
+ abort = true;
+ }
+ }else { m->setAccnosFile(accnosfile); }
+
+ constaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true);
+ if (constaxonomyfile == "not open") { constaxonomyfile = ""; abort = true; }
+ else if (constaxonomyfile == "not found") { constaxonomyfile = ""; }
+
+ corraxesfile = validParameter.validFile(parameters, "corraxes", true);
+ if (corraxesfile == "not open") { corraxesfile = ""; abort = true; }
+ else if (corraxesfile == "not found") { corraxesfile = ""; }
+
+ otucorrfile = validParameter.validFile(parameters, "otucorr", true);
+ if (otucorrfile == "not open") { otucorrfile = ""; abort = true; }
+ else if (otucorrfile == "not found") { otucorrfile = ""; }
+
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
+
+ if ((constaxonomyfile == "") && (corraxesfile == "") && (otucorrfile == "")) { m->mothurOut("You must provide one of the following: constaxonomy, corraxes or otucorr."); m->mothurOutEndLine(); abort = true; }
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RemoveOtuLabelsCommand", "RemoveOtuLabelsCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int RemoveOtuLabelsCommand::execute(){
+ try {
+
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ //get labels you want to keep
+ readAccnos();
+
+ if (m->control_pressed) { return 0; }
+
+ //read through the correct file and output lines you want to keep
+ if (constaxonomyfile != "") { readClassifyOtu(); }
+ if (corraxesfile != "") { readCorrAxes(); }
+ if (otucorrfile != "") { readOtuAssociation(); }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ //output files created by command
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetOtuLabelsCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int RemoveOtuLabelsCommand::readClassifyOtu(){
+ try {
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(constaxonomyfile); }
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(constaxonomyfile)) + "pick.taxonomy";
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+
+ ifstream in;
+ m->openInputFile(constaxonomyfile, in);
+
+ bool wroteSomething = false;
+ int removedCount = 0;
+
+ //read headers
+ string headers = m->getline(in);
+ out << headers << endl;
+
+ while (!in.eof()) {
+
+ if (m->control_pressed) { break; }
+
+ string otu = ""; string tax = "unknown";
+ int size = 0;
+
+ in >> otu >> size >> tax; m->gobble(in);
+
+ if (labels.count(otu) == 0) {
+ wroteSomething = true;
+ out << otu << '\t' << size << '\t' << tax << endl;
+ }else { removedCount++; }
+ }
+ in.close();
+ out.close();
+
+ if (wroteSomething == false) { m->mothurOut("Your file only contains labels from the .accnos file."); m->mothurOutEndLine(); }
+ outputNames.push_back(outputFileName); outputTypes["constaxonomy"].push_back(outputFileName);
+
+ m->mothurOut("Removed " + toString(removedCount) + " otus from your constaxonomy file."); m->mothurOutEndLine();
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RemoveOtuLabelsCommand", "readClassifyOtu");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int RemoveOtuLabelsCommand::readOtuAssociation(){
+ try {
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(otucorrfile); }
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(otucorrfile)) + "pick.corr";
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+
+ ifstream in;
+ m->openInputFile(otucorrfile, in);
+
+ bool wroteSomething = false;
+ int removedCount = 0;
+
+ //read headers
+ string headers = m->getline(in);
+ out << headers << endl;
+
+ while (!in.eof()) {
+
+ if (m->control_pressed) { break; }
+
+ string otu1 = "";
+ string otu2 = "";
+ in >> otu1 >> otu2;
+ string line = m->getline(in); m->gobble(in);
+
+ if ((labels.count(otu1) == 0) && (labels.count(otu2) == 0)){
+ wroteSomething = true;
+
+ out << otu1 << '\t' << otu2 << '\t' << line << endl;
+ }else { removedCount++; }
+ }
+ in.close();
+ out.close();
+
+ if (wroteSomething == false) { m->mothurOut("Your file only contains labels from the .accnos file."); m->mothurOutEndLine(); }
+ outputNames.push_back(outputFileName); outputTypes["otu.corr"].push_back(outputFileName);
+
+ m->mothurOut("Removed " + toString(removedCount) + " lines from your otu.corr file."); m->mothurOutEndLine();
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RemoveOtuLabelsCommand", "readOtuAssociation");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int RemoveOtuLabelsCommand::readCorrAxes(){
+ try {
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(corraxesfile); }
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(corraxesfile)) + "pick.axes";
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+
+
+ ifstream in;
+ m->openInputFile(corraxesfile, in);
+
+ bool wroteSomething = false;
+ int removedCount = 0;
+
+ //read headers
+ string headers = m->getline(in);
+ out << headers << endl;
+
+ while (!in.eof()) {
+
+ if (m->control_pressed) { break; }
+
+ string otu = "";
+ in >> otu;
+ string line = m->getline(in); m->gobble(in);
+
+ if (labels.count(otu) == 0) {
+ wroteSomething = true;
+
+ out << otu << '\t' << line << endl;
+ }else { removedCount++; }
+ }
+ in.close();
+ out.close();
+
+ if (wroteSomething == false) { m->mothurOut("Your file only contains labels from the .accnos file."); m->mothurOutEndLine(); }
+ outputNames.push_back(outputFileName); outputTypes["corr.axes"].push_back(outputFileName);
+
+ m->mothurOut("Removed " + toString(removedCount) + " lines from your corr.axes file."); m->mothurOutEndLine();
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RemoveOtuLabelsCommand", "readCorrAxes");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+int RemoveOtuLabelsCommand::readAccnos(){
+ try {
+
+ ifstream in;
+ m->openInputFile(accnosfile, in);
+ string name;
+
+ while(!in.eof()){
+ in >> name;
+
+ labels.insert(name);
+
+ m->gobble(in);
+ }
+ in.close();
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RemoveOtuLabelsCommand", "readAccnos");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+
+
--- /dev/null
+#ifndef Mothur_removeotulabelscommand_h
+#define Mothur_removeotulabelscommand_h
+
+
+//
+// removeotulabelscommand.h
+// Mothur
+//
+// Created by Sarah Westcott on 5/21/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "command.hpp"
+
+/**************************************************************************************************/
+
+class RemoveOtuLabelsCommand : public Command {
+public:
+ RemoveOtuLabelsCommand(string);
+ RemoveOtuLabelsCommand();
+ ~RemoveOtuLabelsCommand(){}
+
+ vector<string> setParameters();
+ string getCommandName() { return "remove.otulabels"; }
+ string getCommandCategory() { return "OTU-Based Approaches"; }
+ string getHelpString();
+ string getCitation() { return "http://www.mothur.org/wiki/Get.otulabels"; }
+ string getDescription() { return "Can be used with output from classify.otu, otu.association, or corr.axes to remove specific otus."; }
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+private:
+ bool abort;
+ string outputDir, accnosfile, constaxonomyfile, otucorrfile, corraxesfile;
+ vector<string> outputNames;
+ set<string> labels;
+
+ int readClassifyOtu();
+ int readOtuAssociation();
+ int readCorrAxes();
+ int readAccnos();
+
+};
+
+/**************************************************************************************************/
+
+
+
+
+
+
+
+
+#endif
int Sequence::getStartPos(){
if(startPos == -1){
for(int j = 0; j < alignmentLength; j++) {
- if(aligned[j] != '.'){
+ if((aligned[j] != '.')&&(aligned[j] != '-')){
startPos = j + 1;
break;
}
int Sequence::getEndPos(){
if(endPos == -1){
for(int j=alignmentLength-1;j>=0;j--){
- if(aligned[j] != '.'){
+ if((aligned[j] != '.')&&(aligned[j] != '-')){
endPos = j + 1;
break;
}
if (temp == "not found") { debug = false; nodebug=true; }
else { debug = m->isTrue(temp); }
m->debug = debug;
+
+ if (debug) { m->mothurOut("Setting [DEBUG] flag.\n"); }
if ((input == "") && (output == "") && (tempdefault == "") && nodebug) {
m->mothurOut("You must provide either an input, output, tempdefault or debug for the set.outdir command."); m->mothurOutEndLine(); abort = true;
int start = time(NULL);
+ filenames[s] = m->getFullPathName(filenames[s]);
m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
string accnos = "";
CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(pcutoff);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pmaxiter);
+ CommandParameter plarge("large", "Number", "", "-1", "", "", "",false,false); parameters.push_back(plarge);
CommandParameter psigma("sigma", "Number", "", "60", "", "", "",false,false); parameters.push_back(psigma);
CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "",false,false); parameters.push_back(pmindelta);
CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder);
flowFileName = validParameter.validFile(parameters, "flow", true);
flowFilesFileName = validParameter.validFile(parameters, "file", true);
if (flowFileName == "not found" && flowFilesFileName == "not found") {
- m->mothurOut("values for either flow or file must be provided for the shhh.seqs command.");
+ m->mothurOut("values for either flow or file must be provided for the shhh.flows command.");
m->mothurOutEndLine();
abort = true;
}
else{
ofstream temp;
- string thisoutputDir = m->hasPath(flowFilesFileName); //if user entered a file with a path then preserve it
+ string thisoutputDir = outputDir;
+ if (outputDir == "") { thisoutputDir = m->hasPath(flowFilesFileName); } //if user entered a file with a path then preserve it
- //flow.files = 9 character offset
- compositeFASTAFileName = thisoutputDir + m->getRootName(m->getSimpleName(flowFilesFileName)) + "shhh.fasta";
+ //we want to rip off .files, and also .flow if its there
+ string fileroot = m->getRootName(m->getSimpleName(flowFilesFileName));
+ if (fileroot[fileroot.length()-1] == '.') { fileroot = fileroot.substr(0, fileroot.length()-1); } //rip off dot
+ string extension = m->getExtension(fileroot);
+ if (extension == ".flow") { fileroot = m->getRootName(fileroot); }
+ else { fileroot += "."; } //add back if needed
+
+ compositeFASTAFileName = thisoutputDir + fileroot + "shhh.fasta";
m->openOutputFile(compositeFASTAFileName, temp);
temp.close();
- compositeNamesFileName = thisoutputDir + m->getRootName(m->getSimpleName(flowFilesFileName)) + "shhh.names";
+ compositeNamesFileName = thisoutputDir + fileroot + "shhh.names";
m->openOutputFile(compositeNamesFileName, temp);
temp.close();
}
temp = validParameter.validFile(parameters, "maxiter", false); if (temp == "not found"){ temp = "1000"; }
m->mothurConvert(temp, maxIters);
+
+ temp = validParameter.validFile(parameters, "large", false); if (temp == "not found"){ temp = "0"; }
+ m->mothurConvert(temp, largeSize);
+ if (largeSize != 0) { large = true; }
+ else { large = false; }
+ if (largeSize < 0) { m->mothurOut("The value of the large cannot be negative.\n"); }
+
+#ifdef USE_MPI
+ if (large) { m->mothurOut("The large parameter is not available with the MPI-Enabled version.\n"); large=false; }
+#endif
+
temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found") { temp = "60"; }
m->mothurConvert(temp, sigma);
}
/**************************************************************************************************/
-int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName, int start, int end){
+vector<string> ShhherCommand::parseFlowFiles(string filename){
try {
+ vector<string> files;
+ int count = 0;
- for(int i=start;i<end;i++){
-
- if (m->control_pressed) { break; }
-
- string flowFileName = filenames[i];
-
- m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(filenames.size()) + ")\t<<<<<\n");
- m->mothurOut("Reading flowgrams...\n");
-
- vector<string> seqNameVector;
- vector<int> lengths;
- vector<short> flowDataIntI;
- vector<double> flowDataPrI;
- map<string, int> nameMap;
- vector<short> uniqueFlowgrams;
- vector<int> uniqueCount;
- vector<int> mapSeqToUnique;
- vector<int> mapUniqueToSeq;
- vector<int> uniqueLengths;
- int numFlowCells;
-
- int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
-
- if (m->control_pressed) { break; }
-
- m->mothurOut("Identifying unique flowgrams...\n");
- int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI);
-
- if (m->control_pressed) { break; }
-
- m->mothurOut("Calculating distances between flowgrams...\n");
- string distFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
- unsigned long long begTime = time(NULL);
- double begClock = clock();
+ ifstream in;
+ m->openInputFile(filename, in);
- flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
-
- m->mothurOutEndLine();
- m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
-
+ int thisNumFLows = 0;
+ in >> thisNumFLows; m->gobble(in);
+
+ while (!in.eof()) {
+ if (m->control_pressed) { break; }
- string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
- createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
-
- if (m->control_pressed) { break; }
-
- m->mothurOut("\nClustering flowgrams...\n");
- string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
- cluster(listFileName, distFileName, namesFileName);
-
- if (m->control_pressed) { break; }
+ ofstream out;
+ string outputFileName = filename + toString(count) + ".temp";
+ m->openOutputFile(outputFileName, out);
+ out << thisNumFLows << endl;
+ files.push_back(outputFileName);
+
+ int numLinesWrote = 0;
+ for (int i = 0; i < largeSize; i++) {
+ if (in.eof()) { break; }
+ string line = m->getline(in); m->gobble(in);
+ out << line << endl;
+ numLinesWrote++;
+ }
+ out.close();
- vector<int> otuData;
- vector<int> cumNumSeqs;
- vector<int> nSeqsPerOTU;
- vector<vector<int> > aaP; //tMaster->aanP: each row is a different otu / each col contains the sequence indices
- vector<vector<int> > aaI; //tMaster->aanI: that are in each otu - can't differentiate between aaP and aaI
- vector<int> seqNumber; //tMaster->anP: the sequence id number sorted by OTU
- vector<int> seqIndex; //tMaster->anI; the index that corresponds to seqNumber
+ if (numLinesWrote == 0) { m->mothurRemove(outputFileName); files.pop_back(); }
+ count++;
+ }
+ in.close();
+
+ if (m->control_pressed) { for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); } files.clear(); }
+
+ m->mothurOut("\nDivided " + filename + " into " + toString(files.size()) + " files.\n\n");
+
+ return files;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ShhherCommand", "parseFlowFiles");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
-
- int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
+int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName, int start, int end){
+ try {
+
+ for(int i=start;i<end;i++){
if (m->control_pressed) { break; }
- m->mothurRemove(distFileName);
- m->mothurRemove(namesFileName);
- m->mothurRemove(listFileName);
-
- vector<double> dist; //adDist - distance of sequences to centroids
- vector<short> change; //did the centroid sequence change? 0 = no; 1 = yes
- vector<int> centroids; //the representative flowgram for each cluster m
- vector<double> weight;
- vector<double> singleTau; //tMaster->adTau: 1-D Tau vector (1xnumSeqs)
- vector<int> nSeqsBreaks;
- vector<int> nOTUsBreaks;
-
- dist.assign(numSeqs * numOTUs, 0);
- change.assign(numOTUs, 1);
- centroids.assign(numOTUs, -1);
- weight.assign(numOTUs, 0);
- singleTau.assign(numSeqs, 1.0);
+ vector<string> theseFlowFileNames; theseFlowFileNames.push_back(filenames[i]);
+ if (large) { theseFlowFileNames = parseFlowFiles(filenames[i]); }
- nSeqsBreaks.assign(2, 0);
- nOTUsBreaks.assign(2, 0);
+ if (m->control_pressed) { break; }
- nSeqsBreaks[0] = 0;
- nSeqsBreaks[1] = numSeqs;
- nOTUsBreaks[1] = numOTUs;
-
- if (m->control_pressed) { break; }
-
- double maxDelta = 0;
- int iter = 0;
-
- begClock = clock();
- begTime = time(NULL);
+ double begClock = clock();
+ unsigned long long begTime;
- m->mothurOut("\nDenoising flowgrams...\n");
- m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
-
- while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
-
- if (m->control_pressed) { break; }
-
- double cycClock = clock();
- unsigned long long cycTime = time(NULL);
- fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
-
- if (m->control_pressed) { break; }
+ for (int g = 0; g < theseFlowFileNames.size(); g++) {
- calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
-
- if (m->control_pressed) { break; }
+ string flowFileName = theseFlowFileNames[g];
+ m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(filenames.size()) + ")\t<<<<<\n");
+ m->mothurOut("Reading flowgrams...\n");
- maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);
+ vector<string> seqNameVector;
+ vector<int> lengths;
+ vector<short> flowDataIntI;
+ vector<double> flowDataPrI;
+ map<string, int> nameMap;
+ vector<short> uniqueFlowgrams;
+ vector<int> uniqueCount;
+ vector<int> mapSeqToUnique;
+ vector<int> mapUniqueToSeq;
+ vector<int> uniqueLengths;
+ int numFlowCells;
+
+ int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
if (m->control_pressed) { break; }
- double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight);
+ m->mothurOut("Identifying unique flowgrams...\n");
+ int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI);
if (m->control_pressed) { break; }
- checkCentroids(numOTUs, centroids, weight);
-
- if (m->control_pressed) { break; }
-
- calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU, dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
-
- if (m->control_pressed) { break; }
-
- iter++;
-
- m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
+ m->mothurOut("Calculating distances between flowgrams...\n");
+ string distFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
+ begTime = time(NULL);
+
- }
-
- if (m->control_pressed) { break; }
-
- m->mothurOut("\nFinalizing...\n");
- fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
-
- if (m->control_pressed) { break; }
-
- setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
-
- if (m->control_pressed) { break; }
-
- vector<int> otuCounts(numOTUs, 0);
- for(int i=0;i<numSeqs;i++) { otuCounts[otuData[i]]++; }
-
- calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
-
- if (m->control_pressed) { break; }
+ flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
+
+ m->mothurOutEndLine();
+ m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
+
+
+ string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
+ createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
+
+ if (m->control_pressed) { break; }
+
+ m->mothurOut("\nClustering flowgrams...\n");
+ string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
+ cluster(listFileName, distFileName, namesFileName);
+
+ if (m->control_pressed) { break; }
+
+ vector<int> otuData;
+ vector<int> cumNumSeqs;
+ vector<int> nSeqsPerOTU;
+ vector<vector<int> > aaP; //tMaster->aanP: each row is a different otu / each col contains the sequence indices
+ vector<vector<int> > aaI; //tMaster->aanI: that are in each otu - can't differentiate between aaP and aaI
+ vector<int> seqNumber; //tMaster->anP: the sequence id number sorted by OTU
+ vector<int> seqIndex; //tMaster->anI; the index that corresponds to seqNumber
+
+
+ int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
+
+ if (m->control_pressed) { break; }
+
+ m->mothurRemove(distFileName);
+ m->mothurRemove(namesFileName);
+ m->mothurRemove(listFileName);
+
+ vector<double> dist; //adDist - distance of sequences to centroids
+ vector<short> change; //did the centroid sequence change? 0 = no; 1 = yes
+ vector<int> centroids; //the representative flowgram for each cluster m
+ vector<double> weight;
+ vector<double> singleTau; //tMaster->adTau: 1-D Tau vector (1xnumSeqs)
+ vector<int> nSeqsBreaks;
+ vector<int> nOTUsBreaks;
+
+ dist.assign(numSeqs * numOTUs, 0);
+ change.assign(numOTUs, 1);
+ centroids.assign(numOTUs, -1);
+ weight.assign(numOTUs, 0);
+ singleTau.assign(numSeqs, 1.0);
+
+ nSeqsBreaks.assign(2, 0);
+ nOTUsBreaks.assign(2, 0);
+
+ nSeqsBreaks[0] = 0;
+ nSeqsBreaks[1] = numSeqs;
+ nOTUsBreaks[1] = numOTUs;
+
+ if (m->control_pressed) { break; }
+
+ double maxDelta = 0;
+ int iter = 0;
+
+ begClock = clock();
+ begTime = time(NULL);
+
+ m->mothurOut("\nDenoising flowgrams...\n");
+ m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
+
+ while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
+
+ if (m->control_pressed) { break; }
+
+ double cycClock = clock();
+ unsigned long long cycTime = time(NULL);
+ fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
+
+ if (m->control_pressed) { break; }
+
+ calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
+
+ if (m->control_pressed) { break; }
+
+ maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);
+
+ if (m->control_pressed) { break; }
+
+ double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight);
+
+ if (m->control_pressed) { break; }
+
+ checkCentroids(numOTUs, centroids, weight);
+
+ if (m->control_pressed) { break; }
+
+ calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU, dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
+
+ if (m->control_pressed) { break; }
+
+ iter++;
+
+ m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
+
+ }
+
+ if (m->control_pressed) { break; }
+
+ m->mothurOut("\nFinalizing...\n");
+ fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
+
+ if (m->control_pressed) { break; }
+
+ setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
+
+ if (m->control_pressed) { break; }
+
+ vector<int> otuCounts(numOTUs, 0);
+ for(int i=0;i<numSeqs;i++) { otuCounts[otuData[i]]++; }
+
+ calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
+
+ if (m->control_pressed) { break; }
+
+ if ((large) && (g == 0)) { flowFileName = filenames[i]; theseFlowFileNames[0] = filenames[i]; }
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir = m->hasPath(flowFileName); }
+ string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.qual";
+ string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.fasta";
+ string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.names";
+ string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.counts";
+ string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
+ string groupFileName = fileRoot + "shhh.groups";
+
+
+ writeQualities(numOTUs, numFlowCells, qualityFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); if (m->control_pressed) { break; }
+ writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, fastaFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);if (m->control_pressed) { break; }
+ writeNames(thisCompositeNamesFileName, numOTUs, nameFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU); if (m->control_pressed) { break; }
+ writeClusters(otuCountsFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI); if (m->control_pressed) { break; }
+ writeGroups(groupFileName, fileRoot, numSeqs, seqNameVector); if (m->control_pressed) { break; }
+
+ if (large) {
+ if (g > 0) {
+ m->appendFiles(qualityFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.qual"));
+ m->mothurRemove(qualityFileName);
+ m->appendFiles(fastaFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.fasta"));
+ m->mothurRemove(fastaFileName);
+ m->appendFiles(nameFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.names"));
+ m->mothurRemove(nameFileName);
+ m->appendFiles(otuCountsFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.counts"));
+ m->mothurRemove(otuCountsFileName);
+ m->appendFiles(groupFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.groups"));
+ m->mothurRemove(groupFileName);
+ }
+ m->mothurRemove(theseFlowFileNames[g]);
+ }
+ }
- writeQualities(numOTUs, numFlowCells, flowFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); if (m->control_pressed) { break; }
- writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, flowFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);if (m->control_pressed) { break; }
- writeNames(thisCompositeNamesFileName, numOTUs, flowFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU); if (m->control_pressed) { break; }
- writeClusters(flowFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI); if (m->control_pressed) { break; }
- writeGroups(flowFileName, numSeqs, seqNameVector); if (m->control_pressed) { break; }
-
m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
}
}
/**************************************************************************************************/
-void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string filename, vector<int> otuCounts, vector<int>& nSeqsPerOTU, vector<int>& seqNumber,
+void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string qualityFileName, vector<int> otuCounts, vector<int>& nSeqsPerOTU, vector<int>& seqNumber,
vector<double>& singleTau, vector<short>& flowDataIntI, vector<short>& uniqueFlowgrams, vector<int>& cumNumSeqs,
vector<int>& mapUniqueToSeq, vector<string>& seqNameVector, vector<int>& centroids, vector<vector<int> >& aaI){
try {
- string thisOutputDir = outputDir;
- if (outputDir == "") { thisOutputDir = m->hasPath(filename); }
- string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.qual";
ofstream qualityFile;
m->openOutputFile(qualityFileName, qualityFile);
/**************************************************************************************************/
-void ShhherCommand::writeSequences(string thisCompositeFASTAFileName, int numOTUs, int numFlowCells, string filename, vector<int> otuCounts, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& centroids){
+void ShhherCommand::writeSequences(string thisCompositeFASTAFileName, int numOTUs, int numFlowCells, string fastaFileName, vector<int> otuCounts, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& centroids){
try {
- string thisOutputDir = outputDir;
- if (outputDir == "") { thisOutputDir = m->hasPath(filename); }
- string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.fasta";
+
ofstream fastaFile;
m->openOutputFile(fastaFileName, fastaFile);
}
}
- fastaFile << newSeq.substr(4) << endl;
+ if (newSeq.length() >= 4) { fastaFile << newSeq.substr(4) << endl; }
+ else { fastaFile << "NNNN" << endl; }
}
}
fastaFile.close();
/**************************************************************************************************/
-void ShhherCommand::writeNames(string thisCompositeNamesFileName, int numOTUs, string filename, vector<int> otuCounts, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU){
+void ShhherCommand::writeNames(string thisCompositeNamesFileName, int numOTUs, string nameFileName, vector<int> otuCounts, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU){
try {
- string thisOutputDir = outputDir;
- if (outputDir == "") { thisOutputDir = m->hasPath(filename); }
- string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.names";
+
ofstream nameFile;
m->openOutputFile(nameFileName, nameFile);
/**************************************************************************************************/
-void ShhherCommand::writeGroups(string filename, int numSeqs, vector<string>& seqNameVector){
+void ShhherCommand::writeGroups(string groupFileName, string fileRoot, int numSeqs, vector<string>& seqNameVector){
try {
- string thisOutputDir = outputDir;
- if (outputDir == "") { thisOutputDir = m->hasPath(filename); }
- string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(filename));
- string groupFileName = fileRoot + "shhh.groups";
- ofstream groupFile;
+ ofstream groupFile;
m->openOutputFile(groupFileName, groupFile);
for(int i=0;i<numSeqs;i++){
/**************************************************************************************************/
-void ShhherCommand::writeClusters(string filename, int numOTUs, int numFlowCells, vector<int> otuCounts, vector<int>& centroids, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU, vector<int>& lengths, vector<short>& flowDataIntI){
+void ShhherCommand::writeClusters(string otuCountsFileName, int numOTUs, int numFlowCells, vector<int> otuCounts, vector<int>& centroids, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU, vector<int>& lengths, vector<short>& flowDataIntI){
try {
- string thisOutputDir = outputDir;
- if (outputDir == "") { thisOutputDir = m->hasPath(filename); }
- string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.counts";
ofstream otuCountsFile;
m->openOutputFile(otuCountsFileName, otuCountsFile);
//otuCountsFile << base;
}
}
- otuCountsFile << newSeq.substr(4) << endl;
+
+ if (newSeq.length() >= 4) { otuCountsFile << newSeq.substr(4) << endl; }
+ else { otuCountsFile << "NNNN" << endl; }
}
otuCountsFile << endl;
}
linePair(int i, int j) : start(i), end(j) {}
};
- int abort;
+ bool abort, large;
string outputDir, flowFileName, flowFilesFileName, lookupFileName, compositeFASTAFileName, compositeNamesFileName;
- int processors, maxIters;
+ int processors, maxIters, largeSize;
float cutoff, sigma, minDelta;
string flowOrder;
vector<double> jointLookUp;
vector<string> flowFileVector;
+ vector<string> parseFlowFiles(string);
int driver(vector<string>, string, string, int, int);
int createProcesses(vector<string>);
int getFlowData(string, vector<string>&, vector<int>&, vector<short>&, map<string, int>&, int&);
void writeQualities(int, int, string, vector<int>, vector<int>&, vector<int>&, vector<double>&, vector<short>&, vector<short>&, vector<int>&, vector<int>&, vector<string>&, vector<int>&, vector<vector<int> >&);
void writeSequences(string, int, int, string, vector<int>, vector<short>&, vector<string>&, vector<vector<int> >&, vector<int>&);
void writeNames(string, int, string, vector<int>, vector<string>&, vector<vector<int> >&, vector<int>&);
- void writeGroups(string, int, vector<string>&);
+ void writeGroups(string, string, int, vector<string>&);
void writeClusters(string, int, int, vector<int>, vector<int>&, vector<short>&, vector<string>&, vector<vector<int> >&, vector<int>&, vector<int>&, vector<short>&);
void getSingleLookUp();
}
/***********************************************************************/
-SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, float cu, string t, int p, string output){
+SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, float cu, string t, int p, bool cl, string output){
m = MothurOut::getInstance();
fastafile = ffile;
namefile = name;
distCutoff = cu; //for fasta method if you are creating distance matrix you need a cutoff for that
method = t;
processors = p;
+ classic = cl;
outputDir = output;
}
//process each distance file
for (int i = 0; i < numGroups; i++) {
- string options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(distCutoff);
+ string options = "";
+ if (classic) { options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", output=lt"; }
+ else { options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(distCutoff); }
if (outputDir != "") { options += ", outputdir=" + outputDir; }
Command* command = new DistanceCommand(options);
for(int i=0;i<numGroups;i++){
string tempNameFile = namefile + "." + toString(i) + ".temp";
if (outputDir == "") { outputDir = m->hasPath(fastafile); }
- string tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist";
+ string tempDistFile = "";
+ if (classic) { tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "phylip.dist";}
+ else { tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist"; }
//if there are valid distances
ifstream fileHandle;
public:
SplitMatrix(string, string, string, float, string, bool); //column formatted distance file, namesfile, cutoff, method, large
- SplitMatrix(string, string, string, float, float, string, int, string); //fastafile, namefile, taxFile, taxcutoff, cutoff, method, processors, outputDir
+ SplitMatrix(string, string, string, float, float, string, int, bool, string); //fastafile, namefile, taxFile, taxcutoff, cutoff, method, processors, classic, outputDir
~SplitMatrix();
int split();
string distFile, namefile, singleton, method, taxFile, fastafile, outputDir;
vector< map< string, string> > dists;
float cutoff, distCutoff;
- bool large;
+ bool large, classic;
int processors;
int splitDistance();
*/
#include "summarysharedcommand.h"
+#include "subsample.h"
//**********************************************************************************************************************
vector<string> SummarySharedCommand::setParameters(){
try {
CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
+ CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pdistance);
CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
+ CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput);
CommandParameter pall("all", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pall);
+ CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
try {
string helpString = "";
ValidCalculators validCalculator;
- helpString += "The summary.shared command parameters are shared, label, calc, distance, processors and all. shared is required if there is no current sharedfile.\n";
+ helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all. shared is required if there is no current sharedfile.\n";
helpString += "The summary.shared command should be in the following format: \n";
helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
helpString += validCalculator.printCalc("sharedsummary");
+ helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
+ helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
+ helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
helpString += "The default value for groups is all the groups in your groupfile.\n";
helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { temp = "false"; }
createPhylip = m->isTrue(temp);
+ temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
+ m->mothurConvert(temp, iters);
+
+ output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "lt"; }
+ if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
+
+ temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
+ if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
+ else {
+ if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
+ else { subsample = false; }
+ }
+
+ if (subsample == false) { iters = 1; }
+
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
m->mothurConvert(temp, processors);
exit(1);
}
}
-
/***********************************************************/
-int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
+int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
try {
- vector< vector<seqDist> > calcDists; //vector containing vectors that contains the summary results for each group compare
- calcDists.resize(sumCalculators.size()); //one for each calc, this will be used to make .dist files
+
+ out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+
+ //output num seqs
+ out << simMatrix.size() << endl;
+
+ if (output == "lt") {
+ for (int b = 0; b < simMatrix.size(); b++) {
+ out << lookup[b]->getGroup() << '\t';
+ for (int n = 0; n < b; n++) {
+ if (m->control_pressed) { return 0; }
+ out << simMatrix[b][n] << '\t';
+ }
+ out << endl;
+ }
+ }else{
+ for (int b = 0; b < simMatrix.size(); m++) {
+ out << lookup[b]->getGroup() << '\t';
+ for (int n = 0; n < simMatrix[b].size(); n++) {
+ if (m->control_pressed) { return 0; }
+ out << simMatrix[b][n] << '\t';
+ }
+ out << endl;
+ }
+ }
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SummarySharedCommand", "printSims");
+ exit(1);
+ }
+}
+/***********************************************************/
+int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
+ try {
+ vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
+ vector< vector<seqDist> > calcDists; calcDists.resize(sumCalculators.size());
- if(processors == 1){
- driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
- m->appendFiles((sumFileName + ".temp"), sumFileName);
- m->mothurRemove((sumFileName + ".temp"));
- if (mult) {
- m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
- m->mothurRemove((sumAllFileName + ".temp"));
- }
- }else{
+ for (int thisIter = 0; thisIter < iters; thisIter++) {
- int process = 1;
- vector<int> processIDS;
+ vector<SharedRAbundVector*> thisItersLookup = thisLookup;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- //loop through and create all the processes you want
- while (process != processors) {
- int pid = fork();
+ if (subsample) {
+ SubSample sample;
+ vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
+
+ //make copy of lookup so we don't get access violations
+ vector<SharedRAbundVector*> newLookup;
+ for (int k = 0; k < thisItersLookup.size(); k++) {
+ SharedRAbundVector* temp = new SharedRAbundVector();
+ temp->setLabel(thisItersLookup[k]->getLabel());
+ temp->setGroup(thisItersLookup[k]->getGroup());
+ newLookup.push_back(temp);
+ }
- if (pid > 0) {
- processIDS.push_back(pid);
- process++;
- }else if (pid == 0){
- driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
+ //for each bin
+ for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
+ if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
+ for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
+ }
+
+ tempLabels = sample.getSample(newLookup, subsampleSize);
+ thisItersLookup = newLookup;
+ }
+
+
+ if(processors == 1){
+ driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
+ m->appendFiles((sumFileName + ".temp"), sumFileName);
+ m->mothurRemove((sumFileName + ".temp"));
+ if (mult) {
+ m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
+ m->mothurRemove((sumAllFileName + ".temp"));
+ }
+ }else{
+
+ int process = 1;
+ vector<int> processIDS;
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ //loop through and create all the processes you want
+ while (process != processors) {
+ int pid = fork();
- //only do this if you want a distance file
- if (createPhylip) {
- string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
- ofstream outtemp;
- m->openOutputFile(tempdistFileName, outtemp);
+ if (pid > 0) {
+ processIDS.push_back(pid);
+ process++;
+ }else if (pid == 0){
+ driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
- for (int i = 0; i < calcDists.size(); i++) {
- outtemp << calcDists[i].size() << endl;
+ //only do this if you want a distance file
+ if (createPhylip) {
+ string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
+ ofstream outtemp;
+ m->openOutputFile(tempdistFileName, outtemp);
- for (int j = 0; j < calcDists[i].size(); j++) {
- outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
+ for (int i = 0; i < calcDists.size(); i++) {
+ outtemp << calcDists[i].size() << endl;
+
+ for (int j = 0; j < calcDists[i].size(); j++) {
+ outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
+ }
}
+ outtemp.close();
}
- outtemp.close();
+
+ exit(0);
+ }else {
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
}
-
- exit(0);
- }else {
- m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
- for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
- exit(0);
}
- }
-
- //parent do your part
- driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
- m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
- m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
- if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
-
- //force parent to wait until all the processes are done
- for (int i = 0; i < processIDS.size(); i++) {
- int temp = processIDS[i];
- wait(&temp);
- }
-
- for (int i = 0; i < processIDS.size(); i++) {
- m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
- m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
- if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp")); }
- if (createPhylip) {
- string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist";
- ifstream intemp;
- m->openInputFile(tempdistFileName, intemp);
+ //parent do your part
+ driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
+ m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
+ m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
+ if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
+
+ //force parent to wait until all the processes are done
+ for (int i = 0; i < processIDS.size(); i++) {
+ int temp = processIDS[i];
+ wait(&temp);
+ }
+
+ for (int i = 0; i < processIDS.size(); i++) {
+ m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
+ m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
+ if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp")); }
- for (int k = 0; k < calcDists.size(); k++) {
- int size = 0;
- intemp >> size; m->gobble(intemp);
+ if (createPhylip) {
+ string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist";
+ ifstream intemp;
+ m->openInputFile(tempdistFileName, intemp);
- for (int j = 0; j < size; j++) {
- int seq1 = 0;
- int seq2 = 0;
- float dist = 1.0;
+ for (int k = 0; k < calcDists.size(); k++) {
+ int size = 0;
+ intemp >> size; m->gobble(intemp);
- intemp >> seq1 >> seq2 >> dist; m->gobble(intemp);
-
- seqDist tempDist(seq1, seq2, dist);
- calcDists[k].push_back(tempDist);
+ for (int j = 0; j < size; j++) {
+ int seq1 = 0;
+ int seq2 = 0;
+ float dist = 1.0;
+
+ intemp >> seq1 >> seq2 >> dist; m->gobble(intemp);
+
+ seqDist tempDist(seq1, seq2, dist);
+ calcDists[k].push_back(tempDist);
+ }
}
+ intemp.close();
+ m->mothurRemove(tempdistFileName);
}
- intemp.close();
- m->mothurRemove(tempdistFileName);
}
- }
#else
- //////////////////////////////////////////////////////////////////////////////////////////////////////
- //Windows version shared memory, so be careful when passing variables through the summarySharedData struct.
- //Above fork() will clone, so memory is separate, but that's not the case with windows,
- //Taking advantage of shared memory to pass results vectors.
- //////////////////////////////////////////////////////////////////////////////////////////////////////
-
- vector<summarySharedData*> pDataArray;
- DWORD dwThreadIdArray[processors-1];
- HANDLE hThreadArray[processors-1];
-
- //Create processor worker threads.
- for( int i=1; i<processors; i++ ){
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+ //Windows version shared memory, so be careful when passing variables through the summarySharedData struct.
+ //Above fork() will clone, so memory is separate, but that's not the case with windows,
+ //Taking advantage of shared memory to pass results vectors.
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
- //make copy of lookup so we don't get access violations
- vector<SharedRAbundVector*> newLookup;
- for (int k = 0; k < thisLookup.size(); k++) {
- SharedRAbundVector* temp = new SharedRAbundVector();
- temp->setLabel(thisLookup[k]->getLabel());
- temp->setGroup(thisLookup[k]->getGroup());
- newLookup.push_back(temp);
+ vector<summarySharedData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ //Create processor worker threads.
+ for( int i=1; i<processors; i++ ){
+
+ //make copy of lookup so we don't get access violations
+ vector<SharedRAbundVector*> newLookup;
+ for (int k = 0; k < thisLookup.size(); k++) {
+ SharedRAbundVector* temp = new SharedRAbundVector();
+ temp->setLabel(thisLookup[k]->getLabel());
+ temp->setGroup(thisLookup[k]->getGroup());
+ newLookup.push_back(temp);
+ }
+
+ //for each bin
+ for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
+ if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
+ for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
+ }
+
+ // Allocate memory for thread data.
+ summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
+ pDataArray.push_back(tempSum);
+ processIDS.push_back(i);
+
+ hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
}
- //for each bin
- for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
- if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
- for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
+ //parent do your part
+ driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);
+ m->appendFiles((sumFileName + "0.temp"), sumFileName);
+ m->mothurRemove((sumFileName + "0.temp"));
+ if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
+ m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
+
+ for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; }
+
+ if (createPhylip) {
+ for (int k = 0; k < calcDists.size(); k++) {
+ int size = pDataArray[i]->calcDists[k].size();
+ for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); }
+ }
+ }
+
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
}
+
+#endif
+ }
+ calcDistsTotals.push_back(calcDists);
+
+ if (subsample) {
+
+ //clean up memory
+ for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
+ thisItersLookup.clear();
+ for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
+ }
+ }
- // Allocate memory for thread data.
- summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
- pDataArray.push_back(tempSum);
- processIDS.push_back(i);
+ if (iters != 1) {
+ //we need to find the average distance and standard deviation for each groups distance
+
+ vector< vector<seqDist> > calcAverages; calcAverages.resize(sumCalculators.size());
+ for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
+ calcAverages[i].resize(calcDistsTotals[0][i].size());
- hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+ for (int j = 0; j < calcAverages[i].size(); j++) {
+ calcAverages[i][j].seq1 = calcDists[i][j].seq1;
+ calcAverages[i][j].seq2 = calcDists[i][j].seq2;
+ calcAverages[i][j].dist = 0.0;
+ }
}
- //parent do your part
- driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);
- m->appendFiles((sumFileName + "0.temp"), sumFileName);
- m->mothurRemove((sumFileName + "0.temp"));
- if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
+ for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
+ for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
+ for (int j = 0; j < calcAverages[i].size(); j++) {
+ calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
+ }
+ }
+ }
- //Wait until all threads have terminated.
- WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+ for (int i = 0; i < calcAverages.size(); i++) { //finds average.
+ for (int j = 0; j < calcAverages[i].size(); j++) {
+ calcAverages[i][j].dist /= (float) iters;
+ }
+ }
- //Close all thread handles and free memory allocations.
- for(int i=0; i < pDataArray.size(); i++){
- m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
- m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
+ //find standard deviation
+ vector< vector<seqDist> > stdDev; stdDev.resize(sumCalculators.size());
+ for (int i = 0; i < stdDev.size(); i++) { //initialize sums to zero.
+ stdDev[i].resize(calcDistsTotals[0][i].size());
- for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; }
-
- if (createPhylip) {
- for (int k = 0; k < calcDists.size(); k++) {
- int size = pDataArray[i]->calcDists[k].size();
- for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); }
+ for (int j = 0; j < stdDev[i].size(); j++) {
+ stdDev[i][j].seq1 = calcDists[i][j].seq1;
+ stdDev[i][j].seq2 = calcDists[i][j].seq2;
+ stdDev[i][j].dist = 0.0;
+ }
+ }
+
+ for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
+ for (int i = 0; i < stdDev.size(); i++) {
+ for (int j = 0; j < stdDev[i].size(); j++) {
+ stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
}
}
-
- CloseHandle(hThreadArray[i]);
- delete pDataArray[i];
}
-
-#endif
- }
-
- if (createPhylip) {
+
+ for (int i = 0; i < stdDev.size(); i++) { //finds average.
+ for (int j = 0; j < stdDev[i].size(); j++) {
+ stdDev[i][j].dist /= (float) iters;
+ stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
+ }
+ }
+
+ //print results
for (int i = 0; i < calcDists.size(); i++) {
- if (m->control_pressed) { break; }
-
- string distFileName = outputDir + m->getRootName(m->getSimpleName(sumFileName)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
- outputNames.push_back(distFileName);
- ofstream outDist;
- m->openOutputFile(distFileName, outDist);
- outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
-
- //initialize matrix
- vector< vector<float> > matrix; //square matrix to represent the distance
+ vector< vector<double> > matrix; //square matrix to represent the distance
matrix.resize(thisLookup.size());
for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
+ vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
+ stdmatrix.resize(thisLookup.size());
+ for (int k = 0; k < thisLookup.size(); k++) { stdmatrix[k].resize(thisLookup.size(), 0.0); }
+
- for (int j = 0; j < calcDists[i].size(); j++) {
- int row = calcDists[i][j].seq1;
- int column = calcDists[i][j].seq2;
- float dist = calcDists[i][j].dist;
+ for (int j = 0; j < calcAverages[i].size(); j++) {
+ int row = calcAverages[i][j].seq1;
+ int column = calcAverages[i][j].seq2;
+ float dist = calcAverages[i][j].dist;
+ float stdDist = stdDev[i][j].dist;
matrix[row][column] = dist;
matrix[column][row] = dist;
+ stdmatrix[row][column] = stdDist;
+ stdmatrix[column][row] = stdDist;
}
- //output to file
- outDist << thisLookup.size() << endl;
- for (int r=0; r<thisLookup.size(); r++) {
- //output name
- string name = thisLookup[r]->getGroup();
- if (name.length() < 10) { //pad with spaces to make compatible
- while (name.length() < 10) { name += " "; }
+ string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".ave.dist";
+ outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
+ ofstream outAve;
+ m->openOutputFile(distFileName, outAve);
+ outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
+
+ printSims(outAve, matrix);
+
+ outAve.close();
+
+ distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".std.dist";
+ outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
+ ofstream outSTD;
+ m->openOutputFile(distFileName, outSTD);
+ outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
+
+ printSims(outSTD, stdmatrix);
+
+ outSTD.close();
+
+ }
+ }else {
+ if (createPhylip) {
+ for (int i = 0; i < calcDists.size(); i++) {
+ if (m->control_pressed) { break; }
+
+ //initialize matrix
+ vector< vector<double> > matrix; //square matrix to represent the distance
+ matrix.resize(thisLookup.size());
+ for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
+
+ for (int j = 0; j < calcDists[i].size(); j++) {
+ int row = calcDists[i][j].seq1;
+ int column = calcDists[i][j].seq2;
+ double dist = calcDists[i][j].dist;
+
+ matrix[row][column] = dist;
+ matrix[column][row] = dist;
}
- outDist << name << '\t';
-
- //output distances
- for (int l = 0; l < r; l++) { outDist << matrix[r][l] << '\t'; }
- outDist << endl;
+
+ string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".dist";
+ outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
+ ofstream outDist;
+ m->openOutputFile(distFileName, outDist);
+ outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
+
+ printSims(outDist, matrix);
+
+ outDist.close();
}
-
- outDist.close();
}
}
- return 0;
+
+ return 0;
}
catch(exception& e) {
m->errorOut(e, "SummarySharedCommand", "process");
vector<Calculator*> sumCalculators;
InputData* input;
- bool abort, allLines, mult, all, createPhylip;
+ bool abort, allLines, mult, all, createPhylip, subsample;
set<string> labels; //holds labels to be used
- string label, calc, groups, sharedfile;
+ string label, calc, groups, sharedfile, output;
vector<string> Estimators, Groups, outputNames;
vector<SharedRAbundVector*> lookup;
string format, outputDir;
- int numGroups, processors;
+ int numGroups, processors, subsampleSize, iters;
int process(vector<SharedRAbundVector*>, string, string);
int driver(vector<SharedRAbundVector*>, int, int, string, string, vector< vector<seqDist> >&);
+ int printSims(ostream&, vector< vector<double> >&);
};
}
}
/**************************************************************************************************/
+double TrialSwap2::getSD (int runs, vector<double> scorevec, double nullMean)
+{
+ try{
+ double sum = 0;
+ for(int i=0;i<runs;i++)
+ {
+ if (m->control_pressed) { return 0; }
+ sum += pow((scorevec[i] - nullMean),2);
+ }
+ return sqrt( (1/double(runs)) * sum );
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TrialSwap2", "getSD");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+double TrialSwap2::get_zscore (double sd, double nullMean, double initscore)
+{
+ try {
+ return (initscore - nullMean) / sd;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TrialSwap2", "get_zscore");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
int TrialSwap2::print_matrix(vector<vector<int> > &matrix, int nrows, int ncols)
{
try {
double calc_vratio (int, int, vector<int>, vector<int>);
int calc_checker (vector<vector<int> > &, vector<int>, int, int);
double calc_c_score (vector<vector<int> > &, vector<int>, int, int);
+ double get_zscore (double, double, double);
+ double getSD (int, vector<double>, double);
private:
int print_matrix(vector<vector<int> > &, int, int);
-
+
};
#endif