}
}
//**********************************************************************************************************************
+string ClassifySeqsCommand::getOutputFileNameTag(string type, string inputName=""){ // returns the filename tag (suffix) used for output files of the given type; inputName is not read in this body
+ try {
+ string outputFileName = ""; // stays "" on both error paths below, so callers get an empty tag for an unsupported type
+ map<string, vector<string> >::iterator it;
+
+ //is this a type this command creates? (i.e. is it already a key in outputTypes)
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } // reported only; control_pressed is not set on this path
+ else {
+ if (type == "taxonomy") { outputFileName = "taxonomy"; }
+ else if (type == "accnos") { outputFileName = "flip.accnos"; }
+ else if (type == "taxsummary") { outputFileName = "tax.summary"; }
+ else if (type == "matchdist") { outputFileName = "match.dist"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type is in outputTypes but has no tag here: report it and set control_pressed
+ }
+ return outputFileName;
+ }
+ catch(exception& e) { // any std::exception is reported through errorOut and the process exits with status 1
+ m->errorOut(e, "ClassifySeqsCommand", "getOutputFileNameTag");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
ClassifySeqsCommand::ClassifySeqsCommand(){
try {
abort = true; calledHelp = true;
search = "kmer";
}
- if (namefileNames.size() == 0){
- vector<string> files; files.push_back(fastaFileNames[fastaFileNames.size()-1]);
- parser.getNameFile(files);
- }
-
- }
-
+ if (!abort) {
+ if (namefileNames.size() == 0){
+ if (fastaFileNames.size() != 0) {
+ vector<string> files; files.push_back(fastaFileNames[fastaFileNames.size()-1]);
+ parser.getNameFile(files);
+ }
+ }
+ }
+ }
}
catch(exception& e) {
m->errorOut(e, "ClassifySeqsCommand", "ClassifySeqsCommand");
int ClassifySeqsCommand::execute(){
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
-
+
if(method == "bayesian"){ classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip); }
else if(method == "knn"){ classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand()); }
else {
string baseTName = taxonomyFileName;
if (taxonomyFileName == "saved") {baseTName = rdb->getSavedTaxonomy(); }
- string RippedTaxName = m->getRootName(m->getSimpleName(baseTName));
- RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
- if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
- RippedTaxName += ".";
-
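+ //set RippedTaxName to the part of baseTName between its second-to-last '.' (or the start of the string, if there is only one '.') and its last '.'; it stays empty when baseTName contains no '.'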
+ string RippedTaxName = "";
+ bool foundDot = false;
+ for (int i = baseTName.length()-1; i >= 0; i--) {
+ if (foundDot && (baseTName[i] != '.')) { RippedTaxName = baseTName[i] + RippedTaxName; }
+ else if (foundDot && (baseTName[i] == '.')) { break; }
+ else if (!foundDot && (baseTName[i] == '.')) { foundDot = true; }
+ }
+ if (RippedTaxName != "") { RippedTaxName += "."; }
+
if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
- string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "taxonomy";
- string newaccnosFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "flip.accnos";
+ string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + getOutputFileNameTag("taxonomy");
+ string newaccnosFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + getOutputFileNameTag("accnos");
string tempTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "taxonomy.temp";
- string taxSummary = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "tax.summary";
+ string taxSummary = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + getOutputFileNameTag("taxsummary");
if ((method == "knn") && (search == "distance")) {
- string DistName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "match.dist";
+ string DistName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("matchdist");
classify->setDistName(DistName); outputNames.push_back(DistName); outputTypes["matchdist"].push_back(DistName);
}
#else
vector<unsigned long long> positions;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
positions = m->divideFile(fastaFileNames[s], processors);
for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); }
#else
lines.push_back(new linePair(0, 1000));
}else {
positions = m->setFilePosFasta(fastaFileNames[s], numFastaSeqs);
+ if (positions.size() < processors) { processors = positions.size(); }
//figure out how many sequences you have to process
int numSeqsPerProcessor = numFastaSeqs / processors;
if(namefile != "") {
m->mothurOut("Reading " + namefileNames[s] + "..."); cout.flush();
-
nameMap.clear(); //remove old names
-
- ifstream inNames;
- m->openInputFile(namefileNames[s], inNames);
-
- string firstCol, secondCol;
- while(!inNames.eof()) {
- inNames >> firstCol >> secondCol; m->gobble(inNames);
-
- vector<string> temp;
- m->splitAtComma(secondCol, temp);
-
- nameMap[firstCol] = temp;
- }
- inNames.close();
-
+ m->readNames(namefileNames[s], nameMap);
m->mothurOut(" Done."); m->mothurOutEndLine();
}
#endif
int num = 0;
processIDS.clear();
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
int process = 1;
//loop through and create all the processes you want
}
#endif
-
+ vector<string> nonBlankAccnosFiles;
+ if (!(m->isBlank(accnos))) { nonBlankAccnosFiles.push_back(accnos); }
+ else { m->mothurRemove(accnos); } //remove so other files can be renamed to it
+
for(int i=0;i<processIDS.size();i++){
- appendTaxFiles((taxFileName + toString(processIDS[i]) + ".temp"), taxFileName);
- appendTaxFiles((tempTaxFile + toString(processIDS[i]) + ".temp"), tempTaxFile);
- appendTaxFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
+ m->appendFiles((taxFileName + toString(processIDS[i]) + ".temp"), taxFileName);
+ m->appendFiles((tempTaxFile + toString(processIDS[i]) + ".temp"), tempTaxFile);
+ if (!(m->isBlank(accnos + toString(processIDS[i]) + ".temp"))) {
+ nonBlankAccnosFiles.push_back(accnos + toString(processIDS[i]) + ".temp");
+ }else { m->mothurRemove((accnos + toString(processIDS[i]) + ".temp")); }
+
m->mothurRemove((m->getFullPathName(taxFileName) + toString(processIDS[i]) + ".temp"));
m->mothurRemove((m->getFullPathName(tempTaxFile) + toString(processIDS[i]) + ".temp"));
- m->mothurRemove((m->getFullPathName(accnos) + toString(processIDS[i]) + ".temp"));
}
+ //append accnos files
+ if (nonBlankAccnosFiles.size() != 0) {
+ rename(nonBlankAccnosFiles[0].c_str(), accnos.c_str());
+
+ for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
+ m->appendFiles(nonBlankAccnosFiles[h], accnos);
+ m->mothurRemove(nonBlankAccnosFiles[h]);
+ }
+ }else { //recreate the accnosfile if needed
+ ofstream out;
+ m->openOutputFile(accnos, out);
+ out.close();
+ }
+
return num;
}
exit(1);
}
}
-/**************************************************************************************************/
-
-void ClassifySeqsCommand::appendTaxFiles(string temp, string filename) {
- try{
-
- ofstream output;
- ifstream input;
- m->openOutputFileAppend(filename, output);
- m->openInputFile(temp, input);
-
- while(char c = input.get()){
- if(input.eof()) { break; }
- else { output << c; }
- }
-
- input.close();
- output.close();
- }
- catch(exception& e) {
- m->errorOut(e, "ClassifySeqsCommand", "appendTaxFiles");
- exit(1);
- }
-}
-
//**********************************************************************************************************************
int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempTFName, string accnos, string filename){
}
delete candidateSeq;
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
unsigned long long pos = inFASTA.tellg();
if ((pos == -1) || (pos >= filePos->end)) { break; }
#else