X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=screenseqscommand.cpp;h=7f385d39c729e1acbc2d33325e9640c706f095e2;hb=49d2b7459c5027557564b21e9487dadafbbbdc96;hp=9494865c4f66e302182b8fd221e0b44951cb2518;hpb=5d00ef3e809832f08efdd691a9eace8ac20feb07;p=mothur.git diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index 9494865..7f385d3 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -70,6 +70,33 @@ string ScreenSeqsCommand::getHelpString(){ exit(1); } } +//********************************************************************************************************************** +string ScreenSeqsCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "fasta") { outputFileName = "good" + m->getExtension(inputName); } + else if (type == "taxonomy") { outputFileName = "good" + m->getExtension(inputName); } + else if (type == "name") { outputFileName = "good" + m->getExtension(inputName); } + else if (type == "group") { outputFileName = "good" + m->getExtension(inputName); } + else if (type == "accnos") { outputFileName = "bad.accnos"; } + else if (type == "qfile") { outputFileName = "good" + m->getExtension(inputName); } + else if (type == "alignreport") { outputFileName = "good.align.report"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "ScreenSeqsCommand", "getOutputFileNameTag"); + exit(1); + } +} + //********************************************************************************************************************** ScreenSeqsCommand::ScreenSeqsCommand(){ try { @@ -288,7 +315,7 @@ int ScreenSeqsCommand::execute(){ getSummary(positions); } else { - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) positions = m->divideFile(fastafile, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else @@ -296,6 +323,7 @@ int ScreenSeqsCommand::execute(){ else { int numFastaSeqs = 0; positions = m->setFilePosFasta(fastafile, numFastaSeqs); + if (positions.size() < processors) { processors = positions.size(); } //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; @@ -308,8 +336,8 @@ int ScreenSeqsCommand::execute(){ #endif } - string goodSeqFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "good" + m->getExtension(fastafile); - string badAccnosFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "bad.accnos"; + string goodSeqFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile); + string badAccnosFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("accnos"); int numFastaSeqs = 0; set badSeqNames; @@ -398,17 +426,10 @@ int ScreenSeqsCommand::execute(){ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else - - //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors == 1){ - numFastaSeqs = driver(lines[0], goodSeqFile, badAccnosFile, fastafile, badSeqNames); - }else{ - numFastaSeqs = createProcesses(goodSeqFile, badAccnosFile, fastafile, badSeqNames); - } - //#else - // numFastaSeqs = driver(lines[0], goodSeqFile, badAccnosFile, fastafile, badSeqNames); - //#endif - if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; } + if(processors == 1){ numFastaSeqs = driver(lines[0], goodSeqFile, badAccnosFile, fastafile, badSeqNames); } + else{ numFastaSeqs = createProcesses(goodSeqFile, badAccnosFile, fastafile, badSeqNames); } + + if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; } #endif #ifdef USE_MPI @@ -520,7 +541,7 @@ int ScreenSeqsCommand::screenNameGroupFile(set badSeqNames){ string seqName, seqList, group; set::iterator it; - string goodNameFile = outputDir + m->getRootName(m->getSimpleName(namefile)) + "good" + m->getExtension(namefile); + string goodNameFile = outputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile); outputNames.push_back(goodNameFile); outputTypes["name"].push_back(goodNameFile); ofstream goodNameOut; m->openOutputFile(goodNameFile, goodNameOut); @@ -566,7 +587,7 @@ int ScreenSeqsCommand::screenNameGroupFile(set badSeqNames){ ifstream inputGroups; m->openInputFile(groupfile, inputGroups); - string goodGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "good" + m->getExtension(groupfile); + string goodGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile); outputNames.push_back(goodGroupFile); outputTypes["group"].push_back(goodGroupFile); ofstream goodGroupOut; m->openOutputFile(goodGroupFile, goodGroupOut); @@ -618,7 +639,7 @@ int ScreenSeqsCommand::getSummary(vector& positions){ vector longHomoPolymer; vector positions; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) positions = m->divideFile(fastafile, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else @@ -626,6 +647,7 @@ int ScreenSeqsCommand::getSummary(vector& positions){ else { int numFastaSeqs = 0; positions = m->setFilePosFasta(fastafile, numFastaSeqs); + if (positions.size() < processors) { processors = positions.size(); } //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; @@ -645,7 +667,7 @@ int ScreenSeqsCommand::getSummary(vector& positions){ driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]); #else int numSeqs = 0; - //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if(processors == 1){ numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]); }else{ @@ -750,7 +772,7 @@ int ScreenSeqsCommand::driverCreateSummary(vector& startPosition, vectormothurOut("Optimizing sequence: " + toString(count)); m->mothurOutEndLine(); } - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) unsigned long long pos = in.tellg(); if ((pos == -1) || (pos >= filePos.end)) { break; } #else @@ -776,7 +798,7 @@ int ScreenSeqsCommand::createProcessesCreateSummary(vector& startPosition, int num = 0; vector processIDS; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { @@ -898,8 +920,8 @@ int ScreenSeqsCommand::screenGroupFile(set badSeqNames){ string seqName, group; set::iterator it; - string goodGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "good" + m->getExtension(groupfile); - outputNames.push_back(goodGroupFile); outputTypes["group"].push_back(goodGroupFile); + string goodGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile); + outputNames.push_back(goodGroupFile); outputTypes["group"].push_back(goodGroupFile); ofstream goodGroupOut; m->openOutputFile(goodGroupFile, goodGroupOut); while(!inputGroups.eof()){ @@ -950,7 +972,7 @@ int ScreenSeqsCommand::screenAlignReport(set badSeqNames){ string seqName, group; set::iterator it; - string goodAlignReportFile = outputDir + m->getRootName(m->getSimpleName(alignreport)) + "good" + m->getExtension(alignreport); + string goodAlignReportFile = outputDir + m->getRootName(m->getSimpleName(alignreport)) + getOutputFileNameTag("alignreport"); outputNames.push_back(goodAlignReportFile); outputTypes["alignreport"].push_back(goodAlignReportFile); ofstream goodAlignReportOut; m->openOutputFile(goodAlignReportFile, goodAlignReportOut); @@ -1014,7 +1036,7 @@ int ScreenSeqsCommand::screenTaxonomy(set badSeqNames){ string seqName, tax; set::iterator it; - string goodTaxFile = outputDir + m->getRootName(m->getSimpleName(taxonomy)) + "good" + m->getExtension(taxonomy); + string goodTaxFile = outputDir + m->getRootName(m->getSimpleName(taxonomy)) + getOutputFileNameTag("taxonomy", taxonomy); outputNames.push_back(goodTaxFile); outputTypes["taxonomy"].push_back(goodTaxFile); ofstream goodTaxOut; m->openOutputFile(goodTaxFile, goodTaxOut); @@ -1063,7 +1085,7 @@ int ScreenSeqsCommand::screenQual(set badSeqNames){ m->openInputFile(qualfile, in); set::iterator it; - string goodQualFile = outputDir + m->getRootName(m->getSimpleName(qualfile)) + "good" + m->getExtension(qualfile); + string goodQualFile = outputDir + m->getRootName(m->getSimpleName(qualfile)) + getOutputFileNameTag("qfile", qualfile); outputNames.push_back(goodQualFile); outputTypes["qfile"].push_back(goodQualFile); ofstream goodQual; m->openOutputFile(goodQualFile, goodQual); @@ -1170,7 +1192,7 @@ int ScreenSeqsCommand::driver(linePair filePos, string goodFName, string badAccn count++; } - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) unsigned long long pos = inFASTA.tellg(); if ((pos == -1) || (pos >= filePos.end)) { break; } #else @@ -1282,7 +1304,7 @@ int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, st int process = 1; int num = 0; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { @@ -1363,7 +1385,7 @@ int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, st if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); } // Allocate memory for thread data. - sumScreenData* tempSum = new sumScreenData(startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, filename, m, lines[i].start, lines[i].end,goodFileName+extension, badAccnos+extension, &badSeqNames); + sumScreenData* tempSum = new sumScreenData(startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, filename, m, lines[i].start, lines[i].end,goodFileName+extension, badAccnos+extension); pDataArray.push_back(tempSum); //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier @@ -1380,6 +1402,7 @@ int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, st //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ num += pDataArray[i]->count; + for (set::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it); } CloseHandle(hThreadArray[i]); delete pDataArray[i]; }