X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimseqscommand.cpp;h=b208209e2a808a0f92cca9f160f4da456450e153;hb=e0c50fc42e953dbe258feb9bc1d1d7dc10fafff0;hp=0c21c8993552929a5299841a5089783f7b8c2aef;hpb=cc19310422f125d6628980bd1148e1e816792382;p=mothur.git diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 0c21c89..b208209 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -15,35 +15,35 @@ //********************************************************************************************************************** vector TrimSeqsCommand::setParameters(){ try { - CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); - CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos); - CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile); - CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname); - CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount); - CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip); - CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig); - CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop); - CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength); - CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength); - CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs); - CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs); - CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs); - CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs); - CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles); - CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward); - CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim); - CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold); - CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage); - CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage); - CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage); - CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize); - CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize); - CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst); - CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta); + CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos); + CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false,true); parameters.push_back(pqfile); + CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount); + CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip); + CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig); + CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop); + CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength); + CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength); + CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs); + CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs); + CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs); + CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs); + CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles); + CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward); + CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim); + CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold); + CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage); + CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(prollaverage); + CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqwindowaverage); + CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pqstepsize); + CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pqwindowsize); + CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst); + CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -101,31 +101,24 @@ string TrimSeqsCommand::getHelpString(){ } } //********************************************************************************************************************** -string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){ - try { - string outputFileName = ""; - map >::iterator it; +string TrimSeqsCommand::getOutputPattern(string type) { + try { + string pattern = ""; - //is this a type this command creates - it = outputTypes.find(type); - if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } - else { - if (type == "qfile") { outputFileName = "qual"; } - else if (type == "fasta") { outputFileName = "fasta"; } - else if (type == "group") { outputFileName = "groups"; } - else if (type == "name") { outputFileName = "names"; } - else if (type == "count") { outputFileName = "count_table"; } - else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } - } - return outputFileName; - } - catch(exception& e) { - m->errorOut(e, "TrimSeqsCommand", "getOutputFileNameTag"); - exit(1); - } + if (type == "qfile") { pattern = "[filename],[tag],qual"; } + else if (type == "fasta") { pattern = "[filename],[tag],fasta"; } + else if (type == "group") { pattern = "[filename],groups"; } + else if (type == "name") { pattern = "[filename],[tag],names"; } + else if (type == "count") { pattern = "[filename],[tag],count_table-[filename],count_table"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "TrimSeqsCommand", "getOutputPattern"); + exit(1); + } } - - //********************************************************************************************************************** TrimSeqsCommand::TrimSeqsCommand(){ @@ -382,15 +375,18 @@ int TrimSeqsCommand::execute(){ vector > qualFileNames; vector > nameFileNames; - string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("fasta"); + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile)); + variables["[tag]"] = "trim"; + string trimSeqFile = getOutputFileName("fasta",variables); + string trimQualFile = getOutputFileName("qfile",variables); outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile); - - string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("fasta"); + + variables["[tag]"] = "scrap"; + string scrapSeqFile = getOutputFileName("fasta",variables); + string scrapQualFile = getOutputFileName("qfile",variables); outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile); - string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("qfile"); - string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("qfile"); - if (qFileName != "") { outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile); @@ -398,8 +394,11 @@ int TrimSeqsCommand::execute(){ outputTypes["qfile"].push_back(scrapQualFile); } - string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim." + getOutputFileNameTag("name"); - string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap." + getOutputFileNameTag("name"); + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile)); + variables["[tag]"] = "trim"; + string trimNameFile = getOutputFileName("name",variables); + variables["[tag]"] = "scrap"; + string scrapNameFile = getOutputFileName("name",variables); if (nameFile != "") { m->readNames(nameFile, nameMap); @@ -409,8 +408,11 @@ int TrimSeqsCommand::execute(){ outputTypes["name"].push_back(scrapNameFile); } - string trimCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + "trim." + getOutputFileNameTag("count"); - string scrapCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + "scrap." + getOutputFileNameTag("count"); + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile)); + variables["[tag]"] = "trim"; + string trimCountFile = getOutputFileName("count",variables); + variables["[tag]"] = "scrap"; + string scrapCountFile = getOutputFileName("count",variables); if (countfile != "") { CountTable ct; @@ -429,7 +431,9 @@ int TrimSeqsCommand::execute(){ if(oligoFile != ""){ createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames); if ((createGroup) && (countfile == "")){ - outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group"); + map myvariables; + myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile)); + outputGroupFileName = getOutputFileName("group",myvariables); outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); } } @@ -488,9 +492,11 @@ int TrimSeqsCommand::execute(){ m->openInputFile(it->first, in); ofstream out; - string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)); - if (countfile == "") { thisGroupName += getOutputFileNameTag("group"); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); } - else { thisGroupName += getOutputFileNameTag("count"); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName); } + map myvariables; + myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(it->first)); + string thisGroupName = ""; + if (countfile == "") { thisGroupName = getOutputFileName("group",myvariables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); } + else { thisGroupName = getOutputFileName("count",myvariables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName); } m->openOutputFile(thisGroupName, out); if (countfile != "") { out << "Representative_Sequence\ttotal\t" << it->second << endl; } @@ -773,32 +779,10 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } } if(trashCode.length() == 0){ - currSeq.setAligned(currSeq.getUnaligned()); - currSeq.printSequence(trimFASTAFile); - - if(qFileName != ""){ - currQual.printQScores(trimQualFile); - } - - - if(nameFile != ""){ - map::iterator itName = nameMap.find(currSeq.getName()); - if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; } - else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } - } - - int numRedundants = 0; - if (countfile != "") { - map::iterator itCount = nameCount.find(currSeq.getName()); - if (itCount != nameCount.end()) { - trimCountFile << itCount->first << '\t' << itCount->second << endl; - numRedundants = itCount->second-1; - }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); } - } - - if (createGroup) { + string thisGroup = ""; + if (createGroup) { if(barcodes.size() != 0){ - string thisGroup = barcodeNameVector[barcodeIndex]; + thisGroup = barcodeNameVector[barcodeIndex]; if (primers.size() != 0) { if (primerNameVector[primerIndex] != "") { if(thisGroup != "") { @@ -808,52 +792,83 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string } } } - - if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); } - - if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; } - else { groupMap[currSeq.getName()] = thisGroup; } - - if (nameFile != "") { - map::iterator itName = nameMap.find(currSeq.getName()); - if (itName != nameMap.end()) { - vector thisSeqsNames; - m->splitAtChar(itName->second, thisSeqsNames, ','); - numRedundants = thisSeqsNames.size()-1; //we already include ourselves below - for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self - outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl; - } - }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } - } - - map::iterator it = groupCounts.find(thisGroup); - if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; } - else { groupCounts[it->first] += (1 + numRedundants); } + } + } + + int pos = thisGroup.find("ignore"); + if (pos == string::npos) { + currSeq.setAligned(currSeq.getUnaligned()); + currSeq.printSequence(trimFASTAFile); + + if(qFileName != ""){ + currQual.printQScores(trimQualFile); + } + + + if(nameFile != ""){ + map::iterator itName = nameMap.find(currSeq.getName()); + if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; } + else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } + } + + int numRedundants = 0; + if (countfile != "") { + map::iterator itCount = nameCount.find(currSeq.getName()); + if (itCount != nameCount.end()) { + trimCountFile << itCount->first << '\t' << itCount->second << endl; + numRedundants = itCount->second-1; + }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); } + } + + if (createGroup) { + if(barcodes.size() != 0){ + + if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); } + + if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; } + else { groupMap[currSeq.getName()] = thisGroup; } + + if (nameFile != "") { + map::iterator itName = nameMap.find(currSeq.getName()); + if (itName != nameMap.end()) { + vector thisSeqsNames; + m->splitAtChar(itName->second, thisSeqsNames, ','); + numRedundants = thisSeqsNames.size()-1; //we already include ourselves below + for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self + outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl; + } + }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } + } + + map::iterator it = groupCounts.find(thisGroup); + if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; } + else { groupCounts[it->first] += (1 + numRedundants); } - } - } - - if(allFiles){ - ofstream output; - m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output); - currSeq.printSequence(output); - output.close(); - - if(qFileName != ""){ - m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output); - currQual.printQScores(output); - output.close(); - } - - if(nameFile != ""){ - map::iterator itName = nameMap.find(currSeq.getName()); - if (itName != nameMap.end()) { - m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output); - output << itName->first << '\t' << itName->second << endl; - output.close(); - }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } - } - } + } + } + + if(allFiles){ + ofstream output; + m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output); + currSeq.printSequence(output); + output.close(); + + if(qFileName != ""){ + m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output); + currQual.printQScores(output); + output.close(); + } + + if(nameFile != ""){ + map::iterator itName = nameMap.find(currSeq.getName()); + if (itName != nameMap.end()) { + m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output); + output << itName->first << '\t' << itName->second << endl; + output.close(); + }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } + } + } + } } else{ if(nameFile != ""){ //needs to be before the currSeq name is changed @@ -1036,10 +1051,10 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName HANDLE hThreadArray[processors-1]; //Create processor worker threads. - for( int i=0; i > tempFASTAFileNames = fastaFileNames; vector > tempPrimerQualFileNames = qualFileNames; vector > tempNameFileNames = nameFileNames; @@ -1081,14 +1096,14 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, - lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m, + lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast, qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage, minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap, nameCount); pDataArray.push_back(tempTrim); - hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]); + hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]); } //parent do my part @@ -1103,8 +1118,32 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName m->openOutputFile(trimNameFileName, temp); temp.close(); m->openOutputFile(scrapNameFileName, temp); temp.close(); } + vector > tempFASTAFileNames = fastaFileNames; + vector > tempPrimerQualFileNames = qualFileNames; + vector > tempNameFileNames = nameFileNames; + if(allFiles){ + ofstream temp; + string extension = toString(processors-1) + ".temp"; + for(int i=0;iopenOutputFile(tempFASTAFileNames[i][j], temp); temp.close(); + + if(qFileName != ""){ + tempPrimerQualFileNames[i][j] += extension; + m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close(); + } + if(nameFile != ""){ + tempNameFileNames[i][j] += extension; + m->openOutputFile(tempNameFileNames[i][j], temp); temp.close(); + } + } + } + } + } - driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]); + driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[processors-1], qLines[processors-1]); processIDS.push_back(processors-1); @@ -1113,6 +1152,9 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ + if (pDataArray[i]->count != pDataArray[i]->lineEnd) { + m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->lineEnd) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; + } for (map::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) { map::iterator it2 = groupCounts.find(it->first); if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; } @@ -1475,57 +1517,65 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< string primerName = primerNameVector[itPrimer->second]; string barcodeName = barcodeNameVector[itBar->second]; - string comboGroupName = ""; - string fastaFileName = ""; - string qualFileName = ""; - string nameFileName = ""; - string countFileName = ""; - - if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->second]; - } - else{ - if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->second]; - } - else{ - comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; - } - } - - - ofstream temp; - fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta"; - if (uniqueNames.count(fastaFileName) == 0) { - outputNames.push_back(fastaFileName); - outputTypes["fasta"].push_back(fastaFileName); - uniqueNames.insert(fastaFileName); - } - - fastaFileNames[itBar->second][itPrimer->second] = fastaFileName; - m->openOutputFile(fastaFileName, temp); temp.close(); - - if(qFileName != ""){ - qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual"; - if (uniqueNames.count(qualFileName) == 0) { - outputNames.push_back(qualFileName); - outputTypes["qfile"].push_back(qualFileName); - } - - qualFileNames[itBar->second][itPrimer->second] = qualFileName; - m->openOutputFile(qualFileName, temp); temp.close(); - } - - if(nameFile != ""){ - nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names"; - if (uniqueNames.count(nameFileName) == 0) { - outputNames.push_back(nameFileName); - outputTypes["name"].push_back(nameFileName); - } - - nameFileNames[itBar->second][itPrimer->second] = nameFileName; - m->openOutputFile(nameFileName, temp); temp.close(); - } + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else { + string comboGroupName = ""; + string fastaFileName = ""; + string qualFileName = ""; + string nameFileName = ""; + string countFileName = ""; + + if(primerName == ""){ + comboGroupName = barcodeNameVector[itBar->second]; + } + else{ + if(barcodeName == ""){ + comboGroupName = primerNameVector[itPrimer->second]; + } + else{ + comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; + } + } + + + ofstream temp; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile)); + variables["[tag]"] = comboGroupName; + fastaFileName = getOutputFileName("fasta", variables); + if (uniqueNames.count(fastaFileName) == 0) { + outputNames.push_back(fastaFileName); + outputTypes["fasta"].push_back(fastaFileName); + uniqueNames.insert(fastaFileName); + } + + fastaFileNames[itBar->second][itPrimer->second] = fastaFileName; + m->openOutputFile(fastaFileName, temp); temp.close(); + + if(qFileName != ""){ + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName)); + qualFileName = getOutputFileName("qfile", variables); + if (uniqueNames.count(qualFileName) == 0) { + outputNames.push_back(qualFileName); + outputTypes["qfile"].push_back(qualFileName); + } + + qualFileNames[itBar->second][itPrimer->second] = qualFileName; + m->openOutputFile(qualFileName, temp); temp.close(); + } + + if(nameFile != ""){ + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile)); + nameFileName = getOutputFileName("name", variables); + if (uniqueNames.count(nameFileName) == 0) { + outputNames.push_back(nameFileName); + outputTypes["name"].push_back(nameFileName); + } + + nameFileNames[itBar->second][itPrimer->second] = nameFileName; + m->openOutputFile(nameFileName, temp); temp.close(); + } + } } } }