From 0bd3a2d33b478f0b09fd6b8ce562e9ab41227535 Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Fri, 21 Dec 2012 14:26:47 -0500 Subject: [PATCH] added ignore option to oligos files for make.contigs, trim.seqs, trim.flows --- makecontigscommand.cpp | 125 ++++++++++--------- makecontigscommand.h | 15 ++- trimflowscommand.cpp | 202 +++++++++++++++++-------------- trimseqscommand.cpp | 264 +++++++++++++++++++++-------------------- trimseqscommand.h | 142 +++++++++++----------- 5 files changed, 406 insertions(+), 342 deletions(-) diff --git a/makecontigscommand.cpp b/makecontigscommand.cpp index 15ffb07..32e2d68 100644 --- a/makecontigscommand.cpp +++ b/makecontigscommand.cpp @@ -628,8 +628,10 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o tempFASTAFileNames[i][j] += toString(getpid()) + ".temp"; m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close(); - tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp"; - m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close(); + if (files[processors-1][1] != "") { + tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp"; + m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close(); + } } } } @@ -750,9 +752,10 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o tempFASTAFileNames[i][j] += extension; m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close(); - - tempPrimerQualFileNames[i][j] += extension; - m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close(); + if (files[processors-1][1] != "") { + tempPrimerQualFileNames[i][j] += extension; + m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close(); + } } } } @@ -778,9 +781,10 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o tempFASTAFileNames[i][j] += extension; m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close(); - - tempPrimerQualFileNames[i][j] += extension; - m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close(); + if (files[processors-1][1] != "") { + tempPrimerQualFileNames[i][j] += extension; + m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close(); + } } } } @@ -796,6 +800,7 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o } //do my part + processIDS.push_back(processors-1); num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"), (outputQual+ toString(processors-1) + ".temp"), (outputScrapFasta+ toString(processors-1) + ".temp"), (outputScrapQual+ toString(processors-1) + ".temp"), (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames); //Wait until all threads have terminated. @@ -1032,8 +1037,12 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string } } - + if(trashCode.length() == 0){ + bool ignore = false; + + if (m->debug) { m->mothurOut(fSeq.getName()); } + if (createGroup) { if(barcodes.size() != 0){ string thisGroup = barcodeNameVector[barcodeIndex]; @@ -1049,16 +1058,20 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); } - groupMap[fSeq.getName()] = thisGroup; + int pos = thisGroup.find("ignore"); + if (pos == string::npos) { + groupMap[fSeq.getName()] = thisGroup; - map::iterator it = groupCounts.find(thisGroup); - if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; } - else { groupCounts[it->first] ++; } + map::iterator it = groupCounts.find(thisGroup); + if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; } + else { groupCounts[it->first] ++; } + }else { ignore = true; } } } + if (m->debug) { m->mothurOut("\n"); } - if(allFiles){ + if(allFiles && !ignore){ ofstream output; m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output); output << ">" << fSeq.getName() << endl << contig << endl; @@ -1657,7 +1670,7 @@ bool MakeContigsCommand::getOligos(vector >& fastaFileNames, vect while(!in.eof()){ in >> type; - + cout << type << endl; if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); } if(type[0] == '#'){ @@ -1740,6 +1753,7 @@ bool MakeContigsCommand::getOligos(vector >& fastaFileNames, vect barcodes[indexBarcode]=newPair; indexBarcode++; barcodeNameVector.push_back(group); + cout << group << endl; }else if(type == "LINKER"){ linker.push_back(foligo); m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n"); @@ -1781,46 +1795,49 @@ bool MakeContigsCommand::getOligos(vector >& fastaFileNames, vect string primerName = primerNameVector[itPrimer->first]; string barcodeName = barcodeNameVector[itBar->first]; - - string comboGroupName = ""; - string fastaFileName = ""; - string qualFileName = ""; - string nameFileName = ""; - string countFileName = ""; - - if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->first]; - } - else{ - if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->first]; - } - else{ - comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first]; - } - } - - - ofstream temp; - fastaFileName = rootname + comboGroupName + ".fasta"; - if (uniqueNames.count(fastaFileName) == 0) { - outputNames.push_back(fastaFileName); - outputTypes["fasta"].push_back(fastaFileName); - uniqueNames.insert(fastaFileName); - } - - fastaFileNames[itBar->first][itPrimer->first] = fastaFileName; - m->openOutputFile(fastaFileName, temp); temp.close(); - - if ((fqualfile != "") || (ffastqfile != "") || (file != "")) { - qualFileName = rootname + ".qual"; - if (uniqueNames.count(qualFileName) == 0) { - outputNames.push_back(qualFileName); - outputTypes["qfile"].push_back(qualFileName); + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else { + string comboGroupName = ""; + string fastaFileName = ""; + string qualFileName = ""; + string nameFileName = ""; + string countFileName = ""; + + if(primerName == ""){ + comboGroupName = barcodeNameVector[itBar->first]; + } + else{ + if(barcodeName == ""){ + comboGroupName = primerNameVector[itPrimer->first]; + } + else{ + comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first]; + } + } + + + ofstream temp; + fastaFileName = rootname + comboGroupName + ".fasta"; + if (uniqueNames.count(fastaFileName) == 0) { + outputNames.push_back(fastaFileName); + outputTypes["fasta"].push_back(fastaFileName); + uniqueNames.insert(fastaFileName); + } + + fastaFileNames[itBar->first][itPrimer->first] = fastaFileName; + m->openOutputFile(fastaFileName, temp); temp.close(); + + if ((fqualfile != "") || (ffastqfile != "") || (file != "")) { + qualFileName = rootname + ".qual"; + if (uniqueNames.count(qualFileName) == 0) { + outputNames.push_back(qualFileName); + outputTypes["qfile"].push_back(qualFileName); + } + + qualFileNames[itBar->first][itPrimer->first] = qualFileName; + m->openOutputFile(qualFileName, temp); temp.close(); } - - qualFileNames[itBar->first][itPrimer->first] = qualFileName; - m->openOutputFile(qualFileName, temp); temp.close(); } } } diff --git a/makecontigscommand.h b/makecontigscommand.h index 65b3658..2732d68 100644 --- a/makecontigscommand.h +++ b/makecontigscommand.h @@ -335,6 +335,7 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ } if(trashCode.length() == 0){ + bool ignore = false; if (pDataArray->createGroup) { if(pDataArray->barcodes.size() != 0){ string thisGroup = pDataArray->barcodeNameVector[barcodeIndex]; @@ -350,16 +351,18 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ if (pDataArray->m->debug) { pDataArray->m->mothurOut(", group= " + thisGroup + "\n"); } - pDataArray->groupMap[fSeq.getName()] = thisGroup; - - map::iterator it = pDataArray->groupCounts.find(thisGroup); - if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[thisGroup] = 1; } - else { pDataArray->groupCounts[it->first] ++; } + int pos = thisGroup.find("ignore"); + if (pos == string::npos) { + pDataArray->groupMap[fSeq.getName()] = thisGroup; + map::iterator it = pDataArray->groupCounts.find(thisGroup); + if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[thisGroup] = 1; } + else { pDataArray->groupCounts[it->first] ++; } + }else { ignore = true; } } } - if(pDataArray->allFiles){ + if(pDataArray->allFiles && !ignore){ ofstream output; pDataArray->m->openOutputFileAppend(pDataArray->fastaFileNames[barcodeIndex][primerIndex], output); output << ">" << fSeq.getName() << endl << contig << endl; diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp index cc49755..44121b9 100644 --- a/trimflowscommand.cpp +++ b/trimflowscommand.cpp @@ -63,7 +63,7 @@ string TrimFlowsCommand::getOutputPattern(string type) { if (type == "flow") { pattern = "[filename],[tag],flow"; } else if (type == "fasta") { pattern = "[filename],flow.fasta"; } - else if (type == "file") { pattern = "[filename],flow.files"; } + else if (type == "file") { pattern = "[filename],[tag],flow.files"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } return pattern; @@ -313,27 +313,29 @@ int TrimFlowsCommand::execute(){ for(int i=0;imothurRemove(barcodePrimerComboFileNames[i][j]); - } - else{ - output << m->getFullPathName(barcodePrimerComboFileNames[i][j]) << endl; - outputNames.push_back(barcodePrimerComboFileNames[i][j]); - outputTypes["flow"].push_back(barcodePrimerComboFileNames[i][j]); - } - namesAlreadyProcessed.insert(barcodePrimerComboFileNames[i][j]); + if (barcodePrimerComboFileNames[i][j] != "") { + FILE * pFile; + unsigned long long size; + + //get num bytes in file + pFile = fopen (barcodePrimerComboFileNames[i][j].c_str(),"rb"); + if (pFile==NULL) perror ("Error opening file"); + else{ + fseek (pFile, 0, SEEK_END); + size=ftell(pFile); + fclose (pFile); + } + + if(size < 10){ + m->mothurRemove(barcodePrimerComboFileNames[i][j]); + } + else{ + output << m->getFullPathName(barcodePrimerComboFileNames[i][j]) << endl; + outputNames.push_back(barcodePrimerComboFileNames[i][j]); + outputTypes["flow"].push_back(barcodePrimerComboFileNames[i][j]); + } + namesAlreadyProcessed.insert(barcodePrimerComboFileNames[i][j]); + } } } } @@ -350,14 +352,7 @@ int TrimFlowsCommand::execute(){ } outputTypes["file"].push_back(flowFilesFileName); outputNames.push_back(flowFilesFileName); - -// set fasta file as new current fastafile -// string current = ""; -// itTypes = outputTypes.find("fasta"); -// if (itTypes != outputTypes.end()) { -// if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } -// } - + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -399,10 +394,12 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN if(allFiles){ for(int i=0;iopenOutputFile(thisBarcodePrimerComboFileNames[i][j], temp); - temp << maxFlows << endl; - temp.close(); + if (thisBarcodePrimerComboFileNames[i][j] != "") { + ofstream temp; + m->openOutputFile(thisBarcodePrimerComboFileNames[i][j], temp); + temp << maxFlows << endl; + temp.close(); + } } } } @@ -471,19 +468,35 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN } if(trashCode.length() == 0){ - - flowData.printFlows(trimFlowFile); - - if(fasta) { currSeq.printSequence(fastaFile); } - - if(allFiles){ - ofstream output; - m->openOutputFileAppend(thisBarcodePrimerComboFileNames[barcodeIndex][primerIndex], output); - output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint); - - flowData.printFlows(output); - output.close(); - } + string thisGroup = ""; + if(barcodes.size() != 0){ + thisGroup = barcodeNameVector[barcodeIndex]; + if (primers.size() != 0) { + if (primerNameVector[primerIndex] != "") { + if(thisGroup != "") { + thisGroup += "." + primerNameVector[primerIndex]; + }else { + thisGroup = primerNameVector[primerIndex]; + } + } + } + } + + int pos = thisGroup.find("ignore"); + if (pos == string::npos) { + flowData.printFlows(trimFlowFile); + + if(fasta) { currSeq.printSequence(fastaFile); } + + if(allFiles){ + ofstream output; + m->openOutputFileAppend(thisBarcodePrimerComboFileNames[barcodeIndex][primerIndex], output); + output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint); + + flowData.printFlows(output); + output.close(); + } + } } else{ flowData.printFlows(scrapFlowFile, trashCode); @@ -620,34 +633,37 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ string primerName = primerNameVector[itPrimer->second]; string barcodeName = barcodeNameVector[itBar->second]; - - string comboGroupName = ""; - string fileName = ""; - - map variables; - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName)); - if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->second]; - variables["[tag]"] = comboGroupName; - fileName = getOutputFileName("flow", variables); - } - else{ - if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->second]; - } - else{ - comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; - } - variables["[tag]"] = comboGroupName; - fileName = getOutputFileName("flow", variables); - } - - outFlowFileNames[itBar->second][itPrimer->second] = fileName; - - ofstream temp; - m->openOutputFile(fileName, temp); - temp.close(); + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else { + string comboGroupName = ""; + string fileName = ""; + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName)); + + if(primerName == ""){ + comboGroupName = barcodeNameVector[itBar->second]; + variables["[tag]"] = comboGroupName; + fileName = getOutputFileName("flow", variables); + } + else{ + if(barcodeName == ""){ + comboGroupName = primerNameVector[itPrimer->second]; + } + else{ + comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; + } + variables["[tag]"] = comboGroupName; + fileName = getOutputFileName("flow", variables); + } + + outFlowFileNames[itBar->second][itPrimer->second] = fileName; + + ofstream temp; + m->openOutputFile(fileName, temp); + temp.close(); + } } } } @@ -802,11 +818,12 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim if(allFiles){ for(int i=0;iopenOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); - temp.close(); - + if (tempBarcodePrimerComboFileNames[i][j] != "") { + tempBarcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp"; + ofstream temp; + m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); + temp.close(); + } } } } @@ -864,11 +881,12 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim if(allFiles){ for(int i=0;iopenOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); - temp.close(); - + if (tempBarcodePrimerComboFileNames[i][j] != "") { + tempBarcodePrimerComboFileNames[i][j] += extension; + ofstream temp; + m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); + temp.close(); + } } } } @@ -898,10 +916,12 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim if(allFiles){ for(int i=0;iopenOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); - temp.close(); + if (tempBarcodePrimerComboFileNames[i][j] != "") { + tempBarcodePrimerComboFileNames[i][j] += toString(processors-1) + ".temp"; + ofstream temp; + m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); + temp.close(); + } } } @@ -945,8 +965,10 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim if(allFiles){ for (int j = 0; j < barcodePrimerComboFileNames.size(); j++) { for (int k = 0; k < barcodePrimerComboFileNames[0].size(); k++) { - m->appendFiles((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"), barcodePrimerComboFileNames[j][k]); - m->mothurRemove((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp")); + if (barcodePrimerComboFileNames[j][k] != "") { + m->appendFiles((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"), barcodePrimerComboFileNames[j][k]); + m->mothurRemove((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp")); + } } } } diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index ed2b046..5308407 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -779,32 +779,10 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } } if(trashCode.length() == 0){ - currSeq.setAligned(currSeq.getUnaligned()); - currSeq.printSequence(trimFASTAFile); - - if(qFileName != ""){ - currQual.printQScores(trimQualFile); - } - - - if(nameFile != ""){ - map::iterator itName = nameMap.find(currSeq.getName()); - if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; } - else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } - } - - int numRedundants = 0; - if (countfile != "") { - map::iterator itCount = nameCount.find(currSeq.getName()); - if (itCount != nameCount.end()) { - trimCountFile << itCount->first << '\t' << itCount->second << endl; - numRedundants = itCount->second-1; - }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); } - } - - if (createGroup) { + string thisGroup = ""; + if (createGroup) { if(barcodes.size() != 0){ - string thisGroup = barcodeNameVector[barcodeIndex]; + thisGroup = barcodeNameVector[barcodeIndex]; if (primers.size() != 0) { if (primerNameVector[primerIndex] != "") { if(thisGroup != "") { @@ -814,52 +792,83 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string } } } - - if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); } - - if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; } - else { groupMap[currSeq.getName()] = thisGroup; } - - if (nameFile != "") { - map::iterator itName = nameMap.find(currSeq.getName()); - if (itName != nameMap.end()) { - vector thisSeqsNames; - m->splitAtChar(itName->second, thisSeqsNames, ','); - numRedundants = thisSeqsNames.size()-1; //we already include ourselves below - for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self - outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl; - } - }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } - } - - map::iterator it = groupCounts.find(thisGroup); - if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; } - else { groupCounts[it->first] += (1 + numRedundants); } + } + } + + int pos = thisGroup.find("ignore"); + if (pos == string::npos) { + currSeq.setAligned(currSeq.getUnaligned()); + currSeq.printSequence(trimFASTAFile); + + if(qFileName != ""){ + currQual.printQScores(trimQualFile); + } + + + if(nameFile != ""){ + map::iterator itName = nameMap.find(currSeq.getName()); + if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; } + else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } + } + + int numRedundants = 0; + if (countfile != "") { + map::iterator itCount = nameCount.find(currSeq.getName()); + if (itCount != nameCount.end()) { + trimCountFile << itCount->first << '\t' << itCount->second << endl; + numRedundants = itCount->second-1; + }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); } + } + + if (createGroup) { + if(barcodes.size() != 0){ + + if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); } + + if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; } + else { groupMap[currSeq.getName()] = thisGroup; } + + if (nameFile != "") { + map::iterator itName = nameMap.find(currSeq.getName()); + if (itName != nameMap.end()) { + vector thisSeqsNames; + m->splitAtChar(itName->second, thisSeqsNames, ','); + numRedundants = thisSeqsNames.size()-1; //we already include ourselves below + for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self + outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl; + } + }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } + } + + map::iterator it = groupCounts.find(thisGroup); + if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; } + else { groupCounts[it->first] += (1 + numRedundants); } - } - } - - if(allFiles){ - ofstream output; - m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output); - currSeq.printSequence(output); - output.close(); - - if(qFileName != ""){ - m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output); - currQual.printQScores(output); - output.close(); - } - - if(nameFile != ""){ - map::iterator itName = nameMap.find(currSeq.getName()); - if (itName != nameMap.end()) { - m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output); - output << itName->first << '\t' << itName->second << endl; - output.close(); - }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } - } - } + } + } + + if(allFiles){ + ofstream output; + m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output); + currSeq.printSequence(output); + output.close(); + + if(qFileName != ""){ + m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output); + currQual.printQScores(output); + output.close(); + } + + if(nameFile != ""){ + map::iterator itName = nameMap.find(currSeq.getName()); + if (itName != nameMap.end()) { + m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output); + output << itName->first << '\t' << itName->second << endl; + output.close(); + }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } + } + } + } } else{ if(nameFile != ""){ //needs to be before the currSeq name is changed @@ -1505,62 +1514,65 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< string primerName = primerNameVector[itPrimer->second]; string barcodeName = barcodeNameVector[itBar->second]; - string comboGroupName = ""; - string fastaFileName = ""; - string qualFileName = ""; - string nameFileName = ""; - string countFileName = ""; - - if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->second]; - } - else{ - if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->second]; - } - else{ - comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; - } - } - - - ofstream temp; - map variables; - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile)); - variables["[tag]"] = comboGroupName; - fastaFileName = getOutputFileName("fasta", variables); - if (uniqueNames.count(fastaFileName) == 0) { - outputNames.push_back(fastaFileName); - outputTypes["fasta"].push_back(fastaFileName); - uniqueNames.insert(fastaFileName); - } - - fastaFileNames[itBar->second][itPrimer->second] = fastaFileName; - m->openOutputFile(fastaFileName, temp); temp.close(); - - if(qFileName != ""){ - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName)); - qualFileName = getOutputFileName("qfile", variables); - if (uniqueNames.count(qualFileName) == 0) { - outputNames.push_back(qualFileName); - outputTypes["qfile"].push_back(qualFileName); - } - - qualFileNames[itBar->second][itPrimer->second] = qualFileName; - m->openOutputFile(qualFileName, temp); temp.close(); - } - - if(nameFile != ""){ - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile)); - nameFileName = getOutputFileName("name", variables); - if (uniqueNames.count(nameFileName) == 0) { - outputNames.push_back(nameFileName); - outputTypes["name"].push_back(nameFileName); - } - - nameFileNames[itBar->second][itPrimer->second] = nameFileName; - m->openOutputFile(nameFileName, temp); temp.close(); - } + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else { + string comboGroupName = ""; + string fastaFileName = ""; + string qualFileName = ""; + string nameFileName = ""; + string countFileName = ""; + + if(primerName == ""){ + comboGroupName = barcodeNameVector[itBar->second]; + } + else{ + if(barcodeName == ""){ + comboGroupName = primerNameVector[itPrimer->second]; + } + else{ + comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; + } + } + + + ofstream temp; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile)); + variables["[tag]"] = comboGroupName; + fastaFileName = getOutputFileName("fasta", variables); + if (uniqueNames.count(fastaFileName) == 0) { + outputNames.push_back(fastaFileName); + outputTypes["fasta"].push_back(fastaFileName); + uniqueNames.insert(fastaFileName); + } + + fastaFileNames[itBar->second][itPrimer->second] = fastaFileName; + m->openOutputFile(fastaFileName, temp); temp.close(); + + if(qFileName != ""){ + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName)); + qualFileName = getOutputFileName("qfile", variables); + if (uniqueNames.count(qualFileName) == 0) { + outputNames.push_back(qualFileName); + outputTypes["qfile"].push_back(qualFileName); + } + + qualFileNames[itBar->second][itPrimer->second] = qualFileName; + m->openOutputFile(qualFileName, temp); temp.close(); + } + + if(nameFile != ""){ + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile)); + nameFileName = getOutputFileName("name", variables); + if (uniqueNames.count(nameFileName) == 0) { + outputNames.push_back(nameFileName); + outputTypes["name"].push_back(nameFileName); + } + + nameFileNames[itBar->second][itPrimer->second] = nameFileName; + m->openOutputFile(nameFileName, temp); temp.close(); + } + } } } } diff --git a/trimseqscommand.h b/trimseqscommand.h index 60d29f9..891b14d 100644 --- a/trimseqscommand.h +++ b/trimseqscommand.h @@ -395,29 +395,8 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ } if(trashCode.length() == 0){ - currSeq.setAligned(currSeq.getUnaligned()); - currSeq.printSequence(trimFASTAFile); - - if(pDataArray->qFileName != ""){ - currQual.printQScores(trimQualFile); - } - - if(pDataArray->nameFile != ""){ - map::iterator itName = pDataArray->nameMap.find(currSeq.getName()); - if (itName != pDataArray->nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; } - else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); } - } - - int numRedundants = 0; - if (pDataArray->countfile != "") { - map::iterator itCount = pDataArray->nameCount.find(currSeq.getName()); - if (itCount != pDataArray->nameCount.end()) { - trimCountFile << itCount->first << '\t' << itCount->second << endl; - numRedundants = itCount->second-1; - }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); } - } - - if (pDataArray->createGroup) { + string thisGroup = ""; + if (pDataArray->createGroup) { if(pDataArray->barcodes.size() != 0){ string thisGroup = pDataArray->barcodeNameVector[barcodeIndex]; if (pDataArray->primers.size() != 0) { @@ -429,50 +408,81 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ } } } - - if (pDataArray->countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; } - else { pDataArray->groupMap[currSeq.getName()] = thisGroup; } - - if (pDataArray->nameFile != "") { - map::iterator itName = pDataArray->nameMap.find(currSeq.getName()); - if (itName != pDataArray->nameMap.end()) { - vector thisSeqsNames; - pDataArray->m->splitAtChar(itName->second, thisSeqsNames, ','); - numRedundants = thisSeqsNames.size()-1; //we already include ourselves below - for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self - outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl; - } - }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); } - } - - map::iterator it = pDataArray->groupCounts.find(thisGroup); - if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[thisGroup] = 1 + numRedundants; } - else { pDataArray->groupCounts[it->first] += (1 + numRedundants); } + } + } + + int pos = thisGroup.find("ignore"); + if (pos == string::npos) { + + currSeq.setAligned(currSeq.getUnaligned()); + currSeq.printSequence(trimFASTAFile); + + if(pDataArray->qFileName != ""){ + currQual.printQScores(trimQualFile); + } + + if(pDataArray->nameFile != ""){ + map::iterator itName = pDataArray->nameMap.find(currSeq.getName()); + if (itName != pDataArray->nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; } + else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); } + } + + int numRedundants = 0; + if (pDataArray->countfile != "") { + map::iterator itCount = pDataArray->nameCount.find(currSeq.getName()); + if (itCount != pDataArray->nameCount.end()) { + trimCountFile << itCount->first << '\t' << itCount->second << endl; + numRedundants = itCount->second-1; + }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); } + } + + if (pDataArray->createGroup) { + if(pDataArray->barcodes.size() != 0){ + + if (pDataArray->countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; } + else { pDataArray->groupMap[currSeq.getName()] = thisGroup; } + + if (pDataArray->nameFile != "") { + map::iterator itName = pDataArray->nameMap.find(currSeq.getName()); + if (itName != pDataArray->nameMap.end()) { + vector thisSeqsNames; + pDataArray->m->splitAtChar(itName->second, thisSeqsNames, ','); + numRedundants = thisSeqsNames.size()-1; //we already include ourselves below + for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self + outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl; + } + }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); } + } + + map::iterator it = pDataArray->groupCounts.find(thisGroup); + if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[thisGroup] = 1 + numRedundants; } + else { pDataArray->groupCounts[it->first] += (1 + numRedundants); } + + } + } + + if(pDataArray->allFiles){ + ofstream output; + pDataArray->m->openOutputFileAppend(pDataArray->fastaFileNames[barcodeIndex][primerIndex], output); + currSeq.printSequence(output); + output.close(); - } - } - - if(pDataArray->allFiles){ - ofstream output; - pDataArray->m->openOutputFileAppend(pDataArray->fastaFileNames[barcodeIndex][primerIndex], output); - currSeq.printSequence(output); - output.close(); - - if(pDataArray->qFileName != ""){ - pDataArray->m->openOutputFileAppend(pDataArray->qualFileNames[barcodeIndex][primerIndex], output); - currQual.printQScores(output); - output.close(); - } - - if(pDataArray->nameFile != ""){ - map::iterator itName = pDataArray->nameMap.find(currSeq.getName()); - if (itName != pDataArray->nameMap.end()) { - pDataArray->m->openOutputFileAppend(pDataArray->nameFileNames[barcodeIndex][primerIndex], output); - output << itName->first << '\t' << itName->second << endl; - output.close(); - }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); } - } - } + if(pDataArray->qFileName != ""){ + pDataArray->m->openOutputFileAppend(pDataArray->qualFileNames[barcodeIndex][primerIndex], output); + currQual.printQScores(output); + output.close(); + } + + if(pDataArray->nameFile != ""){ + map::iterator itName = pDataArray->nameMap.find(currSeq.getName()); + if (itName != pDataArray->nameMap.end()) { + pDataArray->m->openOutputFileAppend(pDataArray->nameFileNames[barcodeIndex][primerIndex], output); + output << itName->first << '\t' << itName->second << endl; + output.close(); + }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); } + } + } + } } else{ if(pDataArray->nameFile != ""){ //needs to be before the currSeq name is changed -- 2.39.2