X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=trimseqscommand.cpp;h=060733036297fb2bd023a2783774218e2f513e80;hp=64d9d9ec161b5615ef59f3ae57429e38bdd49cdd;hb=fefd5ee1517abd3bc38b469cb2dffc85a1571c7e;hpb=1a5c2356c1b955c6ec024b2baf9f46377ee7c72e diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 64d9d9e..0607330 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -12,6 +12,7 @@ #include "trimoligos.h" + //********************************************************************************************************************** vector TrimSeqsCommand::setParameters(){ try { @@ -21,9 +22,10 @@ vector TrimSeqsCommand::setParameters(){ CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount); CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip); + CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient); CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig); CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop); - CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength); + CommandParameter pminlength("minlength", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pminlength); CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength); CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs); CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs); @@ -33,6 +35,7 @@ vector TrimSeqsCommand::setParameters(){ CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles); CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward); + CommandParameter plogtransform("logtransform", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(plogtransform); CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim); CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold); CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage); @@ -60,9 +63,10 @@ string TrimSeqsCommand::getHelpString(){ string helpString = ""; helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n"; helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n"; - helpString += "The trim.seqs command parameters are fasta, name, count, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n"; + helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast, logtransform and allfiles.\n"; helpString += "The fasta parameter is required.\n"; helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n"; + helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. If found the sequence is flipped. The default is false.\n"; helpString += "The oligos parameter allows you to provide an oligos file.\n"; helpString += "The name parameter allows you to provide a names file with your fasta file.\n"; helpString += "The count parameter allows you to provide a count file with your fasta file.\n"; @@ -82,6 +86,7 @@ string TrimSeqsCommand::getHelpString(){ helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n"; helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n"; helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n"; + helpString += "The logtransform parameter allows you to indicate you want the averages for the qwindowaverage, rollaverage and qaverage to be calculated using a logtransform. Default=F.\n"; helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n"; helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n"; helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n"; @@ -109,7 +114,7 @@ string TrimSeqsCommand::getOutputPattern(string type) { else if (type == "fasta") { pattern = "[filename],[tag],fasta"; } else if (type == "group") { pattern = "[filename],groups"; } else if (type == "name") { pattern = "[filename],[tag],names"; } - else if (type == "count") { pattern = "[filename],count_table"; } + else if (type == "count") { pattern = "[filename],[tag],count_table-[filename],count_table"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } return pattern; @@ -123,7 +128,7 @@ string TrimSeqsCommand::getOutputPattern(string type) { TrimSeqsCommand::TrimSeqsCommand(){ try { - abort = true; calledHelp = true; + abort = true; calledHelp = true; setParameters(); vector tempOutNames; outputTypes["fasta"] = tempOutNames; @@ -254,7 +259,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; } m->mothurConvert(temp, maxHomoP); - temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; } + temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "1"; } m->mothurConvert(temp, minLength); temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; } @@ -326,6 +331,12 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; } keepforward = m->isTrue(temp); + + temp = validParameter.validFile(parameters, "logtransform", false); if (temp == "not found") { temp = "F"; } + logtransform = m->isTrue(temp); + + temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; } + reorient = m->isTrue(temp); temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); @@ -366,6 +377,7 @@ int TrimSeqsCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } + pairedOligos = false; numFPrimers = 0; //this needs to be initialized numRPrimers = 0; numSpacers = 0; @@ -374,6 +386,7 @@ int TrimSeqsCommand::execute(){ vector > fastaFileNames; vector > qualFileNames; vector > nameFileNames; + map uniqueFastaNames;// so we don't add the same groupfile multiple times map variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile)); @@ -416,7 +429,7 @@ int TrimSeqsCommand::execute(){ if (countfile != "") { CountTable ct; - ct.readTable(countfile); + ct.readTable(countfile, true, false); nameCount = ct.getNameMap(); outputNames.push_back(trimCountFile); outputNames.push_back(scrapCountFile); @@ -429,7 +442,7 @@ int TrimSeqsCommand::execute(){ string outputGroupFileName; if(oligoFile != ""){ - createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames); + createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames, uniqueFastaNames); if ((createGroup) && (countfile == "")){ map myvariables; myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile)); @@ -437,7 +450,9 @@ int TrimSeqsCommand::execute(){ outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); } } - + + if (m->control_pressed) { return 0; } + //fills lines and qlines setLines(fastaFile, qFileName); @@ -451,7 +466,6 @@ int TrimSeqsCommand::execute(){ if (m->control_pressed) { return 0; } if(allFiles){ - map uniqueFastaNames;// so we don't add the same groupfile multiple times map::iterator it; set namesToRemove; for(int i=0;imothurRemove(nameFileNames[i][j]); namesToRemove.insert(nameFileNames[i][j]); } - }else{ - it = uniqueFastaNames.find(fastaFileNames[i][j]); - if (it == uniqueFastaNames.end()) { - uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i]; - } + uniqueFastaNames.erase(fastaFileNames[i][j]); //remove from list for group file print } } } @@ -495,8 +505,8 @@ int TrimSeqsCommand::execute(){ map myvariables; myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(it->first)); string thisGroupName = ""; - if (countfile == "") { thisGroupName = getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); } - else { thisGroupName = getOutputFileName("count",variables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName); } + if (countfile == "") { thisGroupName = getOutputFileName("group",myvariables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); } + else { thisGroupName = getOutputFileName("count",myvariables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName); } m->openOutputFile(thisGroupName, out); if (countfile != "") { out << "Representative_Sequence\ttotal\t" << it->second << endl; } @@ -530,7 +540,7 @@ int TrimSeqsCommand::execute(){ if (countfile != "") { //create countfile with group info included CountTable* ct = new CountTable(); - ct->readTable(trimCountFile); + ct->readTable(trimCountFile, true, false); map justTrimmedNames = ct->getNameMap(); delete ct; @@ -666,11 +676,19 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string int count = 0; bool moreSeqs = 1; - TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); - + TrimOligos* trimOligos = NULL; + if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); } + else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers()); } + + TrimOligos* rtrimOligos = NULL; + if (reorient) { + rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size(); + } + while (moreSeqs) { - if (m->control_pressed) { + if (m->control_pressed) { + delete trimOligos; if (reorient) { delete rtrimOligos; } inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close(); if ((createGroup) && (countfile == "")) { outGroupsFile.close(); } if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); } @@ -684,14 +702,16 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string int currentSeqsDiffs = 0; Sequence currSeq(inFASTA); m->gobble(inFASTA); - //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl; + //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl; + Sequence savedSeq(currSeq.getName(), currSeq.getAligned()); - QualityScores currQual; + QualityScores currQual; QualityScores savedQual; if(qFileName != ""){ currQual = QualityScores(qFile); m->gobble(qFile); + savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores()); //cout << currQual.getName() << endl; } - + string origSeq = currSeq.getUnaligned(); if (origSeq != "") { @@ -699,38 +719,79 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string int primerIndex = 0; if(numLinkers != 0){ - success = trimOligos.stripLinker(currSeq, currQual); + success = trimOligos->stripLinker(currSeq, currQual); if(success > ldiffs) { trashCode += 'k'; } else{ currentSeqsDiffs += success; } } - if(barcodes.size() != 0){ - success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex); - if(success > bdiffs) { trashCode += 'b'; } + if(numBarcodes != 0){ + success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex); + if(success > bdiffs) { + trashCode += 'b'; + } else{ currentSeqsDiffs += success; } } - + //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl; if(numSpacers != 0){ - success = trimOligos.stripSpacer(currSeq, currQual); + success = trimOligos->stripSpacer(currSeq, currQual); if(success > sdiffs) { trashCode += 's'; } else{ currentSeqsDiffs += success; } } if(numFPrimers != 0){ - success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward); - if(success > pdiffs) { trashCode += 'f'; } + success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward); + if(success > pdiffs) { + trashCode += 'f'; + } else{ currentSeqsDiffs += success; } } if (currentSeqsDiffs > tdiffs) { trashCode += 't'; } if(numRPrimers != 0){ - success = trimOligos.stripReverse(currSeq, currQual); + success = trimOligos->stripReverse(currSeq, currQual); if(!success) { trashCode += 'r'; } } - + + if (reorient && (trashCode != "")) { //if you failed and want to check the reverse + int thisSuccess = 0; + string thisTrashCode = ""; + int thisCurrentSeqsDiffs = 0; + + int thisBarcodeIndex = 0; + int thisPrimerIndex = 0; + //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl; + if(numBarcodes != 0){ + thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex); + if(thisSuccess > bdiffs) { thisTrashCode += "b"; } + else{ thisCurrentSeqsDiffs += thisSuccess; } + } + //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl; + if(numFPrimers != 0){ + thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, keepforward); + if(thisSuccess > pdiffs) { thisTrashCode += "f"; } + else{ thisCurrentSeqsDiffs += thisSuccess; } + } + + if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; } + + if (thisTrashCode == "") { + trashCode = thisTrashCode; + success = thisSuccess; + currentSeqsDiffs = thisCurrentSeqsDiffs; + barcodeIndex = thisBarcodeIndex; + primerIndex = thisPrimerIndex; + savedSeq.reverseComplement(); + currSeq.setAligned(savedSeq.getAligned()); + if(qFileName != ""){ + savedQual.flipQScores(); + currQual.setScores(savedQual.getScores()); + } + }else { trashCode += "(" + thisTrashCode + ")"; } + } + if(keepFirst != 0){ success = keepFirstTrim(currSeq, currQual); } @@ -745,9 +806,9 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string int origLength = currSeq.getNumBases(); if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); } - else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); } - else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); } - else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); } + else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage, logtransform); } + else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage, logtransform); } + else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage, logtransform); } else { success = 1; } //you don't want to trim, if it fails above then scrap it @@ -779,87 +840,83 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } } if(trashCode.length() == 0){ - currSeq.setAligned(currSeq.getUnaligned()); - currSeq.printSequence(trimFASTAFile); - - if(qFileName != ""){ - currQual.printQScores(trimQualFile); - } - + string thisGroup = ""; + if (createGroup) { thisGroup = oligos.getGroupName(barcodeIndex, primerIndex); } - if(nameFile != ""){ - map::iterator itName = nameMap.find(currSeq.getName()); - if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; } - else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } - } - - int numRedundants = 0; - if (countfile != "") { - map::iterator itCount = nameCount.find(currSeq.getName()); - if (itCount != nameCount.end()) { - trimCountFile << itCount->first << '\t' << itCount->second << endl; - numRedundants = itCount->second-1; - }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); } - } - - if (createGroup) { - if(barcodes.size() != 0){ - string thisGroup = barcodeNameVector[barcodeIndex]; - if (primers.size() != 0) { - if (primerNameVector[primerIndex] != "") { - if(thisGroup != "") { - thisGroup += "." + primerNameVector[primerIndex]; - }else { - thisGroup = primerNameVector[primerIndex]; - } - } - } - - if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); } - - if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; } - else { groupMap[currSeq.getName()] = thisGroup; } - - if (nameFile != "") { - map::iterator itName = nameMap.find(currSeq.getName()); - if (itName != nameMap.end()) { - vector thisSeqsNames; - m->splitAtChar(itName->second, thisSeqsNames, ','); - numRedundants = thisSeqsNames.size()-1; //we already include ourselves below - for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self - outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl; - } - }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } - } - - map::iterator it = groupCounts.find(thisGroup); - if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; } - else { groupCounts[it->first] += (1 + numRedundants); } + int pos = thisGroup.find("ignore"); + if (pos == string::npos) { + currSeq.setAligned(currSeq.getUnaligned()); + currSeq.printSequence(trimFASTAFile); + + if(qFileName != ""){ + currQual.printQScores(trimQualFile); + } + + + if(nameFile != ""){ + map::iterator itName = nameMap.find(currSeq.getName()); + if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; } + else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } + } + + int numRedundants = 0; + if (countfile != "") { + map::iterator itCount = nameCount.find(currSeq.getName()); + if (itCount != nameCount.end()) { + trimCountFile << itCount->first << '\t' << itCount->second << endl; + numRedundants = itCount->second-1; + }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); } + } + + if (createGroup) { + if(numBarcodes != 0){ + + if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); } + + if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; } + else { groupMap[currSeq.getName()] = thisGroup; } + + if (nameFile != "") { + map::iterator itName = nameMap.find(currSeq.getName()); + if (itName != nameMap.end()) { + vector thisSeqsNames; + m->splitAtChar(itName->second, thisSeqsNames, ','); + numRedundants = thisSeqsNames.size()-1; //we already include ourselves below + for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self + outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl; + } + }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } + } + + map::iterator it = groupCounts.find(thisGroup); + if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; } + else { groupCounts[it->first] += (1 + numRedundants); } - } - } - - if(allFiles){ - ofstream output; - m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output); - currSeq.printSequence(output); - output.close(); - - if(qFileName != ""){ - m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output); - currQual.printQScores(output); - output.close(); - } - - if(nameFile != ""){ - map::iterator itName = nameMap.find(currSeq.getName()); - if (itName != nameMap.end()) { - m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output); - output << itName->first << '\t' << itName->second << endl; - output.close(); - }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } - } - } + } + } + + if(allFiles){ + ofstream output; + m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output); + currSeq.printSequence(output); + output.close(); + + if(qFileName != ""){ + m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output); + currQual.printQScores(output); + output.close(); + } + + if(nameFile != ""){ + map::iterator itName = nameMap.find(currSeq.getName()); + if (itName != nameMap.end()) { + m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output); + output << itName->first << '\t' << itName->second << endl; + output.close(); + }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); } + } + } + } } else{ if(nameFile != ""){ //needs to be before the currSeq name is changed @@ -900,7 +957,8 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string //report progress if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } - + delete trimOligos; + if (reorient) { delete rtrimOligos; } inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close(); @@ -929,7 +987,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { - int pid = fork(); + pid_t pid = fork(); if (pid > 0) { processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later @@ -946,15 +1004,15 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName for(int i=0;imothurGetpid(process) + ".temp"; m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close(); if(qFileName != ""){ - tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp"; + tempPrimerQualFileNames[i][j] += m->mothurGetpid(process) + ".temp"; m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close(); } if(nameFile != ""){ - tempNameFileNames[i][j] += toString(getpid()) + ".temp"; + tempNameFileNames[i][j] += m->mothurGetpid(process) + ".temp"; m->openOutputFile(tempNameFileNames[i][j], temp); temp.close(); } } @@ -964,27 +1022,27 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName driverCreateTrim(filename, qFileName, - (trimFASTAFileName + toString(getpid()) + ".temp"), - (scrapFASTAFileName + toString(getpid()) + ".temp"), - (trimQualFileName + toString(getpid()) + ".temp"), - (scrapQualFileName + toString(getpid()) + ".temp"), - (trimNameFileName + toString(getpid()) + ".temp"), - (scrapNameFileName + toString(getpid()) + ".temp"), - (trimCountFileName + toString(getpid()) + ".temp"), - (scrapCountFileName + toString(getpid()) + ".temp"), - (groupFile + toString(getpid()) + ".temp"), + (trimFASTAFileName + m->mothurGetpid(process) + ".temp"), + (scrapFASTAFileName + m->mothurGetpid(process) + ".temp"), + (trimQualFileName + m->mothurGetpid(process) + ".temp"), + (scrapQualFileName + m->mothurGetpid(process) + ".temp"), + (trimNameFileName + m->mothurGetpid(process) + ".temp"), + (scrapNameFileName + m->mothurGetpid(process) + ".temp"), + (trimCountFileName + m->mothurGetpid(process) + ".temp"), + (scrapCountFileName + m->mothurGetpid(process) + ".temp"), + (groupFile + m->mothurGetpid(process) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[process], qLines[process]); - if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); } + if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + m->mothurGetpid(process) + '\n'); } //pass groupCounts to parent if(createGroup){ ofstream out; - string tempFile = filename + toString(getpid()) + ".num.temp"; + string tempFile = filename + m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << groupCounts.size() << endl; @@ -1088,10 +1146,10 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName tempPrimerQualFileNames, tempNameFileNames, lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m, - pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, - primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast, - qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage, - minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap, nameCount); + pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, oligoFile, + createGroup, allFiles, keepforward, keepFirst, removeLast, + qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage, logtransform, + minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount); pDataArray.push_back(tempTrim); hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]); @@ -1143,6 +1201,9 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ + if (pDataArray[i]->count != pDataArray[i]->lineEnd) { + m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->lineEnd) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; + } for (map::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) { map::iterator it2 = groupCounts.find(it->first); if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; } @@ -1304,6 +1365,8 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) { string sname = ""; nameStream >> sname; sname = sname.substr(1); + m->checkName(sname); + map::iterator it = firstSeqNames.find(sname); if(it != firstSeqNames.end()) { //this is the start of a new chunk @@ -1395,21 +1458,218 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) { //*************************************************************************************************************** -bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector >& qualFileNames, vector >& nameFileNames){ +bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector >& qualFileNames, vector >& nameFileNames, map& fastaFile2Group){ try { - ifstream inOligos; + + bool allBlank = false; + oligos.read(oligoFile); + + if (m->control_pressed) { return false; } //error in reading oligos + + if (oligos.hasPairedBarcodes()) { + pairedOligos = true; + numFPrimers = oligos.getPairedPrimers().size(); + numBarcodes = oligos.getPairedBarcodes().size(); + }else { + pairedOligos = false; + numFPrimers = oligos.getPrimers().size(); + numBarcodes = oligos.getBarcodes().size(); + } + + numLinkers = oligos.getLinkers().size(); + numSpacers = oligos.getSpacers().size(); + numRPrimers = oligos.getReversePrimers().size(); + + vector groupNames = oligos.getGroupNames(); + if (groupNames.size() == 0) { allFiles = 0; allBlank = true; } + + + fastaFileNames.resize(oligos.getBarcodeNames().size()); + for(int i=0;i uniqueNames; //used to cleanup outputFileNames + if (pairedOligos) { + map barcodes = oligos.getPairedBarcodes(); + map primers = oligos.getPairedPrimers(); + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ + + string primerName = oligos.getPrimerName(itPrimer->first); + string barcodeName = oligos.getBarcodeName(itBar->first); + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing + else { + string comboGroupName = ""; + string fastaFileName = ""; + string qualFileName = ""; + string nameFileName = ""; + string countFileName = ""; + + if(primerName == ""){ + comboGroupName = barcodeName; + }else{ + if(barcodeName == ""){ + comboGroupName = primerName; + } + else{ + comboGroupName = barcodeName + "." + primerName; + } + } + + + ofstream temp; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile)); + variables["[tag]"] = comboGroupName; + fastaFileName = getOutputFileName("fasta", variables); + if (uniqueNames.count(fastaFileName) == 0) { + outputNames.push_back(fastaFileName); + outputTypes["fasta"].push_back(fastaFileName); + uniqueNames.insert(fastaFileName); + fastaFile2Group[fastaFileName] = comboGroupName; + } + + fastaFileNames[itBar->first][itPrimer->first] = fastaFileName; + m->openOutputFile(fastaFileName, temp); temp.close(); + + if(qFileName != ""){ + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName)); + qualFileName = getOutputFileName("qfile", variables); + if (uniqueNames.count(qualFileName) == 0) { + outputNames.push_back(qualFileName); + outputTypes["qfile"].push_back(qualFileName); + } + + qualFileNames[itBar->first][itPrimer->first] = qualFileName; + m->openOutputFile(qualFileName, temp); temp.close(); + } + + if(nameFile != ""){ + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile)); + nameFileName = getOutputFileName("name", variables); + if (uniqueNames.count(nameFileName) == 0) { + outputNames.push_back(nameFileName); + outputTypes["name"].push_back(nameFileName); + } + + nameFileNames[itBar->first][itPrimer->first] = nameFileName; + m->openOutputFile(nameFileName, temp); temp.close(); + } + } + } + } + }else { + map barcodes = oligos.getBarcodes() ; + map primers = oligos.getPrimers(); + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ + + string primerName = oligos.getPrimerName(itPrimer->second); + string barcodeName = oligos.getBarcodeName(itBar->second); + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing + else { + string comboGroupName = ""; + string fastaFileName = ""; + string qualFileName = ""; + string nameFileName = ""; + string countFileName = ""; + + if(primerName == ""){ + comboGroupName = barcodeName; + }else{ + if(barcodeName == ""){ + comboGroupName = primerName; + } + else{ + comboGroupName = barcodeName + "." + primerName; + } + } + + + + ofstream temp; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile)); + variables["[tag]"] = comboGroupName; + fastaFileName = getOutputFileName("fasta", variables); + if (uniqueNames.count(fastaFileName) == 0) { + outputNames.push_back(fastaFileName); + outputTypes["fasta"].push_back(fastaFileName); + uniqueNames.insert(fastaFileName); + fastaFile2Group[fastaFileName] = comboGroupName; + } + + fastaFileNames[itBar->second][itPrimer->second] = fastaFileName; + m->openOutputFile(fastaFileName, temp); temp.close(); + + if(qFileName != ""){ + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName)); + qualFileName = getOutputFileName("qfile", variables); + if (uniqueNames.count(qualFileName) == 0) { + outputNames.push_back(qualFileName); + outputTypes["qfile"].push_back(qualFileName); + } + + qualFileNames[itBar->second][itPrimer->second] = qualFileName; + m->openOutputFile(qualFileName, temp); temp.close(); + } + + if(nameFile != ""){ + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile)); + nameFileName = getOutputFileName("name", variables); + if (uniqueNames.count(nameFileName) == 0) { + outputNames.push_back(nameFileName); + outputTypes["name"].push_back(nameFileName); + } + + nameFileNames[itBar->second][itPrimer->second] = nameFileName; + m->openOutputFile(nameFileName, temp); temp.close(); + } + } + } + } + } + + } + + + + if (allBlank) { + m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine(); + allFiles = false; + return false; + } + + + + /*ifstream inOligos; m->openInputFile(oligoFile, inOligos); ofstream test; - string type, oligo, group; + string type, oligo, roligo, group; + bool hasPrimer = false; bool hasPairedBarcodes = false; int indexPrimer = 0; int indexBarcode = 0; + int indexPairedPrimer = 0; + int indexPairedBarcode = 0; + set uniquePrimers; + set uniqueBarcodes; while(!inOligos.eof()){ - inOligos >> type; + inOligos >> type; if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); } @@ -1437,7 +1697,7 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< // get rest of line in case there is a primer name while (!inOligos.eof()) { char c = inOligos.get(); - if (c == 10 || c == 13){ break; } + if (c == 10 || c == 13 || c == -1){ break; } else if (c == 32 || c == 9){;} //space or tab else { group += c; } } @@ -1451,6 +1711,42 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< primers[oligo]=indexPrimer; indexPrimer++; primerNameVector.push_back(group); } + else if (type == "PRIMER"){ + m->gobble(inOligos); + + inOligos >> roligo; + + for(int i=0;idebug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); } + + //check for repeat barcodes + string tempPair = oligo+roligo; + if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); } + else { uniquePrimers.insert(tempPair); } + + if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } } + + pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++; + primerNameVector.push_back(group); + hasPrimer = true; + } else if(type == "REVERSE"){ //Sequence oligoRC("reverse", oligo); //oligoRC.reverseComplement(); @@ -1459,13 +1755,49 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< } else if(type == "BARCODE"){ inOligos >> group; - - //check for repeat barcodes - map::iterator itBar = barcodes.find(oligo); - if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - barcodes[oligo]=indexBarcode; indexBarcode++; - barcodeNameVector.push_back(group); + //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs + //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info + + string temp = ""; + while (!inOligos.eof()) { + char c = inOligos.get(); + if (c == 10 || c == 13 || c == -1){ break; } + else if (c == 32 || c == 9){;} //space or tab + else { temp += c; } + } + + //then this is illumina data with 4 columns + if (temp != "") { + hasPairedBarcodes = true; + string reverseBarcode = group; //reverseOligo(group); //reverse barcode + group = temp; + + for(int i=0;idebug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); } + + //check for repeat barcodes + string tempPair = oligo+reverseBarcode; + if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); } + else { uniqueBarcodes.insert(tempPair); } + + pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++; + barcodeNameVector.push_back(group); + }else { + //check for repeat barcodes + map::iterator itBar = barcodes.find(oligo); + if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } + + barcodes[oligo]=indexBarcode; indexBarcode++; + barcodeNameVector.push_back(group); + } }else if(type == "LINKER"){ linker.push_back(oligo); }else if(type == "SPACER"){ @@ -1477,8 +1809,13 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< } inOligos.close(); + if (hasPairedBarcodes || hasPrimer) { + pairedOligos = true; + if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; } + } + if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; } - + //add in potential combos if(barcodeNameVector.size() == 0){ barcodes[""] = 0; @@ -1499,97 +1836,146 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< if(allFiles){ set uniqueNames; //used to cleanup outputFileNames - for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ - for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ - - string primerName = primerNameVector[itPrimer->second]; - string barcodeName = barcodeNameVector[itBar->second]; - - string comboGroupName = ""; - string fastaFileName = ""; - string qualFileName = ""; - string nameFileName = ""; - string countFileName = ""; - - if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->second]; - } - else{ - if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->second]; - } - else{ - comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; - } - } - - - ofstream temp; - map variables; - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile)); - variables["[tag]"] = comboGroupName; - fastaFileName = getOutputFileName("fasta", variables); - if (uniqueNames.count(fastaFileName) == 0) { - outputNames.push_back(fastaFileName); - outputTypes["fasta"].push_back(fastaFileName); - uniqueNames.insert(fastaFileName); - } - - fastaFileNames[itBar->second][itPrimer->second] = fastaFileName; - m->openOutputFile(fastaFileName, temp); temp.close(); - - if(qFileName != ""){ - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName)); - qualFileName = getOutputFileName("qfile", variables); - if (uniqueNames.count(qualFileName) == 0) { - outputNames.push_back(qualFileName); - outputTypes["qfile"].push_back(qualFileName); - } - - qualFileNames[itBar->second][itPrimer->second] = qualFileName; - m->openOutputFile(qualFileName, temp); temp.close(); - } - - if(nameFile != ""){ - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile)); - nameFileName = getOutputFileName("name", variables); - if (uniqueNames.count(nameFileName) == 0) { - outputNames.push_back(nameFileName); - outputTypes["name"].push_back(nameFileName); - } - - nameFileNames[itBar->second][itPrimer->second] = nameFileName; - m->openOutputFile(nameFileName, temp); temp.close(); - } - } - } - } - numFPrimers = primers.size(); - numRPrimers = revPrimer.size(); - numLinkers = linker.size(); - numSpacers = spacer.size(); - - bool allBlank = true; - for (int i = 0; i < barcodeNameVector.size(); i++) { - if (barcodeNameVector[i] != "") { - allBlank = false; - break; - } - } - for (int i = 0; i < primerNameVector.size(); i++) { - if (primerNameVector[i] != "") { - allBlank = false; - break; - } - } - - if (allBlank) { - m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine(); - allFiles = false; - return false; + if (pairedOligos) { + for(map::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){ + for(map::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){ + + string primerName = primerNameVector[itPrimer->first]; + string barcodeName = barcodeNameVector[itBar->first]; + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else { + string comboGroupName = ""; + string fastaFileName = ""; + string qualFileName = ""; + string nameFileName = ""; + string countFileName = ""; + + if(primerName == ""){ + comboGroupName = barcodeNameVector[itBar->first]; + } + else{ + if(barcodeName == ""){ + comboGroupName = primerNameVector[itPrimer->first]; + } + else{ + comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first]; + } + } + + + ofstream temp; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile)); + variables["[tag]"] = comboGroupName; + fastaFileName = getOutputFileName("fasta", variables); + if (uniqueNames.count(fastaFileName) == 0) { + outputNames.push_back(fastaFileName); + outputTypes["fasta"].push_back(fastaFileName); + uniqueNames.insert(fastaFileName); + } + + fastaFileNames[itBar->first][itPrimer->first] = fastaFileName; + m->openOutputFile(fastaFileName, temp); temp.close(); + + if(qFileName != ""){ + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName)); + qualFileName = getOutputFileName("qfile", variables); + if (uniqueNames.count(qualFileName) == 0) { + outputNames.push_back(qualFileName); + outputTypes["qfile"].push_back(qualFileName); + } + + qualFileNames[itBar->first][itPrimer->first] = qualFileName; + m->openOutputFile(qualFileName, temp); temp.close(); + } + + if(nameFile != ""){ + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile)); + nameFileName = getOutputFileName("name", variables); + if (uniqueNames.count(nameFileName) == 0) { + outputNames.push_back(nameFileName); + outputTypes["name"].push_back(nameFileName); + } + + nameFileNames[itBar->first][itPrimer->first] = nameFileName; + m->openOutputFile(nameFileName, temp); temp.close(); + } + } + } + } + }else { + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ + + string primerName = primerNameVector[itPrimer->second]; + string barcodeName = barcodeNameVector[itBar->second]; + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else { + string comboGroupName = ""; + string fastaFileName = ""; + string qualFileName = ""; + string nameFileName = ""; + string countFileName = ""; + + if(primerName == ""){ + comboGroupName = barcodeNameVector[itBar->second]; + } + else{ + if(barcodeName == ""){ + comboGroupName = primerNameVector[itPrimer->second]; + } + else{ + comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; + } + } + + + ofstream temp; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile)); + variables["[tag]"] = comboGroupName; + fastaFileName = getOutputFileName("fasta", variables); + if (uniqueNames.count(fastaFileName) == 0) { + outputNames.push_back(fastaFileName); + outputTypes["fasta"].push_back(fastaFileName); + uniqueNames.insert(fastaFileName); + } + + fastaFileNames[itBar->second][itPrimer->second] = fastaFileName; + m->openOutputFile(fastaFileName, temp); temp.close(); + + if(qFileName != ""){ + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName)); + qualFileName = getOutputFileName("qfile", variables); + if (uniqueNames.count(qualFileName) == 0) { + outputNames.push_back(qualFileName); + outputTypes["qfile"].push_back(qualFileName); + } + + qualFileNames[itBar->second][itPrimer->second] = qualFileName; + m->openOutputFile(qualFileName, temp); temp.close(); + } + + if(nameFile != ""){ + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile)); + nameFileName = getOutputFileName("name", variables); + if (uniqueNames.count(nameFileName) == 0) { + outputNames.push_back(nameFileName); + outputTypes["name"].push_back(nameFileName); + } + + nameFileNames[itBar->second][itPrimer->second] = nameFileName; + m->openOutputFile(nameFileName, temp); temp.close(); + } + } + } + } + } } - - return true; + */ + return true; } catch(exception& e) { @@ -1605,7 +1991,13 @@ bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){ if(qscores.getName() != ""){ qscores.trimQScores(-1, keepFirst); } + +// sequence.printSequence(cout);cout << endl; + sequence.trim(keepFirst); + +// sequence.printSequence(cout);cout << endl << endl;; + return success; } catch(exception& e) { @@ -1683,46 +2075,6 @@ bool TrimSeqsCommand::cullHomoP(Sequence& seq){ } } -//********************************************************************/ -string TrimSeqsCommand::reverseOligo(string oligo){ - try { - string reverse = ""; - - for(int i=oligo.length()-1;i>=0;i--){ - - if(oligo[i] == 'A') { reverse += 'T'; } - else if(oligo[i] == 'T'){ reverse += 'A'; } - else if(oligo[i] == 'U'){ reverse += 'A'; } - - else if(oligo[i] == 'G'){ reverse += 'C'; } - else if(oligo[i] == 'C'){ reverse += 'G'; } - - else if(oligo[i] == 'R'){ reverse += 'Y'; } - else if(oligo[i] == 'Y'){ reverse += 'R'; } - - else if(oligo[i] == 'M'){ reverse += 'K'; } - else if(oligo[i] == 'K'){ reverse += 'M'; } - - else if(oligo[i] == 'W'){ reverse += 'W'; } - else if(oligo[i] == 'S'){ reverse += 'S'; } - - else if(oligo[i] == 'B'){ reverse += 'V'; } - else if(oligo[i] == 'V'){ reverse += 'B'; } - - else if(oligo[i] == 'D'){ reverse += 'H'; } - else if(oligo[i] == 'H'){ reverse += 'D'; } - - else { reverse += 'N'; } - } - - - return reverse; - } - catch(exception& e) { - m->errorOut(e, "TrimSeqsCommand", "reverseOligo"); - exit(1); - } -} //***************************************************************************************************************