X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimseqscommand.cpp;h=0e0ed587343e1a02288f7a49b9af18c8dcf846c5;hb=348de0f8b17d84ede77081dcf67bd6ef43496677;hp=acbead2ed3a77171980ab49e4d8e098dce3c52d8;hpb=75c5a235ac3eb22e0f97d36874f4b2dcf9591f2e;p=mothur.git diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index acbead2..0e0ed58 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -11,10 +11,13 @@ #include "needlemanoverlap.hpp" //********************************************************************************************************************** + vector TrimSeqsCommand::getValidParameters(){ try { - string Array[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", - "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; + string Array[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop","minlength", "maxlength", "qfile", + "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", + "keepfirst", "removelast", + "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); return myArray; } @@ -23,14 +26,15 @@ vector TrimSeqsCommand::getValidParameters(){ exit(1); } } + //********************************************************************************************************************** + TrimSeqsCommand::TrimSeqsCommand(){ try { - abort = true; - //initialize outputTypes + abort = true; calledHelp = true; vector tempOutNames; outputTypes["fasta"] = tempOutNames; - outputTypes["qual"] = tempOutNames; + outputTypes["qfile"] = tempOutNames; outputTypes["group"] = tempOutNames; } catch(exception& e) { @@ -38,7 +42,9 @@ TrimSeqsCommand::TrimSeqsCommand(){ exit(1); } } + //********************************************************************************************************************** + vector TrimSeqsCommand::getRequiredParameters(){ try { string Array[] = {"fasta"}; @@ -50,7 +56,9 @@ vector TrimSeqsCommand::getRequiredParameters(){ exit(1); } } + //********************************************************************************************************************** + vector TrimSeqsCommand::getRequiredFiles(){ try { vector myArray; @@ -61,21 +69,24 @@ vector TrimSeqsCommand::getRequiredFiles(){ exit(1); } } + //*************************************************************************************************************** TrimSeqsCommand::TrimSeqsCommand(string option) { try { - abort = false; + abort = false; calledHelp = false; comboStarts = 0; //allow user to run help - if(option == "help") { help(); abort = true; } + if(option == "help") { help(); abort = true; calledHelp = true; } else { //valid paramters for this command - string AlignArray[] = {"fasta", "flip", "group","oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", - "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; + string AlignArray[] = { "fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", + "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", + "keepfirst", "removelast", + "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); @@ -93,7 +104,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { //initialize outputTypes vector tempOutNames; outputTypes["fasta"] = tempOutNames; - outputTypes["qual"] = tempOutNames; + outputTypes["qfile"] = tempOutNames; outputTypes["group"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter @@ -125,13 +136,6 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { if (path == "") { parameters["qfile"] = inputDir + it->second; } } - it = parameters.find("group"); - //user has given a template file - if(it != parameters.end()){ - path = m->hasPath(it->second); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { parameters["group"] = inputDir + it->second; } - } } @@ -159,10 +163,6 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { else if(temp == "not open"){ abort = true; } else { oligoFile = temp; } - temp = validParameter.validFile(parameters, "group", true); - if (temp == "not found"){ groupfile = ""; } - else if(temp == "not open"){ abort = true; } - else { groupfile = temp; } temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; } convert(temp, maxAmbig); @@ -195,7 +195,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; } convert(temp, qThreshold); - temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "F"; } + temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; } qtrim = m->isTrue(temp); temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; } @@ -204,14 +204,20 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; } convert(temp, qWindowAverage); - temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "100"; } + temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; } convert(temp, qWindowSize); - temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "10"; } + temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; } convert(temp, qWindowStep); temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; } convert(temp, qAverage); + + temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; } + convert(temp, keepFirst); + + temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; } + convert(temp, removeLast); temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; } allFiles = m->isTrue(temp); @@ -219,13 +225,9 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; } convert(temp, processors); - if ((oligoFile != "") && (groupfile != "")) { - m->mothurOut("You given both a oligos file and a groupfile, only one is allowed."); m->mothurOutEndLine(); abort = true; - } - - if(allFiles && (oligoFile == "") && (groupfile == "")){ - m->mothurOut("You selected allfiles, but didn't enter an oligos or group file. Ignoring the allfiles request."); m->mothurOutEndLine(); + if(allFiles && (oligoFile == "")){ + m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine(); } if((qAverage != 0 && qThreshold != 0) && qFileName == ""){ m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine(); @@ -244,28 +246,35 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { exit(1); } } + //********************************************************************************************************************** void TrimSeqsCommand::help(){ try { - m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n"); - m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, group, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n"); + m->mothurOut("The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n"); + m->mothurOut("The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n"); + m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n"); m->mothurOut("The fasta parameter is required.\n"); - m->mothurOut("The group parameter allows you to enter a group file for your fasta file.\n"); m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n"); - m->mothurOut("The oligos parameter .... The default is "".\n"); - m->mothurOut("The maxambig parameter .... The default is -1.\n"); - m->mothurOut("The maxhomop parameter .... The default is 0.\n"); - m->mothurOut("The minlength parameter .... The default is 0.\n"); - m->mothurOut("The maxlength parameter .... The default is 0.\n"); + m->mothurOut("The oligos parameter allows you to provide an oligos file.\n"); + m->mothurOut("The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n"); + m->mothurOut("The maxhomop parameter allows you to set a maximum homopolymer length. \n"); + m->mothurOut("The minlength parameter allows you to set and minimum sequence length. \n"); + m->mothurOut("The maxlength parameter allows you to set and maximum sequence length. \n"); m->mothurOut("The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n"); m->mothurOut("The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n"); m->mothurOut("The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"); - m->mothurOut("The qfile parameter .....\n"); - m->mothurOut("The qthreshold parameter .... The default is 0.\n"); - m->mothurOut("The qaverage parameter .... The default is 0.\n"); - m->mothurOut("The allfiles parameter .... The default is F.\n"); - m->mothurOut("The qtrim parameter .... The default is F.\n"); + m->mothurOut("The qfile parameter allows you to provide a quality file.\n"); + m->mothurOut("The qthreshold parameter allows you to set a minimum quality score allowed. \n"); + m->mothurOut("The qaverage parameter allows you to set a minimum average quality score allowed. \n"); + m->mothurOut("The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n"); + m->mothurOut("The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n"); + m->mothurOut("The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n"); + m->mothurOut("The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n"); + m->mothurOut("The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n"); + m->mothurOut("The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n"); + m->mothurOut("The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n"); + m->mothurOut("The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n"); m->mothurOut("The trim.seqs command should be in the following format: \n"); m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n"); m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n"); @@ -290,54 +299,35 @@ TrimSeqsCommand::~TrimSeqsCommand(){ /* do nothing */ } int TrimSeqsCommand::execute(){ try{ - if (abort == true) { return 0; } + if (abort == true) { if (calledHelp) { return 0; } return 2; } numFPrimers = 0; //this needs to be initialized numRPrimers = 0; - vector fastaFileNames; - vector qualFileNames; + vector > fastaFileNames; + vector > qualFileNames; string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta"; outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile); + string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta"; outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile); + string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual"; string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual"; - if (qFileName != "") { outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile); outputTypes["qual"].push_back(trimQualFile); outputTypes["qual"].push_back(scrapQualFile); } - string groupFile = ""; - if (groupfile == "") { groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; } - else{ - groupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "trim.groups"; - outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile); - groupMap = new GroupMap(groupfile); - groupMap->readMap(); - - if(allFiles){ - for (int i = 0; i < groupMap->namesOfGroups.size(); i++) { - groupToIndex[groupMap->namesOfGroups[i]] = i; - groupVector.push_back(groupMap->namesOfGroups[i]); - fastaFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupMap->namesOfGroups[i] + ".fasta")); - - //we append later, so we want to clear file - ofstream outRemove; - m->openOutputFile(fastaFileNames[i], outRemove); - outRemove.close(); - if(qFileName != ""){ - qualFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupMap->namesOfGroups[i] + ".qual")); - ofstream outRemove2; - m->openOutputFile(qualFileNames[i], outRemove2); - outRemove2.close(); - } - } - } - comboStarts = fastaFileNames.size()-1; + if (qFileName != "") { + outputNames.push_back(trimQualFile); + outputNames.push_back(scrapQualFile); + outputTypes["qfile"].push_back(trimQualFile); + outputTypes["qfile"].push_back(scrapQualFile); } + string outputGroupFileName; if(oligoFile != ""){ - outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile); + outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; + outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); getOligos(fastaFileNames, qualFileNames); } - + vector fastaFilePos; vector qFilePos; @@ -351,68 +341,93 @@ int TrimSeqsCommand::execute(){ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) if(processors == 1){ - driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]); + driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]); }else{ - createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames); + createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames); } #else - driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]); + driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]); #endif if (m->control_pressed) { return 0; } - set blanks; - for(int i=0;iisBlank(fastaFileNames[i])) { blanks.insert(fastaFileNames[i]); } - else if (filesToRemove.count(fastaFileNames[i]) > 0) { remove(fastaFileNames[i].c_str()); } - else { - ifstream inFASTA; - string seqName; - m->openInputFile(fastaFileNames[i], inFASTA); - ofstream outGroups; - string outGroupFilename = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "groups"; - - //if the fastafile is on the blanks list then the groups file should be as well - if (blanks.count(fastaFileNames[i]) != 0) { blanks.insert(outGroupFilename); } + if(allFiles){ + map uniqueFastaNames;// so we don't add the same groupfile multiple times + map::iterator it; + set namesToRemove; + for(int i=0;iisBlank(fastaFileNames[i][j])){ + remove(fastaFileNames[i][j].c_str()); + namesToRemove.insert(fastaFileNames[i][j]); + + if(qFileName != ""){ + remove(qualFileNames[i][j].c_str()); + namesToRemove.insert(qualFileNames[i][j]); + } + }else{ + it = uniqueFastaNames.find(fastaFileNames[i][j]); + if (it == uniqueFastaNames.end()) { + uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i]; + } + } + } + } + } + + //remove names for outputFileNames, just cleans up the output + vector outputNames2; + for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } } + outputNames = outputNames2; + + for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) { + ifstream in; + m->openInputFile(it->first, in); - m->openOutputFile(outGroupFilename, outGroups); - outputNames.push_back(outGroupFilename); outputTypes["group"].push_back(outGroupFilename); + ofstream out; + string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups"; + outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); + m->openOutputFile(thisGroupName, out); - string thisGroup = ""; - if (i > comboStarts) { - map::iterator itCombo; - for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){ - if(itCombo->second == i){ thisGroup = itCombo->first; combos.erase(itCombo); break; } - } - }else{ thisGroup = groupVector[i]; } + while (!in.eof()){ + if (m->control_pressed) { break; } - while(!inFASTA.eof()){ - if(inFASTA.get() == '>'){ - inFASTA >> seqName; - outGroups << seqName << '\t' << thisGroup << endl; - } - while (!inFASTA.eof()) { char c = inFASTA.get(); if (c == 10 || c == 13){ break; } } + Sequence currSeq(in); m->gobble(in); + out << currSeq.getName() << '\t' << it->second << endl; } - outGroups.close(); - inFASTA.close(); + in.close(); + out.close(); } } - for (set::iterator itBlanks = blanks.begin(); itBlanks != blanks.end(); itBlanks++) { remove((*(itBlanks)).c_str()); } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + //output group counts + m->mothurOutEndLine(); + //int total = 0; +// for (int i = 0; i < barcodeNameVector.size(); i++) { +// if ((barcodeNameVector[i] != "") && (groupCounts[i] != 0)) { total += groupCounts[i]; m->mothurOut("Group " + barcodeNameVector[i] + " contains " + toString(groupCounts[i]) + " sequences."); m->mothurOutEndLine(); } +// } +// if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); } - blanks.clear(); - if(qFileName != ""){ - for(int i=0;iisBlank(qualFileNames[i])) { blanks.insert(qualFileNames[i]); } - else if (filesToRemove.count(qualFileNames[i]) > 0) { remove(qualFileNames[i].c_str()); } - } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + //set fasta file as new current fastafile + string current = ""; + itTypes = outputTypes.find("fasta"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } } - for (set::iterator itBlanks = blanks.begin(); itBlanks != blanks.end(); itBlanks++) { remove((*(itBlanks)).c_str()); } + itTypes = outputTypes.find("qfile"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); } + } - if (m->control_pressed) { - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } - return 0; + itTypes = outputTypes.find("group"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); } } m->mothurOutEndLine(); @@ -431,27 +446,37 @@ int TrimSeqsCommand::execute(){ /**************************************************************************************/ -int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector fastaNames, vector qualNames, linePair* line, linePair* qline) { +int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string groupFileName, vector > fastaFileNames, vector > qualFileNames, linePair* line, linePair* qline) { try { - ofstream outFASTA; - int able = m->openOutputFile(trimFile, outFASTA); + ofstream trimFASTAFile; + m->openOutputFile(trimFileName, trimFASTAFile); - ofstream scrapFASTA; - m->openOutputFile(scrapFile, scrapFASTA); + ofstream scrapFASTAFile; + m->openOutputFile(scrapFileName, scrapFASTAFile); - ofstream outQual; - ofstream scrapQual; + ofstream trimQualFile; + ofstream scrapQualFile; if(qFileName != ""){ - m->openOutputFile(trimQFile, outQual); - m->openOutputFile(scrapQFile, scrapQual); + m->openOutputFile(trimQFileName, trimQualFile); + m->openOutputFile(scrapQFileName, scrapQualFile); } - ofstream outGroups; - - if (oligoFile != "") { - m->openOutputFile(groupFile, outGroups); + ofstream outGroupsFile; + if (oligoFile != ""){ m->openOutputFile(groupFileName, outGroupsFile); } + if(allFiles){ + for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file + for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file + if (fastaFileNames[i][j] != "") { + ofstream temp; + m->openOutputFile(fastaFileNames[i][j], temp); temp.close(); + if(qFileName != ""){ + m->openOutputFile(qualFileNames[i][j], temp); temp.close(); + } + } + } + } } ifstream inFASTA; @@ -459,29 +484,19 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string inFASTA.seekg(line->start); ifstream qFile; - if(qFileName != "") { m->openInputFile(qFileName, qFile); qFile.seekg(qline->start); } - - - for (int i = 0; i < fastaNames.size(); i++) { //clears old file - ofstream temp; - m->openOutputFile(fastaNames[i], temp); - temp.close(); - } - for (int i = 0; i < qualNames.size(); i++) { //clears old file - ofstream temp; - m->openOutputFile(qualNames[i], temp); - temp.close(); + if(qFileName != "") { + m->openInputFile(qFileName, qFile); + qFile.seekg(qline->start); } - - bool done = false; int count = 0; + bool moreSeqs = 1; - while (!done) { + while (moreSeqs) { if (m->control_pressed) { - inFASTA.close(); outFASTA.close(); scrapFASTA.close(); - if (oligoFile != "") { outGroups.close(); } + inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close(); + if (oligoFile != "") { outGroupsFile.close(); } if(qFileName != ""){ qFile.close(); @@ -492,52 +507,65 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string } int success = 1; - + string trashCode = ""; + int currentSeqsDiffs = 0; Sequence currSeq(inFASTA); m->gobble(inFASTA); QualityScores currQual; if(qFileName != ""){ - currQual = QualityScores(qFile, currSeq.getNumBases()); m->gobble(qFile); + currQual = QualityScores(qFile); m->gobble(qFile); } - + string origSeq = currSeq.getUnaligned(); if (origSeq != "") { - int groupBar, groupPrime; - string trashCode = ""; - int currentSeqsDiffs = 0; - - if(qFileName != ""){ - if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); } - else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); } - else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); } - else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); } - - if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { - success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap - } - - if(!success) { trashCode += 'q'; } - } - + + int barcodeIndex = 0; + int primerIndex = 0; + if(barcodes.size() != 0){ - success = stripBarcode(currSeq, currQual, groupBar); + success = stripBarcode(currSeq, currQual, barcodeIndex); if(success > bdiffs) { trashCode += 'b'; } else{ currentSeqsDiffs += success; } } - + if(numFPrimers != 0){ - success = stripForward(currSeq, currQual, groupPrime); + success = stripForward(currSeq, currQual, primerIndex); if(success > pdiffs) { trashCode += 'f'; } else{ currentSeqsDiffs += success; } } if (currentSeqsDiffs > tdiffs) { trashCode += 't'; } - + if(numRPrimers != 0){ success = stripReverse(currSeq, currQual); if(!success) { trashCode += 'r'; } } + + if(keepFirst != 0){ + success = keepFirstTrim(currSeq, currQual); + } + + if(removeLast != 0){ + success = removeLastTrim(currSeq, currQual); + if(!success) { trashCode += 'l'; } + } + + + if(qFileName != ""){ + int origLength = currSeq.getNumBases(); + + if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); } + else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); } + else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); } + else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); } + else { success = 1; } + + //you don't want to trim, if it fails above then scrap it + if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; } + + if(!success) { trashCode += 'q'; } + } if(minLength > 0 || maxLength > 0){ success = cullLength(currSeq); @@ -552,64 +580,37 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if(!success) { trashCode += 'n'; } } - if(flip){ currSeq.reverseComplement(); currQual.flipQScores(); } // should go last + if(flip){ // should go last + currSeq.reverseComplement(); + if(qFileName != ""){ + currQual.flipQScores(); + } + } if(trashCode.length() == 0){ currSeq.setAligned(currSeq.getUnaligned()); - currSeq.printSequence(outFASTA); - currQual.printQScores(outQual); + currSeq.printSequence(trimFASTAFile); + + if(qFileName != ""){ + currQual.printQScores(trimQualFile); + } if(barcodes.size() != 0){ - string thisGroup = groupVector[groupBar]; - int indexToFastaFile = groupBar; - if (primers.size() != 0){ - //does this primer have a group - if (groupVector[groupPrime] != "") { - thisGroup += "." + groupVector[groupPrime]; - indexToFastaFile = combos[thisGroup]; - } - } - outGroups << currSeq.getName() << '\t' << thisGroup << endl; - if(allFiles){ - ofstream outTemp; - m->openOutputFileAppend(fastaNames[indexToFastaFile], outTemp); - //currSeq.printSequence(*fastaFileNames[indexToFastaFile]); - currSeq.printSequence(outTemp); - outTemp.close(); - - if(qFileName != ""){ - //currQual.printQScores(*qualFileNames[indexToFastaFile]); - ofstream outTemp2; - m->openOutputFileAppend(qualNames[indexToFastaFile], outTemp2); - currQual.printQScores(outTemp2); - outTemp2.close(); - } - } + outGroupsFile << currSeq.getName() << '\t' << barcodeNameVector[barcodeIndex] << endl; + groupCounts[barcodeIndex]++; } - if (groupfile != "") { - string thisGroup = groupMap->getGroup(currSeq.getName()); + + if(allFiles){ + ofstream output; + m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output); + currSeq.printSequence(output); + output.close(); - if (thisGroup != "not found") { - outGroups << currSeq.getName() << '\t' << thisGroup << endl; - if (allFiles) { - ofstream outTemp; - m->openOutputFileAppend(fastaNames[groupToIndex[thisGroup]], outTemp); - currSeq.printSequence(outTemp); - outTemp.close(); - if(qFileName != ""){ - ofstream outTemp2; - m->openOutputFileAppend(qualNames[groupToIndex[thisGroup]], outTemp2); - currQual.printQScores(outTemp2); - outTemp2.close(); - } - } - }else{ - m->mothurOut(currSeq.getName() + " is not in your groupfile, adding to group XXX."); m->mothurOutEndLine(); - outGroups << currSeq.getName() << '\t' << "XXX" << endl; - if (allFiles) { - m->mothurOut("[ERROR]: " + currSeq.getName() + " will not be added to any .group.fasta or .group.qual file."); m->mothurOutEndLine(); - } + if(qFileName != ""){ + m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output); + currQual.printQScores(output); + output.close(); } } } @@ -617,8 +618,10 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string currSeq.setName(currSeq.getName() + '|' + trashCode); currSeq.setUnaligned(origSeq); currSeq.setAligned(origSeq); - currSeq.printSequence(scrapFASTA); - currQual.printQScores(scrapQual); + currSeq.printSequence(scrapFASTAFile); + if(qFileName != ""){ + currQual.printQScores(scrapQualFile); + } } count++; } @@ -639,10 +642,10 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string inFASTA.close(); - outFASTA.close(); - scrapFASTA.close(); - if (oligoFile != "") { outGroups.close(); } - if(qFileName != "") { qFile.close(); scrapQual.close(); outQual.close(); } + trimFASTAFile.close(); + scrapFASTAFile.close(); + if (oligoFile != "") { outGroupsFile.close(); } + if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); } return count; } @@ -654,7 +657,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string /**************************************************************************************************/ -int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector fastaNames, vector qualNames) { +int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string groupFile, vector > fastaFileNames, vector > qualFileNames) { try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) int process = 1; @@ -669,22 +672,49 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - for (int i = 0; i < fastaNames.size(); i++) { - fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp"); - //clear old file if it exists + + vector > tempFASTAFileNames = fastaFileNames; + vector > tempPrimerQualFileNames = qualFileNames; + + if(allFiles){ ofstream temp; - m->openOutputFile(fastaNames[i], temp); - temp.close(); - if(qFileName != ""){ - qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp"); - //clear old file if it exists - ofstream temp2; - m->openOutputFile(qualNames[i], temp2); - temp2.close(); + + for(int i=0;iopenOutputFile(tempFASTAFileNames[i][j], temp); temp.close(); + + if(qFileName != ""){ + tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp"; + m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close(); + } + } + } } } + + driverCreateTrim(filename, + qFileName, + (trimFASTAFileName + toString(getpid()) + ".temp"), + (scrapFASTAFileName + toString(getpid()) + ".temp"), + (trimQualFileName + toString(getpid()) + ".temp"), + (scrapQualFileName + toString(getpid()) + ".temp"), + (groupFile + toString(getpid()) + ".temp"), + tempFASTAFileNames, + tempPrimerQualFileNames, + lines[process], + qLines[process]); + + //pass groupCounts to parent + ofstream out; + string tempFile = filename + toString(getpid()) + ".num.temp"; + m->openOutputFile(tempFile, out); + for(int i = 0; i < groupCounts.size(); i++) { + out << groupCounts[i] << endl; + } + out.close(); - driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (trimQFile + toString(getpid()) + ".temp"), (scrapQFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, qualNames, lines[process], qLines[process]); exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); @@ -694,21 +724,13 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName } //parent do my part - for (int i = 0; i < fastaNames.size(); i++) { - //clear old file if it exists - ofstream temp; - m->openOutputFile(fastaNames[i], temp); - temp.close(); - if(qFileName != ""){ - //clear old file if it exists - ofstream temp2; - m->openOutputFile(qualNames[i], temp2); - temp2.close(); - } - } - - driverCreateTrim(filename, qFileName, trimFile, scrapFile, trimQFile, scrapQFile, groupFile, fastaNames, qualNames, lines[0], qLines[0]); - + ofstream temp; + m->openOutputFile(trimFASTAFileName, temp); temp.close(); + m->openOutputFile(scrapFASTAFileName, temp); temp.close(); + m->openOutputFile(trimQualFileName, temp); temp.close(); + m->openOutputFile(scrapQualFileName, temp); temp.close(); + + driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]); //force parent to wait until all the processes are done for (int i=0;imothurOut("Appending files from process " + processIDS[i]); m->mothurOutEndLine(); - m->appendFiles((trimFile + toString(processIDS[i]) + ".temp"), trimFile); - remove((trimFile + toString(processIDS[i]) + ".temp").c_str()); - m->appendFiles((scrapFile + toString(processIDS[i]) + ".temp"), scrapFile); - remove((scrapFile + toString(processIDS[i]) + ".temp").c_str()); + m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine(); - m->mothurOut("Done with fasta files"); m->mothurOutEndLine(); + m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName); + remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str()); + m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName); + remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str()); if(qFileName != ""){ - m->appendFiles((trimQFile + toString(processIDS[i]) + ".temp"), trimQFile); - remove((trimQFile + toString(processIDS[i]) + ".temp").c_str()); - m->appendFiles((scrapQFile + toString(processIDS[i]) + ".temp"), scrapQFile); - remove((scrapQFile + toString(processIDS[i]) + ".temp").c_str()); - - m->mothurOut("Done with quality files"); m->mothurOutEndLine(); + m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName); + remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str()); + m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName); + remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str()); } m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile); remove((groupFile + toString(processIDS[i]) + ".temp").c_str()); - m->mothurOut("Done with group file"); m->mothurOutEndLine(); - for (int j = 0; j < fastaNames.size(); j++) { - m->appendFiles((fastaNames[j] + toString(processIDS[i]) + ".temp"), fastaNames[j]); - remove((fastaNames[j] + toString(processIDS[i]) + ".temp").c_str()); + if(allFiles){ + for(int j=0;jappendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]); + remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str()); + + if(qFileName != ""){ + m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]); + remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str()); + } + } + } + } } - if(qFileName != ""){ - for (int j = 0; j < qualNames.size(); j++) { - m->appendFiles((qualNames[j] + toString(processIDS[i]) + ".temp"), qualNames[j]); - remove((qualNames[j] + toString(processIDS[i]) + ".temp").c_str()); - } + ifstream in; + string tempFile = filename + toString(processIDS[i]) + ".num.temp"; + m->openInputFile(tempFile, in); + int count = 0; + int tempNum; + while (!in.eof()) { + in >> tempNum; m->gobble(in); + groupCounts[count] += tempNum; + count++; } + in.close(); remove(tempFile.c_str()); - if (allFiles) { m->mothurOut("Done with allfiles"); m->mothurOutEndLine(); } } return exitCommand; @@ -848,9 +881,10 @@ int TrimSeqsCommand::setLines(string filename, string qfilename, vector& outFASTAVec, vector& outQualVec){ +void TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector >& qualFileNames){ try { ifstream inOligos; m->openInputFile(oligoFile, inOligos); @@ -858,10 +892,12 @@ void TrimSeqsCommand::getOligos(vector& outFASTAVec, vector& out ofstream test; string type, oligo, group; - int index=0; - //int indexPrimer = 0; + + int indexPrimer = 0; + int indexBarcode = 0; while(!inOligos.eof()){ + inOligos >> type; m->gobble(inOligos); if(type[0] == '#'){ @@ -893,29 +929,8 @@ void TrimSeqsCommand::getOligos(vector& outFASTAVec, vector& out map::iterator itPrime = primers.find(oligo); if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - primers[oligo]=index; index++; - groupVector.push_back(group); - - if(allFiles){ - outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); - if(qFileName != ""){ - outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); - } - if (group == "") { //if there is not a group for this primer, then this file will not get written to, but we add it to keep the indexes correct - filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); - if(qFileName != ""){ - filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); - } - }else { - outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); - outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); - if(qFileName != ""){ - outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); - outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); - } - } - } - + primers[oligo]=indexPrimer; indexPrimer++; + primerNameVector.push_back(group); } else if(type == "REVERSE"){ Sequence oligoRC("reverse", oligo); @@ -928,58 +943,94 @@ void TrimSeqsCommand::getOligos(vector& outFASTAVec, vector& out //check for repeat barcodes map::iterator itBar = barcodes.find(oligo); if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - - barcodes[oligo]=index; index++; - groupVector.push_back(group); - if(allFiles){ - outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); - outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); - outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); - if(qFileName != ""){ - outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); - outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); - outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); - } - } - - }else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); } + barcodes[oligo]=indexBarcode; indexBarcode++; + barcodeNameVector.push_back(group); + } + else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); } } m->gobble(inOligos); - } - + } inOligos.close(); + if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; } + //add in potential combos + if(barcodeNameVector.size() == 0){ + barcodes[""] = 0; + barcodeNameVector.push_back(""); + } + + if(primerNameVector.size() == 0){ + primers[""] = 0; + primerNameVector.push_back(""); + } + + fastaFileNames.resize(barcodeNameVector.size()); + for(int i=0;i::iterator itBar = barcodes.begin(); itBar != barcodes.end(); itBar++) { - for (map::iterator itPrime = primers.begin(); itPrime != primers.end(); itPrime++) { - if (groupVector[itPrime->second] != "") { //there is a group for this primer - outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta")); - outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta")); - outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta")); - combos[(groupVector[itBar->second] + "." + groupVector[itPrime->second])] = outFASTAVec.size()-1; - - if(qFileName != ""){ - outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual")); - outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual")); - outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual")); + set uniqueNames; //used to cleanup outputFileNames + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ + + string primerName = primerNameVector[itPrimer->second]; + string barcodeName = barcodeNameVector[itBar->second]; + + string comboGroupName = ""; + string fastaFileName = ""; + string qualFileName = ""; + + if(primerName == ""){ + comboGroupName = barcodeNameVector[itBar->second]; + } + else{ + if(barcodeName == ""){ + comboGroupName = primerNameVector[itPrimer->second]; } + else{ + comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; + } + } + + ofstream temp; + fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta"; + if (uniqueNames.count(fastaFileName) == 0) { + outputNames.push_back(fastaFileName); + outputTypes["fasta"].push_back(fastaFileName); + uniqueNames.insert(fastaFileName); + } + + fastaFileNames[itBar->second][itPrimer->second] = fastaFileName; + m->openOutputFile(fastaFileName, temp); temp.close(); + + if(qFileName != ""){ + qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual"; + if (uniqueNames.count(fastaFileName) == 0) { + outputNames.push_back(qualFileName); + outputTypes["qfile"].push_back(qualFileName); + } + + qualFileNames[itBar->second][itPrimer->second] = qualFileName; + m->openOutputFile(qualFileName, temp); temp.close(); } } } } - numFPrimers = primers.size(); numRPrimers = revPrimer.size(); - + groupCounts.resize(barcodeNameVector.size(), 0); + } catch(exception& e) { m->errorOut(e, "TrimSeqsCommand", "getOligos"); exit(1); } } + //*************************************************************************************************************** int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){ @@ -1010,11 +1061,9 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group } //if you found the barcode or if you don't want to allow for diffs -// cout << success; if ((bdiffs == 0) || (success == 0)) { return success; } else { //try aligning and see if you can find it -// cout << endl; int maxLength = 0; @@ -1059,11 +1108,8 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); - int newStart=0; int numDiff = countDiffs(oligo, temp); -// cout << oligo << '\t' << temp << '\t' << numDiff << endl; - if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; @@ -1096,7 +1142,6 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group if (alignment != NULL) { delete alignment; } } -// cout << success << endl; return success; @@ -1128,7 +1173,6 @@ int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group seq.setUnaligned(rawSequence.substr(oligo.length())); if(qual.getName() != ""){ qual.trimQScores(oligo.length(), -1); - } success = 0; break; @@ -1136,11 +1180,9 @@ int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group } //if you found the barcode or if you don't want to allow for diffs -// cout << success; if ((pdiffs == 0) || (success == 0)) { return success; } else { //try aligning and see if you can find it -// cout << endl; int maxLength = 0; @@ -1185,11 +1227,8 @@ int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); - int newStart=0; int numDiff = countDiffs(oligo, temp); -// cout << oligo << '\t' << temp << '\t' << numDiff << endl; - if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; @@ -1266,6 +1305,52 @@ bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){ //*************************************************************************************************************** +bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){ + try { + bool success = 1; + if(qscores.getName() != ""){ + qscores.trimQScores(-1, keepFirst); + } + sequence.trim(keepFirst); + return success; + } + catch(exception& e) { + m->errorOut(e, "keepFirstTrim", "countDiffs"); + exit(1); + } + +} + +//*************************************************************************************************************** + +bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){ + try { + bool success = 0; + + int length = sequence.getNumBases() - removeLast; + + if(length > 0){ + if(qscores.getName() != ""){ + qscores.trimQScores(-1, length); + } + sequence.trim(length); + success = 1; + } + else{ + success = 0; + } + + return success; + } + catch(exception& e) { + m->errorOut(e, "removeLastTrim", "countDiffs"); + exit(1); + } + +} + +//*************************************************************************************************************** + bool TrimSeqsCommand::cullLength(Sequence& seq){ try { @@ -1362,6 +1447,7 @@ bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){ } } + //*************************************************************************************************************** int TrimSeqsCommand::countDiffs(string oligo, string seq){ @@ -1397,78 +1483,5 @@ int TrimSeqsCommand::countDiffs(string oligo, string seq){ } } -//*************************************************************************************************************** - -//bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){ -// try { -// -// string rawSequence = seq.getUnaligned(); -// int seqLength = seq.getNumBases(); -// bool success = 0; //guilty until proven innocent -// string name; -// -// qFile >> name; -// if (name[0] == '>') { if(name.substr(1) != seq.getName()) { m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); } } -// -// while (!qFile.eof()) { char c = qFile.get(); if (c == 10 || c == 13){ break; } } -// -// int score; -// int end = seqLength; -// -// for(int i=0;i> score; -// -// if(score < qThreshold){ -// end = i; -// break; -// } -// } -// for(int i=end+1;i> score; -// } -// -// seq.setUnaligned(rawSequence.substr(0,end)); -// -// return 1; -// } -// catch(exception& e) { -// m->errorOut(e, "TrimSeqsCommand", "stripQualThreshold"); -// exit(1); -// } -//} - -//*************************************************************************************************************** - -//bool TrimSeqsCommand::cullQualAverage(Sequence& seq, ifstream& qFile){ -// try { -// string rawSequence = seq.getUnaligned(); -// int seqLength = seq.getNumBases(); -// bool success = 0; //guilty until proven innocent -// string name; -// -// qFile >> name; -// if (name[0] == '>') { if(name.substr(1) != seq.getName()) { m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); } } -// -// while (!qFile.eof()) { char c = qFile.get(); if (c == 10 || c == 13){ break; } } -// -// float score; -// float average = 0; -// -// for(int i=0;i> score; -// average += score; -// } -// average /= seqLength; -// -// if(average >= qAverage) { success = 1; } -// else { success = 0; } -// -// return success; -// } -// catch(exception& e) { -// m->errorOut(e, "TrimSeqsCommand", "cullQualAverage"); -// exit(1); -// } -//} //***************************************************************************************************************