X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimseqscommand.cpp;h=f684b66a8e72d18f98172579197fac7cc5a2f543;hb=191ae1be0679d5cf4eda950b3b1bf26fb7dd503d;hp=705ac25f5a5e727424a2567a0d358b4c1dc951e5;hpb=8ef6687c1f586285d01c000cc5e359bf9c07c717;p=mothur.git diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 705ac25..f684b66 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -10,12 +10,64 @@ #include "trimseqscommand.h" #include "needlemanoverlap.hpp" +//********************************************************************************************************************** +vector TrimSeqsCommand::getValidParameters(){ + try { + string Array[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", + "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "TrimSeqsCommand", "getValidParameters"); + exit(1); + } +} +//********************************************************************************************************************** +TrimSeqsCommand::TrimSeqsCommand(){ + try { + abort = true; + //initialize outputTypes + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["qual"] = tempOutNames; + outputTypes["group"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand"); + exit(1); + } +} +//********************************************************************************************************************** +vector TrimSeqsCommand::getRequiredParameters(){ + try { + string Array[] = {"fasta"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "TrimSeqsCommand", "getRequiredParameters"); + exit(1); + } +} +//********************************************************************************************************************** +vector TrimSeqsCommand::getRequiredFiles(){ + try { + vector myArray; + return myArray; + } + catch(exception& e) { + m->errorOut(e, "TrimSeqsCommand", "getRequiredFiles"); + exit(1); + } +} //*************************************************************************************************************** TrimSeqsCommand::TrimSeqsCommand(string option) { try { abort = false; + comboStarts = 0; //allow user to run help if(option == "help") { help(); abort = true; } @@ -23,7 +75,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { else { //valid paramters for this command string AlignArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", - "qthreshold", "qaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; + "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); @@ -38,6 +90,12 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } + //initialize outputTypes + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["qual"] = tempOutNames; + outputTypes["group"] = tempOutNames; + //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } @@ -46,7 +104,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { it = parameters.find("fasta"); //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["fasta"] = inputDir + it->second; } } @@ -54,7 +112,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { it = parameters.find("oligos"); //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["oligos"] = inputDir + it->second; } } @@ -62,7 +120,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { it = parameters.find("qfile"); //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["qfile"] = inputDir + it->second; } } @@ -71,21 +129,22 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { //check for required parameters fastaFile = validParameter.validFile(parameters, "fasta", true); - if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the screen.seqs command."); m->mothurOutEndLine(); abort = true; } + if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the trim.seqs command."); m->mothurOutEndLine(); abort = true; } else if (fastaFile == "not open") { abort = true; } //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; - outputDir += hasPath(fastaFile); //if user entered a file with a path then preserve it + outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it } + //check for optional parameter and set defaults // ...at some point should added some additional type checking... string temp; temp = validParameter.validFile(parameters, "flip", false); if (temp == "not found"){ flip = 0; } - else if(isTrue(temp)) { flip = 1; } + else if(m->isTrue(temp)) { flip = 1; } temp = validParameter.validFile(parameters, "oligos", true); if (temp == "not found"){ oligoFile = ""; } @@ -104,7 +163,6 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; } convert(temp, maxLength); - temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; } convert(temp, bdiffs); @@ -124,16 +182,28 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; } convert(temp, qThreshold); - temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "F"; } - qtrim = isTrue(temp); + temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "F"; } + qtrim = m->isTrue(temp); + + temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; } + convert(temp, qRollAverage); + + temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; } + convert(temp, qWindowAverage); + + temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "100"; } + convert(temp, qWindowSize); + + temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "10"; } + convert(temp, qWindowStep); temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; } convert(temp, qAverage); temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; } - allFiles = isTrue(temp); + allFiles = m->isTrue(temp); - temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; } + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; } convert(temp, processors); if(allFiles && oligoFile == ""){ @@ -163,7 +233,7 @@ void TrimSeqsCommand::help(){ m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n"); m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n"); m->mothurOut("The fasta parameter is required.\n"); - m->mothurOut("The flip parameter .... The default is 0.\n"); + m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n"); m->mothurOut("The oligos parameter .... The default is "".\n"); m->mothurOut("The maxambig parameter .... The default is -1.\n"); m->mothurOut("The maxhomop parameter .... The default is 0.\n"); @@ -206,97 +276,173 @@ int TrimSeqsCommand::execute(){ numFPrimers = 0; //this needs to be initialized numRPrimers = 0; - string trimSeqFile = outputDir + getRootName(getSimpleName(fastaFile)) + "trim.fasta"; - outputNames.push_back(trimSeqFile); - string scrapSeqFile = outputDir + getRootName(getSimpleName(fastaFile)) + "scrap.fasta"; - outputNames.push_back(scrapSeqFile); - string groupFile = outputDir + getRootName(getSimpleName(fastaFile)) + "groups"; + string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta"; + outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile); + string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta"; + outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile); + string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual"; + string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual"; + if (qFileName != "") { outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile); outputTypes["qual"].push_back(trimQualFile); outputTypes["qual"].push_back(scrapQualFile); } + string groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; vector fastaFileNames; + vector qualFileNames; if(oligoFile != ""){ - outputNames.push_back(groupFile); - getOligos(fastaFileNames); + outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile); + getOligos(fastaFileNames, qualFileNames); } - - if(qFileName != "") { setLines(qFileName, qLines); } + vector fastaFilePos; + vector qFilePos; + + setLines(fastaFile, qFileName, fastaFilePos, qFilePos); + + for (int i = 0; i < (fastaFilePos.size()-1); i++) { + lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)])); + if (qFileName != "") { qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)])); } + } + if(qFileName == "") { qLines = lines; } //files with duds #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) if(processors == 1){ - ifstream inFASTA; - openInputFile(fastaFile, inFASTA); - int numSeqs=count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); - inFASTA.close(); - - lines.push_back(new linePair(0, numSeqs)); - - driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, groupFile, fastaFileNames, lines[0], lines[0]); + + driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]); for (int j = 0; j < fastaFileNames.size(); j++) { rename((fastaFileNames[j] + toString(getpid()) + ".temp").c_str(), fastaFileNames[j].c_str()); } + if(qFileName != ""){ + for (int j = 0; j < qualFileNames.size(); j++) { + rename((qualFileNames[j] + toString(getpid()) + ".temp").c_str(), qualFileNames[j].c_str()); + } + } }else{ - setLines(fastaFile, lines); - if(qFileName == "") { qLines = lines; } - - createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, groupFile, fastaFileNames); + createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames); rename((trimSeqFile + toString(processIDS[0]) + ".temp").c_str(), trimSeqFile.c_str()); rename((scrapSeqFile + toString(processIDS[0]) + ".temp").c_str(), scrapSeqFile.c_str()); rename((groupFile + toString(processIDS[0]) + ".temp").c_str(), groupFile.c_str()); + + if(qFileName != ""){ + rename((trimQualFile + toString(processIDS[0]) + ".temp").c_str(), trimQualFile.c_str()); + rename((scrapQualFile + toString(processIDS[0]) + ".temp").c_str(), scrapQualFile.c_str()); + } + + for (int j = 0; j < fastaFileNames.size(); j++) { rename((fastaFileNames[j] + toString(processIDS[0]) + ".temp").c_str(), fastaFileNames[j].c_str()); } + if(qFileName != ""){ + for (int j = 0; j < qualFileNames.size(); j++) { + rename((qualFileNames[j] + toString(getpid()) + ".temp").c_str(), qualFileNames[j].c_str()); + } + } + //append files for(int i=1;iappendFiles((trimSeqFile + toString(processIDS[i]) + ".temp"), trimSeqFile); remove((trimSeqFile + toString(processIDS[i]) + ".temp").c_str()); - appendFiles((scrapSeqFile + toString(processIDS[i]) + ".temp"), scrapSeqFile); + m->appendFiles((scrapSeqFile + toString(processIDS[i]) + ".temp"), scrapSeqFile); remove((scrapSeqFile + toString(processIDS[i]) + ".temp").c_str()); - appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile); + + m->appendFiles((trimQualFile + toString(processIDS[i]) + ".temp"), trimQualFile); + remove((trimQualFile + toString(processIDS[i]) + ".temp").c_str()); + m->appendFiles((scrapQualFile + toString(processIDS[i]) + ".temp"), scrapQualFile); + remove((scrapQualFile + toString(processIDS[i]) + ".temp").c_str()); + + m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile); remove((groupFile + toString(processIDS[i]) + ".temp").c_str()); for (int j = 0; j < fastaFileNames.size(); j++) { - appendFiles((fastaFileNames[j] + toString(processIDS[i]) + ".temp"), fastaFileNames[j]); + m->appendFiles((fastaFileNames[j] + toString(processIDS[i]) + ".temp"), fastaFileNames[j]); remove((fastaFileNames[j] + toString(processIDS[i]) + ".temp").c_str()); } + + if(qFileName != ""){ + for (int j = 0; j < qualFileNames.size(); j++) { + m->appendFiles((qualFileNames[j] + toString(processIDS[i]) + ".temp"), qualFileNames[j]); + remove((qualFileNames[j] + toString(processIDS[i]) + ".temp").c_str()); + } + } + + } } if (m->control_pressed) { return 0; } #else - ifstream inFASTA; - openInputFile(fastaFile, inFASTA); - int numSeqs=count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); - inFASTA.close(); - - lines.push_back(new linePair(0, numSeqs)); - - driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, groupFile, fastaFileNames, lines[0], lines[0]); + driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]); + for (int j = 0; j < fastaFileNames.size(); j++) { + rename((fastaFileNames[j] + toString(j) + ".temp").c_str(), fastaFileNames[j].c_str()); + } + if(qFileName != ""){ + for (int j = 0; j < qualFileNames.size(); j++) { + rename((qualFileNames[j] + toString(j) + ".temp").c_str(), qualFileNames[j].c_str()); + } + } + if (m->control_pressed) { return 0; } #endif for(int i=0;i'){ - inFASTA >> seqName; - outGroups << seqName << '\t' << groupVector[i] << endl; + if (m->isBlank(fastaFileNames[i])) { remove(fastaFileNames[i].c_str()); } + else if (filesToRemove.count(fastaFileNames[i]) > 0) { remove(fastaFileNames[i].c_str()); } + else { + ifstream inFASTA; + string seqName; + m->openInputFile(fastaFileNames[i], inFASTA); + ofstream outGroups; + string outGroupFilename = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "groups"; + m->openOutputFile(outGroupFilename, outGroups); + outputNames.push_back(outGroupFilename); outputTypes["group"].push_back(outGroupFilename); + + string thisGroup = ""; + if (i > comboStarts) { + map::iterator itCombo; + for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){ + if(itCombo->second == i){ thisGroup = itCombo->first; combos.erase(itCombo); break; } + } + }else{ thisGroup = groupVector[i]; } + + while(!inFASTA.eof()){ + if(inFASTA.get() == '>'){ + inFASTA >> seqName; + outGroups << seqName << '\t' << thisGroup << endl; + } + while (!inFASTA.eof()) { char c = inFASTA.get(); if (c == 10 || c == 13){ break; } } + } + outGroups.close(); + inFASTA.close(); + } + } + + if(qFileName != ""){ + for(int i=0;iisBlank(qualFileNames[i])) { remove(qualFileNames[i].c_str()); } + else if (filesToRemove.count(qualFileNames[i]) > 0) { remove(qualFileNames[i].c_str()); } + else { + ifstream inQual; + string seqName; + m->openInputFile(qualFileNames[i], inQual); +// ofstream outGroups; +// +// string thisGroup = ""; +// if (i > comboStarts) { +// map::iterator itCombo; +// for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){ +// if(itCombo->second == i){ thisGroup = itCombo->first; combos.erase(itCombo); break; } +// } +// } +// else{ thisGroup = groupVector[i]; } + + inQual.close(); } - while (!inFASTA.eof()) { char c = inFASTA.get(); if (c == 10 || c == 13){ break; } } } - outGroups.close(); - inFASTA.close(); } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; @@ -317,116 +463,184 @@ int TrimSeqsCommand::execute(){ } /**************************************************************************************/ -int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string groupFile, vector fastaNames, linePair* line, linePair* qline) { + +int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector fastaNames, vector qualNames, linePair* line, linePair* qline) { + try { ofstream outFASTA; - int able = openOutputFile(trimFile, outFASTA); + int able = m->openOutputFile(trimFile, outFASTA); ofstream scrapFASTA; - openOutputFile(scrapFile, scrapFASTA); + m->openOutputFile(scrapFile, scrapFASTA); + + ofstream outQual; + ofstream scrapQual; + if(qFileName != ""){ + m->openOutputFile(trimQFile, outQual); + m->openOutputFile(scrapQFile, scrapQual); + } ofstream outGroups; - vector fastaFileNames; + //vector fastaFileNames; + //vector qualFileNames; + if (oligoFile != "") { - openOutputFile(groupFile, outGroups); + m->openOutputFile(groupFile, outGroups); for (int i = 0; i < fastaNames.size(); i++) { + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); + fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp"); + //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); + //clear old file if it exists + ofstream temp; + m->openOutputFile(fastaNames[i], temp); + temp.close(); + if(qFileName != ""){ + qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp"); + //qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); + //clear old file if it exists + ofstream temp2; + m->openOutputFile(qualNames[i], temp2); + temp2.close(); + } #else - fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate)); + //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate)); + fastaNames[i] = (fastaNames[i] + toString(i) + ".temp"); + ofstream temp; + m->openOutputFile(fastaNames[i], temp); + temp.close(); + if(qFileName != ""){ + //qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate)); + qualNames[i] = (qualNames[i] + toString(i) + ".temp"); + ofstream temp2; + m->openOutputFile(qualNames[i], temp2); + temp2.close(); + } #endif } } ifstream inFASTA; - openInputFile(filename, inFASTA); + m->openInputFile(filename, inFASTA); + inFASTA.seekg(line->start); ifstream qFile; - if(qFileName != "") { openInputFile(qFileName, qFile); } + if(qFileName != "") { m->openInputFile(qFileName, qFile); qFile.seekg(qline->start); } - qFile.seekg(qline->start); - inFASTA.seekg(line->start); - - for(int i=0;inum;i++){ + bool done = false; + int count = 0; + + while (!done) { if (m->control_pressed) { - inFASTA.close(); - outFASTA.close(); - scrapFASTA.close(); + inFASTA.close(); outFASTA.close(); scrapFASTA.close(); if (oligoFile != "") { outGroups.close(); } - if(qFileName != "") { qFile.close(); } - for(int i=0;iclose(); - delete fastaFileNames[i]; - } + + //for(int i=0;iclose(); delete fastaFileNames[i]; } + + if(qFileName != ""){ + qFile.close(); + //for(int i=0;iclose(); delete qualFileNames[i]; } + } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } + return 0; } int success = 1; - Sequence currSeq(inFASTA); + Sequence currSeq(inFASTA); m->gobble(inFASTA); + + QualityScores currQual; + if(qFileName != ""){ + currQual = QualityScores(qFile, currSeq.getNumBases()); m->gobble(qFile); + } + string origSeq = currSeq.getUnaligned(); if (origSeq != "") { - int group; + int groupBar, groupPrime; string trashCode = ""; int currentSeqsDiffs = 0; - + if(qFileName != ""){ - if(qThreshold != 0) { success = stripQualThreshold(currSeq, qFile); } - else if(qAverage != 0) { success = cullQualAverage(currSeq, qFile); } - + if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); } + else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); } + else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); } + else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); } + if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap } - if(!success) { trashCode += 'q'; } + if(!success) { trashCode += 'q'; } } if(barcodes.size() != 0){ - success = stripBarcode(currSeq, group); - if(success > bdiffs){ trashCode += 'b'; } + success = stripBarcode(currSeq, currQual, groupBar); + if(success > bdiffs) { trashCode += 'b'; } else{ currentSeqsDiffs += success; } } if(numFPrimers != 0){ - success = stripForward(currSeq); - if(success > pdiffs){ trashCode += 'f'; } + success = stripForward(currSeq, currQual, groupPrime); + if(success > pdiffs) { trashCode += 'f'; } else{ currentSeqsDiffs += success; } } - if (currentSeqsDiffs > tdiffs) { trashCode += 't'; } + if (currentSeqsDiffs > tdiffs) { trashCode += 't'; } if(numRPrimers != 0){ - success = stripReverse(currSeq); - if(!success){ trashCode += 'r'; } + success = stripReverse(currSeq, currQual); + if(!success) { trashCode += 'r'; } } if(minLength > 0 || maxLength > 0){ success = cullLength(currSeq); - if(!success){ trashCode += 'l'; } + if(!success) { trashCode += 'l'; } } if(maxHomoP > 0){ success = cullHomoP(currSeq); - if(!success){ trashCode += 'h'; } + if(!success) { trashCode += 'h'; } } if(maxAmbig != -1){ success = cullAmbigs(currSeq); - if(!success){ trashCode += 'n'; } + if(!success) { trashCode += 'n'; } } - if(flip){ currSeq.reverseComplement(); } // should go last + if(flip){ currSeq.reverseComplement(); currQual.flipQScores(); } // should go last if(trashCode.length() == 0){ currSeq.setAligned(currSeq.getUnaligned()); currSeq.printSequence(outFASTA); + currQual.printQScores(outQual); + if(barcodes.size() != 0){ - outGroups << currSeq.getName() << '\t' << groupVector[group] << endl; - + string thisGroup = groupVector[groupBar]; + int indexToFastaFile = groupBar; + if (primers.size() != 0){ + //does this primer have a group + if (groupVector[groupPrime] != "") { + thisGroup += "." + groupVector[groupPrime]; + indexToFastaFile = combos[thisGroup]; + } + } + outGroups << currSeq.getName() << '\t' << thisGroup << endl; if(allFiles){ - currSeq.printSequence(*fastaFileNames[group]); + ofstream outTemp; + m->openOutputFileAppend(fastaNames[indexToFastaFile], outTemp); + //currSeq.printSequence(*fastaFileNames[indexToFastaFile]); + currSeq.printSequence(outTemp); + outTemp.close(); + + if(qFileName != ""){ + //currQual.printQScores(*qualFileNames[indexToFastaFile]); + ofstream outTemp2; + m->openOutputFileAppend(qualNames[indexToFastaFile], outTemp2); + currQual.printQScores(outTemp2); + outTemp2.close(); + } } } } @@ -435,31 +649,55 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string currSeq.setUnaligned(origSeq); currSeq.setAligned(origSeq); currSeq.printSequence(scrapFASTA); + currQual.printQScores(scrapQual); } + count++; } - gobble(inFASTA); + + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + unsigned long int pos = inFASTA.tellg(); + if ((pos == -1) || (pos >= line->end)) { break; } + #else + if (inFASTA.eof()) { break; } + #endif + + //report progress + if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } + } + //report progress + if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } + inFASTA.close(); outFASTA.close(); scrapFASTA.close(); if (oligoFile != "") { outGroups.close(); } - if(qFileName != "") { qFile.close(); } + if(qFileName != "") { qFile.close(); scrapQual.close(); outQual.close(); } - for(int i=0;iclose(); - delete fastaFileNames[i]; - } + //for(int i=0;iclose(); + // delete fastaFileNames[i]; + //} + + //if(qFileName != ""){ + //for(int i=0;iclose(); + //delete qualFileNames[i]; + //} + //} - return 0; + return count; } catch(exception& e) { m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim"); exit(1); } } + /**************************************************************************************************/ -int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string groupFile, vector fastaNames) { + +int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector fastaNames, vector qualNames) { try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) int process = 0; @@ -474,7 +712,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, lines[process], qLines[process]); + driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (trimQFile + toString(getpid()) + ".temp"), (scrapQFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, qualNames, lines[process], qLines[process]); exit(0); }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } } @@ -493,35 +731,73 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName exit(1); } } + /**************************************************************************************************/ -int TrimSeqsCommand::setLines(string filename, vector& lines) { +int TrimSeqsCommand::setLines(string filename, string qfilename, vector& fastaFilePos, vector& qfileFilePos) { try { - lines.clear(); + //set file positions for fasta file + fastaFilePos = m->divideFile(filename, processors); - vector positions; + if (qfilename == "") { return processors; } - ifstream inFASTA; - openInputFile(filename, inFASTA); + //get name of first sequence in each chunk + map firstSeqNames; + for (int i = 0; i < (fastaFilePos.size()-1); i++) { + ifstream in; + m->openInputFile(filename, in); + in.seekg(fastaFilePos[i]); + + Sequence temp(in); + firstSeqNames[temp.getName()] = i; + + in.close(); + } + + //seach for filePos of each first name in the qfile and save in qfileFilePos + ifstream inQual; + m->openInputFile(qfilename, inQual); string input; - while(!inFASTA.eof()){ - input = getline(inFASTA); + while(!inQual.eof()){ + input = m->getline(inQual); if (input.length() != 0) { - if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } + if(input[0] == '>'){ //this is a sequence name line + istringstream nameStream(input); + + string sname = ""; nameStream >> sname; + sname = sname.substr(1); + + map::iterator it = firstSeqNames.find(sname); + + if(it != firstSeqNames.end()) { //this is the start of a new chunk + unsigned long int pos = inQual.tellg(); + qfileFilePos.push_back(pos - input.length() - 1); + firstSeqNames.erase(it); + } + } } + + if (firstSeqNames.size() == 0) { break; } } - inFASTA.close(); + inQual.close(); - int numFastaSeqs = positions.size(); - + if (firstSeqNames.size() != 0) { + for (map::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) { + m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine(); + } + qFileName = ""; + return processors; + } + + //get last file position of qfile FILE * pFile; - long size; + unsigned long int size; //get num bytes in file - pFile = fopen (filename.c_str(),"rb"); + pFile = fopen (qfilename.c_str(),"rb"); if (pFile==NULL) perror ("Error opening file"); else{ fseek (pFile, 0, SEEK_END); @@ -529,20 +805,9 @@ int TrimSeqsCommand::setLines(string filename, vector& lines) { fclose (pFile); } - int numSeqsPerProcessor = numFastaSeqs / processors; - - for (int i = 0; i < processors; i++) { - - long int startPos = positions[ i * numSeqsPerProcessor ]; - if(i == processors - 1){ - numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; - }else{ - long int myEnd = positions[ (i+1) * numSeqsPerProcessor ]; - } - lines.push_back(new linePair(startPos, numSeqsPerProcessor)); - } + qfileFilePos.push_back(size); - return numFastaSeqs; + return processors; } catch(exception& e) { m->errorOut(e, "TrimSeqsCommand", "setLines"); @@ -551,23 +816,27 @@ int TrimSeqsCommand::setLines(string filename, vector& lines) { } //*************************************************************************************************************** -void TrimSeqsCommand::getOligos(vector& outFASTAVec){ //vector& outFASTAVec +void TrimSeqsCommand::getOligos(vector& outFASTAVec, vector& outQualVec){ try { ifstream inOligos; - openInputFile(oligoFile, inOligos); + m->openInputFile(oligoFile, inOligos); ofstream test; string type, oligo, group; int index=0; + //int indexPrimer = 0; while(!inOligos.eof()){ inOligos >> type; - + if(type[0] == '#'){ while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there } else{ + //make type case insensitive + for(int i=0;i> oligo; for(int i=0;i& outFASTAVec){ //vector::iterator itPrime = primers.find(oligo); + if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } + + primers[oligo]=index; index++; + groupVector.push_back(group); + + if(allFiles){ + outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); + if(qFileName != ""){ + outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); + } + if (group == "") { //if there is not a group for this primer, then this file will not get written to, but we add it to keep the indexes correct + filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); + if(qFileName != ""){ + filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); + } + }else { + outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); + outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); + if(qFileName != ""){ + outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); + outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); + } + } + } + } - else if(type == "reverse"){ + else if(type == "REVERSE"){ Sequence oligoRC("reverse", oligo); oligoRC.reverseComplement(); revPrimer.push_back(oligoRC.getUnaligned()); } - else if(type == "barcode"){ + else if(type == "BARCODE"){ inOligos >> group; - barcodes[oligo]=index++; + + //check for repeat barcodes + map::iterator itBar = barcodes.find(oligo); + if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } + + barcodes[oligo]=index; index++; groupVector.push_back(group); if(allFiles){ - //outFASTAVec.push_back(new ofstream((outputDir + getRootName(getSimpleName(fastaFile)) + group + ".fasta").c_str(), ios::ate)); - outputNames.push_back((outputDir + getRootName(getSimpleName(fastaFile)) + group + ".fasta")); - outFASTAVec.push_back((outputDir + getRootName(getSimpleName(fastaFile)) + group + ".fasta")); + outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); + outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); + outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); + if(qFileName != ""){ + outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); + outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); + outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); + } } - } + }else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); } } + m->gobble(inOligos); } inOligos.close(); - numFPrimers = forPrimer.size(); + //add in potential combos + if(allFiles){ + comboStarts = outFASTAVec.size()-1; + for (map::iterator itBar = barcodes.begin(); itBar != barcodes.end(); itBar++) { + for (map::iterator itPrime = primers.begin(); itPrime != primers.end(); itPrime++) { + if (groupVector[itPrime->second] != "") { //there is a group for this primer + outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta")); + outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta")); + outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta")); + combos[(groupVector[itBar->second] + "." + groupVector[itPrime->second])] = outFASTAVec.size()-1; + + if(qFileName != ""){ + outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual")); + outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual")); + outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual")); + } + } + } + } + } + + numFPrimers = primers.size(); numRPrimers = revPrimer.size(); } @@ -610,7 +947,7 @@ void TrimSeqsCommand::getOligos(vector& outFASTAVec){ //vectorsecond; seq.setUnaligned(rawSequence.substr(oligo.length())); + + if(qual.getName() != ""){ + qual.trimQScores(oligo.length(), -1); + } + success = 0; break; } @@ -650,7 +992,7 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, int& group){ maxLength = it->first.length(); } } - alignment = new NeedlemanOverlap(-2.0, 1.0, -1.0, (maxLength+bdiffs+1)); + alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1)); }else{ alignment = NULL; } @@ -684,6 +1026,9 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, int& group){ int newStart=0; int numDiff = countDiffs(oligo, temp); + +// cout << oligo << '\t' << temp << '\t' << numDiff << endl; + if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; @@ -706,6 +1051,10 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, int& group){ else{ //use the best match group = minGroup; seq.setUnaligned(rawSequence.substr(minPos)); + + if(qual.getName() != ""){ + qual.trimQScores(minPos, -1); + } success = minDiff; } @@ -726,27 +1075,31 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, int& group){ //*************************************************************************************************************** -int TrimSeqsCommand::stripForward(Sequence& seq){ +int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){ try { string rawSequence = seq.getUnaligned(); int success = pdiffs + 1; //guilty until proven innocent //can you find the primer - for(int i=0;i::iterator it=primers.begin();it!=primers.end();it++){ + string oligo = it->first; + if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length + success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; } if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ + group = it->second; seq.setUnaligned(rawSequence.substr(oligo.length())); + if(qual.getName() != ""){ + qual.trimQScores(oligo.length(), -1); + + } success = 0; break; } } - + //if you found the barcode or if you don't want to allow for diffs // cout << success; if ((pdiffs == 0) || (success == 0)) { return success; } @@ -757,24 +1110,27 @@ int TrimSeqsCommand::stripForward(Sequence& seq){ int maxLength = 0; Alignment* alignment; - if (numFPrimers > 0) { + if (primers.size() > 0) { + map::iterator it=primers.begin(); - for(int i=0;i maxLength){ - maxLength = forPrimer[i].length(); + for(it;it!=primers.end();it++){ + if(it->first.length() > maxLength){ + maxLength = it->first.length(); } } - alignment = new NeedlemanOverlap(-2.0, 1.0, -1.0, (maxLength+pdiffs+1)); + alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1)); }else{ alignment = NULL; } //can you find the barcode int minDiff = 1e6; int minCount = 1; + int minGroup = -1; int minPos = 0; - for(int i=0;i::iterator it=primers.begin();it!=primers.end();it++){ + string oligo = it->first; +// int length = oligo.length(); if(rawSequence.length() < maxLength){ success = pdiffs + 100; @@ -793,12 +1149,16 @@ int TrimSeqsCommand::stripForward(Sequence& seq){ } oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); - + int newStart=0; int numDiff = countDiffs(oligo, temp); + +// cout << oligo << '\t' << temp << '\t' << numDiff << endl; + if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; + minGroup = it->second; minPos = 0; for(int i=0;i pdiffs) { success = minDiff; } - else if(minCount > 1) { success = pdiffs + 10; } - else{ + + if(minDiff > pdiffs) { success = minDiff; } //no good matches + else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers + else{ //use the best match + group = minGroup; seq.setUnaligned(rawSequence.substr(minPos)); + if(qual.getName() != ""){ + qual.trimQScores(minPos, -1); + } success = minDiff; } if (alignment != NULL) { delete alignment; } } + return success; } @@ -832,7 +1198,7 @@ int TrimSeqsCommand::stripForward(Sequence& seq){ //*************************************************************************************************************** -bool TrimSeqsCommand::stripReverse(Sequence& seq){ +bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){ try { string rawSequence = seq.getUnaligned(); bool success = 0; //guilty until proven innocent @@ -847,6 +1213,9 @@ bool TrimSeqsCommand::stripReverse(Sequence& seq){ if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){ seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length())); + if(qual.getName() != ""){ + qual.trimQScores(-1, rawSequence.length()-oligo.length()); + } success = 1; break; } @@ -980,9 +1349,9 @@ int TrimSeqsCommand::countDiffs(string oligo, string seq){ else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; } else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; } else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; } - else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; } - + else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; } } + } return countDiffs; @@ -995,102 +1364,76 @@ int TrimSeqsCommand::countDiffs(string oligo, string seq){ } //*************************************************************************************************************** -bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){ - try { +//bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){ +// try { +// // string rawSequence = seq.getUnaligned(); -// int seqLength; // = rawSequence.length(); -// string name, temp, temp2; +// int seqLength = seq.getNumBases(); +// bool success = 0; //guilty until proven innocent +// string name; // // qFile >> name; +// if (name[0] == '>') { if(name.substr(1) != seq.getName()) { m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); } } +// +// while (!qFile.eof()) { char c = qFile.get(); if (c == 10 || c == 13){ break; } } // -// //get rest of line -// temp = ""; -// while (!qFile.eof()) { -// char c = qFile.get(); -// if (c == 10 || c == 13){ break; } -// else { temp += c; } -// } -// -// int pos = temp.find("length"); -// if (pos == temp.npos) { m->mothurOut("Cannot find length in qfile for " + seq.getName()); m->mothurOutEndLine(); seqLength = 0; } -// else { -// string tempLength = temp.substr(pos); -// istringstream iss (tempLength,istringstream::in); -// iss >> temp; +// int score; +// int end = seqLength; +// +// for(int i=0;i> score; +// +// if(score < qThreshold){ +// end = i; +// break; +// } +// } +// for(int i=end+1;i> score; // } // -// splitAtEquals(temp2, temp); //separates length=242, temp=length, temp2=242 -// convert(temp, seqLength); //converts string to int -// -// if (name.length() != 0) { if(name.substr(1) != seq.getName()) { m->mothurOut("sequence name mismatch btwn fasta and qual file"); m->mothurOutEndLine(); } } - - string rawSequence = seq.getUnaligned(); - int seqLength = seq.getNumBases(); - bool success = 0; //guilty until proven innocent - string name; - - qFile >> name; - if (name[0] == '>') { if(name.substr(1) != seq.getName()) { m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); } } - - while (!qFile.eof()) { char c = qFile.get(); if (c == 10 || c == 13){ break; } } - - int score; - int end = seqLength; - - for(int i=0;i> score; - - if(score < qThreshold){ - end = i; - break; - } - } - for(int i=end+1;i> score; - } - - seq.setUnaligned(rawSequence.substr(0,end)); - - return 1; - } - catch(exception& e) { - m->errorOut(e, "TrimSeqsCommand", "stripQualThreshold"); - exit(1); - } -} +// seq.setUnaligned(rawSequence.substr(0,end)); +// +// return 1; +// } +// catch(exception& e) { +// m->errorOut(e, "TrimSeqsCommand", "stripQualThreshold"); +// exit(1); +// } +//} //*************************************************************************************************************** -bool TrimSeqsCommand::cullQualAverage(Sequence& seq, ifstream& qFile){ - try { - string rawSequence = seq.getUnaligned(); - int seqLength = seq.getNumBases(); - bool success = 0; //guilty until proven innocent - string name; - - qFile >> name; - if (name[0] == '>') { if(name.substr(1) != seq.getName()) { m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); } } - - while (!qFile.eof()) { char c = qFile.get(); if (c == 10 || c == 13){ break; } } - - float score; - float average = 0; - - for(int i=0;i> score; - average += score; - } - average /= seqLength; - - if(average >= qAverage) { success = 1; } - else { success = 0; } - - return success; - } - catch(exception& e) { - m->errorOut(e, "TrimSeqsCommand", "cullQualAverage"); - exit(1); - } -} +//bool TrimSeqsCommand::cullQualAverage(Sequence& seq, ifstream& qFile){ +// try { +// string rawSequence = seq.getUnaligned(); +// int seqLength = seq.getNumBases(); +// bool success = 0; //guilty until proven innocent +// string name; +// +// qFile >> name; +// if (name[0] == '>') { if(name.substr(1) != seq.getName()) { m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); } } +// +// while (!qFile.eof()) { char c = qFile.get(); if (c == 10 || c == 13){ break; } } +// +// float score; +// float average = 0; +// +// for(int i=0;i> score; +// average += score; +// } +// average /= seqLength; +// +// if(average >= qAverage) { success = 1; } +// else { success = 0; } +// +// return success; +// } +// catch(exception& e) { +// m->errorOut(e, "TrimSeqsCommand", "cullQualAverage"); +// exit(1); +// } +//} //***************************************************************************************************************