From: pschloss Date: Mon, 20 Dec 2010 12:29:36 +0000 (+0000) Subject: pat's changes to seq.error command X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=fe5bbb79f9434df947347881c47b430112f4253e pat's changes to seq.error command --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 5602504..9cd6afa 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -7,6 +7,8 @@ objects = { /* Begin PBXFileReference section */ + 7E13BDD112BE3FEE004B8A53 /* reportfile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = reportfile.h; sourceTree = ""; }; + 7E13BDD212BE3FEF004B8A53 /* reportfile.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = reportfile.cpp; sourceTree = ""; }; 7E4EBD43122018FB00D85E7B /* simpsoneven.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simpsoneven.h; sourceTree = ""; }; 7E4EBD44122018FB00D85E7B /* simpsoneven.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = simpsoneven.cpp; sourceTree = ""; }; 7E5B28DC121FEFCC0005339C /* shannoneven.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = shannoneven.h; sourceTree = ""; }; @@ -656,6 +658,8 @@ A7DA2174113FECD400BF472F /* validparameter.h */, A7DA2175113FECD400BF472F /* venn.cpp */, A7DA2176113FECD400BF472F /* venn.h */, + 7E13BDD112BE3FEE004B8A53 /* reportfile.h */, + 7E13BDD212BE3FEF004B8A53 /* reportfile.cpp */, ); name = mothur; sourceTree = ""; diff --git a/aligncommand.cpp b/aligncommand.cpp index 87ca6cb..3940842 100644 --- a/aligncommand.cpp +++ b/aligncommand.cpp @@ -555,7 +555,8 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam if (m->control_pressed) { return 0; } Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA); - + report.setCandidate(candidateSeq); + int origNumBases = candidateSeq->getNumBases(); string originalUnaligned = candidateSeq->getUnaligned(); int numBasesNeeded = origNumBases * threshold; @@ -618,7 +619,6 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam accnosFile << candidateSeq->getName() << wasBetter << endl; } - report.setCandidate(candidateSeq); report.setTemplate(templateSeq); report.setSearchParameters(search, searchScore); report.setAlignmentParameters(align, alignment); @@ -706,7 +706,8 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align istringstream iss (tempBuf,istringstream::in); Sequence* candidateSeq = new Sequence(iss); - + report.setCandidate(candidateSeq); + int origNumBases = candidateSeq->getNumBases(); string originalUnaligned = candidateSeq->getUnaligned(); int numBasesNeeded = origNumBases * threshold; @@ -777,7 +778,6 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align MPIWroteAccnos = true; } - report.setCandidate(candidateSeq); report.setTemplate(templateSeq); report.setSearchParameters(search, searchScore); report.setAlignmentParameters(align, alignment); diff --git a/mothur b/mothur index f7fdc10..1389411 100755 Binary files a/mothur and b/mothur differ diff --git a/qualityscores.cpp b/qualityscores.cpp index f76d982..2ee71e0 100644 --- a/qualityscores.cpp +++ b/qualityscores.cpp @@ -34,11 +34,8 @@ QualityScores::QualityScores(ifstream& qFile, int l){ seqLength = l; int score; - //string line; - //m->getline(qFile, line); - //istringstream nameStream(line); - qFile >> seqName; + while (!qFile.eof()) { char c = qFile.get(); if (c == 10 || c == 13 || c == -1){ break; } } // get rest of line m->gobble(qFile); if (seqName == "") { m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg())); m->mothurOutEndLine(); } @@ -106,8 +103,6 @@ QualityScores::QualityScores(ifstream& qFile, int l){ } /**************************************************************************************************/ -/**************************************************************************************************/ - string QualityScores::getName(){ try { @@ -151,8 +146,10 @@ void QualityScores::trimQScores(int start, int end){ qScores = hold; } if(start == -1){ - hold = vector(qScores.begin(), qScores.begin()+end); //not sure if indexing is correct - qScores = hold; + if(qScores.size() > end){ + hold = vector(qScores.begin(), qScores.begin()+end); + qScores = hold; + } } seqLength = qScores.size(); @@ -267,7 +264,8 @@ bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int int end = windowSize; int start = 0; - + if(seqLength < windowSize) { return 0; } + while(start < seqLength){ double windowSum = 0.0000; @@ -340,3 +338,24 @@ bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){ } /**************************************************************************************************/ + +void QualityScores::updateQScoreErrorMap(map >& qualErrorMap, string errorSeq, int start, int stop, int weight){ + try { + + for(int i=start-1;ierrorOut(e, "TrimSeqsCommand", "updateQScoreErrorMap"); + exit(1); + } +} + +/**************************************************************************************************/ diff --git a/qualityscores.h b/qualityscores.h index 5db4311..dee5917 100644 --- a/qualityscores.h +++ b/qualityscores.h @@ -29,6 +29,7 @@ public: bool stripQualRollingAverage(Sequence&, double); bool stripQualWindowAverage(Sequence&, int, int, double); bool cullQualAverage(Sequence&, double); + void updateQScoreErrorMap(map >&, string, int, int, int); private: double calculateAverage(); diff --git a/reportfile.cpp b/reportfile.cpp new file mode 100644 index 0000000..f392ed9 --- /dev/null +++ b/reportfile.cpp @@ -0,0 +1,73 @@ +/* + * reportfile.cpp + * Mothur + * + * Created by Pat Schloss on 12/19/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "reportfile.h" + +/**************************************************************************************************/ + +ReportFile::ReportFile(){ + try { + m = MothurOut::getInstance(); + } + catch(exception& e) { + m->errorOut(e, "ReportFile", "ReportFile"); + exit(1); + } +} + +/**************************************************************************************************/ + +ReportFile::ReportFile(ifstream& repFile, string repFileName){ + try { + m = MothurOut::getInstance(); + + m->openInputFile(repFileName, repFile); + m->getline(repFile); + } + catch(exception& e) { + m->errorOut(e, "ReportFile", "ReportFile"); + exit(1); + } +} + + +/**************************************************************************************************/ + +ReportFile::ReportFile(ifstream& repFile){ + try { + + m = MothurOut::getInstance(); + + repFile >> queryName; + repFile >> queryLength; + repFile >> templateName; + repFile >> templateLength; + repFile >> searchMethod; + repFile >> searchScore; + repFile >> alignmentMethod; + repFile >> queryStart; + repFile >> queryEnd; + repFile >> templateStart; + repFile >> templateEnd; + repFile >> pairwiseAlignmentLength; + repFile >> gapsInQuery; + repFile >> gapsInTemplate; + repFile >> longestInsert; + repFile >> simBtwnQueryAndTemplate; + + m->gobble(repFile); + } + catch(exception& e) { + m->errorOut(e, "ReportFile", "ReportFile"); + exit(1); + } + +} + +/**************************************************************************************************/ diff --git a/reportfile.h b/reportfile.h new file mode 100644 index 0000000..1bd2c94 --- /dev/null +++ b/reportfile.h @@ -0,0 +1,57 @@ +#ifndef REPORTFILE +#define REPORTFILE + +/* + * reportfile.h + * Mothur + * + * Created by Pat Schloss on 7/12/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + + +#include "mothur.h" +#include "mothurout.h" + +/**************************************************************************************************/ + +class ReportFile { +public: + ReportFile(); + ReportFile(ifstream&); + ReportFile(ifstream&, string); + + string getQueryName() { return queryName; } + string getTemplateName() { return templateName; } + string getSearchMethod() { return searchMethod; } + string getAlignmentMethod() { return alignmentMethod; } + + int getQueryLength() { return queryLength; } + int getTemplateLength() { return templateLength; } + int getQueryStart() { return queryStart; } + int getQueryEnd() { return queryEnd; } + int getTemplateStart() { return templateStart; } + int getTemplateEnd() { return templateEnd; } + int getPairwiseAlignmentLength() { return pairwiseAlignmentLength; } + int getGapsInQuery() { return gapsInQuery; } + int getGapsInTemplate() { return gapsInTemplate; } + int getLongestInsert() { return longestInsert; } + + float getSearchScore() { return searchScore; } + float getSimBtwnQueryAndTemplate() { return simBtwnQueryAndTemplate; } + + +private: + + MothurOut* m; + + string queryName, templateName, searchMethod, alignmentMethod; + int queryLength, templateLength, queryStart, queryEnd, templateStart, templateEnd, pairwiseAlignmentLength, gapsInQuery, gapsInTemplate, longestInsert; + float searchScore, simBtwnQueryAndTemplate; + +}; + +/**************************************************************************************************/ + +#endif diff --git a/seqerrorcommand.cpp b/seqerrorcommand.cpp index ea04954..77b12ed 100644 --- a/seqerrorcommand.cpp +++ b/seqerrorcommand.cpp @@ -8,11 +8,13 @@ */ #include "seqerrorcommand.h" +#include "reportfile.h" +#include "qualityscores.h" //********************************************************************************************************************** vector SeqErrorCommand::getValidParameters(){ try { - string Array[] = {"query", "reference", "name", "threshold", "inputdir", "outputdir"}; + string Array[] = {"query", "reference", "name", "qfile", "report", "threshold", "inputdir", "outputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); return myArray; } @@ -72,7 +74,7 @@ SeqErrorCommand::SeqErrorCommand(string option) { string temp; //valid paramters for this command - string AlignArray[] = {"query", "reference", "name", "threshold", "inputdir", "outputdir"}; + string AlignArray[] = {"query", "reference", "name", "qfile", "report", "threshold", "inputdir", "outputdir"}; //need to implement name file option @@ -116,12 +118,28 @@ SeqErrorCommand::SeqErrorCommand(string option) { } it = parameters.find("name"); - //user has given a template file + //user has given a names file if(it != parameters.end()){ path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["name"] = inputDir + it->second; } } + + it = parameters.find("qfile"); + //user has given a quality score file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["qfile"] = inputDir + it->second; } + } + + it = parameters.find("report"); + //user has given a alignment report file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["report"] = inputDir + it->second; } + } } //check for required parameters @@ -133,10 +151,24 @@ SeqErrorCommand::SeqErrorCommand(string option) { if (referenceFileName == "not found") { m->mothurOut("reference is a required parameter for the seq.error command."); m->mothurOutEndLine(); abort = true; } else if (referenceFileName == "not open") { abort = true; } - //if the user changes the output directory command factory will send this info to us in the output parameter + + //check for optional parameters namesFileName = validParameter.validFile(parameters, "name", true); if(namesFileName == "not found"){ namesFileName = ""; } - cout << namesFileName << endl; + + qualFileName = validParameter.validFile(parameters, "qfile", true); + if(qualFileName == "not found"){ qualFileName = ""; } + + reportFileName = validParameter.validFile(parameters, "report", true); + if(reportFileName == "not found"){ reportFileName = ""; } + + if((reportFileName != "" && qualFileName == "") || (reportFileName == "" && qualFileName != "")){ + m->mothurOut("if you use either a qual file or a report file, you have to have both."); + m->mothurOutEndLine(); + abort = true; + } + + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ @@ -205,7 +237,18 @@ int SeqErrorCommand::execute(){ ifstream queryFile; m->openInputFile(queryFileName, queryFile); - + + ifstream reportFile; + ifstream qualFile; + + ReportFile report; + QualityScores quality; + + if(qualFileName != "" && reportFileName != ""){ + m->openInputFile(qualFileName, qualFile); + report = ReportFile(reportFile, reportFileName); + } + int totalBases = 0; int totalMatches = 0; @@ -214,10 +257,14 @@ int SeqErrorCommand::execute(){ int numSeqs = 0; map::iterator it; + map > qScoreErrorMap; + qScoreErrorMap['m'].assign(41, 0); + qScoreErrorMap['s'].assign(41, 0); + qScoreErrorMap['i'].assign(41, 0); + qScoreErrorMap['a'].assign(41, 0); while(queryFile){ Compare minCompare; - Sequence query(queryFile); for(int i=0;isecond; } - else { - minCompare.weight = 1; - } + else { minCompare.weight = 1; } printErrorData(minCompare); + if(qualFileName != "" && reportFileName != ""){ + report = ReportFile(reportFile); + + int origLength = report.getQueryLength(); + int startBase = report.getQueryStart(); + int endBase = report.getQueryEnd(); + + quality = QualityScores(qualFile, origLength); + quality.updateQScoreErrorMap(qScoreErrorMap, minCompare.sequence, startBase, endBase, minCompare.weight); + } + if(minCompare.errorRate < threshold){ totalBases += (minCompare.total * minCompare.weight); totalMatches += minCompare.matches * minCompare.weight; @@ -250,11 +305,23 @@ int SeqErrorCommand::execute(){ numSeqs++; } + } queryFile.close(); int total = 0; + if(qualFileName != "" && reportFileName != ""){ + string errorQualityFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.quality"; + ofstream errorQualityFile; + m->openOutputFile(errorQualityFileName, errorQualityFile); + outputNames.push_back(errorQualityFileName); outputTypes["error.quality"].push_back(errorQualityFileName); + + errorQualityFile << "qscore\tmatches\tsubstitutions\tinsertions\tambiguous" << endl; + for(int i=0;i<41;i++){ + errorQualityFile << i << '\t' << qScoreErrorMap['m'][i] << '\t' << qScoreErrorMap['s'][i] << '\t' << qScoreErrorMap['i'][i] << '\t'<< qScoreErrorMap['a'][i] << endl; + } + } string errorCountFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.count"; ofstream errorCountFile; diff --git a/seqerrorcommand.h b/seqerrorcommand.h index c8dd4ef..a012d68 100644 --- a/seqerrorcommand.h +++ b/seqerrorcommand.h @@ -60,7 +60,7 @@ private: void printErrorHeader(); void printErrorData(Compare); - string queryFileName, referenceFileName, namesFileName, errorSummaryFileName, errorSeqFileName, outputDir; + string queryFileName, referenceFileName, qualFileName, reportFileName, namesFileName, errorSummaryFileName, errorSeqFileName, outputDir; double threshold; int numRefs; ofstream errorSummaryFile, errorSeqFile; diff --git a/sequence.cpp b/sequence.cpp index 78fa266..42c26d5 100644 --- a/sequence.cpp +++ b/sequence.cpp @@ -565,4 +565,16 @@ void Sequence::reverseComplement(){ aligned = temp; } -/**************************************************************************************************/ + +//******************************************************************************************************************** + +void Sequence::trim(int length){ + + if(numBases > length){ + unaligned = unaligned.substr(0,length); + numBases = length; + } + +} + +///**************************************************************************************************/ diff --git a/sequence.hpp b/sequence.hpp index c634f78..48287af 100644 --- a/sequence.hpp +++ b/sequence.hpp @@ -37,6 +37,7 @@ public: void setAligned(string); void setLength(); void reverseComplement(); + void trim(int); string convert2ints(); string getName(); diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index acbead2..9926261 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -14,7 +14,9 @@ vector TrimSeqsCommand::getValidParameters(){ try { string Array[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", - "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; + "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", + "keepfirst", "removelast", + "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); return myArray; } @@ -74,8 +76,10 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { else { //valid paramters for this command - string AlignArray[] = {"fasta", "flip", "group","oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", - "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; + string AlignArray[] = { "fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", + "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", + "keepfirst", "removelast", + "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); @@ -204,14 +208,20 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; } convert(temp, qWindowAverage); - temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "100"; } + temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; } convert(temp, qWindowSize); - temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "10"; } + temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; } convert(temp, qWindowStep); temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; } convert(temp, qAverage); + + temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; } + convert(temp, keepFirst); + + temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; } + convert(temp, removeLast); temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; } allFiles = m->isTrue(temp); @@ -337,7 +347,7 @@ int TrimSeqsCommand::execute(){ outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile); getOligos(fastaFileNames, qualFileNames); } - + vector fastaFilePos; vector qFilePos; @@ -507,25 +517,12 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string string trashCode = ""; int currentSeqsDiffs = 0; - if(qFileName != ""){ - if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); } - else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); } - else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); } - else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); } - - if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { - success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap - } - - if(!success) { trashCode += 'q'; } - } - if(barcodes.size() != 0){ success = stripBarcode(currSeq, currQual, groupBar); if(success > bdiffs) { trashCode += 'b'; } else{ currentSeqsDiffs += success; } } - + if(numFPrimers != 0){ success = stripForward(currSeq, currQual, groupPrime); if(success > pdiffs) { trashCode += 'f'; } @@ -533,11 +530,36 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string } if (currentSeqsDiffs > tdiffs) { trashCode += 't'; } - + if(numRPrimers != 0){ success = stripReverse(currSeq, currQual); if(!success) { trashCode += 'r'; } } + + if(keepFirst != 0){ + success = keepFirstTrim(currSeq, currQual); + } + + if(removeLast != 0){ + success = removeLastTrim(currSeq, currQual); + if(!success) { trashCode += 'l'; } + } + + + if(qFileName != ""){ + + if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); } + else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); } + else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); } + else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); } + else { success = 1; } + +// if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { +// success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap +// } + + if(!success) { trashCode += 'q'; } + } if(minLength > 0 || maxLength > 0){ success = cullLength(currSeq); @@ -552,7 +574,10 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if(!success) { trashCode += 'n'; } } - if(flip){ currSeq.reverseComplement(); currQual.flipQScores(); } // should go last + if(flip){ // should go last + currSeq.reverseComplement(); + currQual.flipQScores(); + } if(trashCode.length() == 0){ currSeq.setAligned(currSeq.getUnaligned()); @@ -1128,7 +1153,6 @@ int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group seq.setUnaligned(rawSequence.substr(oligo.length())); if(qual.getName() != ""){ qual.trimQScores(oligo.length(), -1); - } success = 0; break; @@ -1266,6 +1290,52 @@ bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){ //*************************************************************************************************************** +bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){ + try { + bool success = 1; + if(qscores.getName() != ""){ + qscores.trimQScores(-1, keepFirst); + } + sequence.trim(keepFirst); + return success; + } + catch(exception& e) { + m->errorOut(e, "keepFirstTrim", "countDiffs"); + exit(1); + } + +} + +//*************************************************************************************************************** + +bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){ + try { + bool success = 0; + + int length = sequence.getNumBases() - removeLast; + + if(length > 0){ + if(qscores.getName() != ""){ + qscores.trimQScores(-1, length); + } + sequence.trim(length); + success = 1; + } + else{ + success = 0; + } + + return success; + } + catch(exception& e) { + m->errorOut(e, "removeLastTrim", "countDiffs"); + exit(1); + } + +} + +//*************************************************************************************************************** + bool TrimSeqsCommand::cullLength(Sequence& seq){ try { @@ -1397,78 +1467,5 @@ int TrimSeqsCommand::countDiffs(string oligo, string seq){ } } -//*************************************************************************************************************** - -//bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){ -// try { -// -// string rawSequence = seq.getUnaligned(); -// int seqLength = seq.getNumBases(); -// bool success = 0; //guilty until proven innocent -// string name; -// -// qFile >> name; -// if (name[0] == '>') { if(name.substr(1) != seq.getName()) { m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); } } -// -// while (!qFile.eof()) { char c = qFile.get(); if (c == 10 || c == 13){ break; } } -// -// int score; -// int end = seqLength; -// -// for(int i=0;i> score; -// -// if(score < qThreshold){ -// end = i; -// break; -// } -// } -// for(int i=end+1;i> score; -// } -// -// seq.setUnaligned(rawSequence.substr(0,end)); -// -// return 1; -// } -// catch(exception& e) { -// m->errorOut(e, "TrimSeqsCommand", "stripQualThreshold"); -// exit(1); -// } -//} - -//*************************************************************************************************************** - -//bool TrimSeqsCommand::cullQualAverage(Sequence& seq, ifstream& qFile){ -// try { -// string rawSequence = seq.getUnaligned(); -// int seqLength = seq.getNumBases(); -// bool success = 0; //guilty until proven innocent -// string name; -// -// qFile >> name; -// if (name[0] == '>') { if(name.substr(1) != seq.getName()) { m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); } } -// -// while (!qFile.eof()) { char c = qFile.get(); if (c == 10 || c == 13){ break; } } -// -// float score; -// float average = 0; -// -// for(int i=0;i> score; -// average += score; -// } -// average /= seqLength; -// -// if(average >= qAverage) { success = 1; } -// else { success = 0; } -// -// return success; -// } -// catch(exception& e) { -// m->errorOut(e, "TrimSeqsCommand", "cullQualAverage"); -// exit(1); -// } -//} //*************************************************************************************************************** diff --git a/trimseqscommand.h b/trimseqscommand.h index 2f8fbd7..21a5d41 100644 --- a/trimseqscommand.h +++ b/trimseqscommand.h @@ -42,6 +42,9 @@ private: int stripBarcode(Sequence&, QualityScores&, int&); int stripForward(Sequence&, QualityScores&, int&); bool stripReverse(Sequence&, QualityScores&); + bool keepFirstTrim(Sequence&, QualityScores&); + bool removeLastTrim(Sequence&, QualityScores&); + bool cullLength(Sequence&); bool cullHomoP(Sequence&); bool cullAmbigs(Sequence&); @@ -54,7 +57,7 @@ private: bool flip, allFiles, qtrim; int numFPrimers, numRPrimers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, comboStarts; - int qWindowSize, qWindowStep; + int qWindowSize, qWindowStep, keepFirst, removeLast; double qRollAverage, qThreshold, qWindowAverage, qAverage; vector revPrimer, outputNames; set filesToRemove;