X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimseqscommand.cpp;h=580fc22c978e0105b464e214593599fe15970f1d;hb=f07bf12e0d04340698aff7a36d2fee7c959ffe59;hp=7835b7598ae6ddcc76001788d44daf1faf17cfdd;hpb=44b464979e91c2a1f144ac45ca12c14fcf6579f0;p=mothur.git diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 7835b75..580fc22 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -163,7 +163,7 @@ void TrimSeqsCommand::help(){ m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n"); m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n"); m->mothurOut("The fasta parameter is required.\n"); - m->mothurOut("The flip parameter .... The default is 0.\n"); + m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n"); m->mothurOut("The oligos parameter .... The default is "".\n"); m->mothurOut("The maxambig parameter .... The default is -1.\n"); m->mothurOut("The maxhomop parameter .... The default is 0.\n"); @@ -224,8 +224,9 @@ int TrimSeqsCommand::execute(){ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) if(processors == 1){ ifstream inFASTA; + int numSeqs; openInputFile(fastaFile, inFASTA); - int numSeqs=count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); + getNumSeqs(inFASTA, numSeqs); inFASTA.close(); lines.push_back(new linePair(0, numSeqs)); @@ -266,8 +267,9 @@ int TrimSeqsCommand::execute(){ if (m->control_pressed) { return 0; } #else ifstream inFASTA; + int numSeqs; openInputFile(fastaFile, inFASTA); - int numSeqs=count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); + getNumSeqs(inFASTA, numSeqs); inFASTA.close(); lines.push_back(new linePair(0, numSeqs)); @@ -377,9 +379,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if(qFileName != ""){ if(qThreshold != 0) { success = stripQualThreshold(currSeq, qFile); } else if(qAverage != 0) { success = cullQualAverage(currSeq, qFile); } - if ((!qtrim) && (origSeq.length() != currSeq.getUnaligned().length())) { + + if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap } + if(!success) { trashCode += 'q'; } } @@ -648,7 +652,7 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, int& group){ maxLength = it->first.length(); } } - alignment = new NeedlemanOverlap(-2.0, 1.0, -1.0, (maxLength+bdiffs+1)); + alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1)); }else{ alignment = NULL; } @@ -682,6 +686,9 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, int& group){ int newStart=0; int numDiff = countDiffs(oligo, temp); + +// cout << oligo << '\t' << temp << '\t' << numDiff << endl; + if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; @@ -762,7 +769,7 @@ int TrimSeqsCommand::stripForward(Sequence& seq){ maxLength = forPrimer[i].length(); } } - alignment = new NeedlemanOverlap(-2.0, 1.0, -1.0, (maxLength+pdiffs+1)); + alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1)); }else{ alignment = NULL; } @@ -995,32 +1002,42 @@ int TrimSeqsCommand::countDiffs(string oligo, string seq){ bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){ try { +// string rawSequence = seq.getUnaligned(); +// int seqLength; // = rawSequence.length(); +// string name, temp, temp2; +// +// qFile >> name; +// +// //get rest of line +// temp = ""; +// while (!qFile.eof()) { +// char c = qFile.get(); +// if (c == 10 || c == 13){ break; } +// else { temp += c; } +// } +// +// int pos = temp.find("length"); +// if (pos == temp.npos) { m->mothurOut("Cannot find length in qfile for " + seq.getName()); m->mothurOutEndLine(); seqLength = 0; } +// else { +// string tempLength = temp.substr(pos); +// istringstream iss (tempLength,istringstream::in); +// iss >> temp; +// } +// +// splitAtEquals(temp2, temp); //separates length=242, temp=length, temp2=242 +// convert(temp, seqLength); //converts string to int +// +// if (name.length() != 0) { if(name.substr(1) != seq.getName()) { m->mothurOut("sequence name mismatch btwn fasta and qual file"); m->mothurOutEndLine(); } } + string rawSequence = seq.getUnaligned(); - int seqLength; // = rawSequence.length(); - string name, temp, temp2; + int seqLength = seq.getNumBases(); + bool success = 0; //guilty until proven innocent + string name; qFile >> name; + if (name[0] == '>') { if(name.substr(1) != seq.getName()) { m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); } } - //get rest of line - temp = ""; - while (!qFile.eof()) { - char c = qFile.get(); - if (c == 10 || c == 13){ break; } - else { temp += c; } - } - - int pos = temp.find("length"); - if (pos == temp.npos) { m->mothurOut("Cannot find length in qfile for " + seq.getName()); m->mothurOutEndLine(); seqLength = 0; } - else { - string tempLength = temp.substr(pos); - istringstream iss (tempLength,istringstream::in); - iss >> temp; - } - - splitAtEquals(temp2, temp); //separates length=242, temp=length, temp2=242 - convert(temp, seqLength); //converts string to int - - if (name.length() != 0) { if(name.substr(1) != seq.getName()) { m->mothurOut("sequence name mismatch btwn fasta and qual file"); m->mothurOutEndLine(); } } + while (!qFile.eof()) { char c = qFile.get(); if (c == 10 || c == 13){ break; } } int score; int end = seqLength; @@ -1028,7 +1045,7 @@ bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){ for(int i=0;i> score; - if(score <= qThreshold){ + if(score < qThreshold){ end = i; break; }