X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimseqscommand.cpp;h=705ac25f5a5e727424a2567a0d358b4c1dc951e5;hb=8ef6687c1f586285d01c000cc5e359bf9c07c717;hp=ae9c436440950cca6cd33eb99e85c5b9b4db1062;hpb=4a2d841cb97fb02351022efe9d7068b1dc212bf9;p=mothur.git diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index ae9c436..705ac25 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -364,7 +364,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string return 0; } - bool success = 1; + int success = 1; Sequence currSeq(inFASTA); @@ -377,15 +377,16 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if(qFileName != ""){ if(qThreshold != 0) { success = stripQualThreshold(currSeq, qFile); } else if(qAverage != 0) { success = cullQualAverage(currSeq, qFile); } - if ((!qtrim) && (origSeq.length() != currSeq.getUnaligned().length())) { + + if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap } + if(!success) { trashCode += 'q'; } } if(barcodes.size() != 0){ success = stripBarcode(currSeq, group); -// cout << "here: " << success << endl; if(success > bdiffs){ trashCode += 'b'; } else{ currentSeqsDiffs += success; } } @@ -419,7 +420,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if(flip){ currSeq.reverseComplement(); } // should go last if(trashCode.length() == 0){ - currSeq.setAligned(currSeq.getUnaligned()); //this is because of a modification we made to the sequence class to fix a bug. all seqs have an aligned version, which is the version that gets printed. + currSeq.setAligned(currSeq.getUnaligned()); currSeq.printSequence(outFASTA); if(barcodes.size() != 0){ outGroups << currSeq.getName() << '\t' << groupVector[group] << endl; @@ -432,6 +433,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string else{ currSeq.setName(currSeq.getName() + '|' + trashCode); currSeq.setUnaligned(origSeq); + currSeq.setAligned(origSeq); currSeq.printSequence(scrapFASTA); } } @@ -610,15 +612,16 @@ void TrimSeqsCommand::getOligos(vector& outFASTAVec){ //vector::iterator it=barcodes.begin();it!=barcodes.end();it++){ string oligo = it->first; if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length - success = bdiffs + 1; - break; + success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; } if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ @@ -662,7 +665,7 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, int& group){ // int length = oligo.length(); if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length - success = bdiffs + 1; + success = bdiffs + 10; break; } @@ -678,7 +681,6 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, int& group){ } oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); -// int newStart=0; int numDiff = countDiffs(oligo, temp); @@ -698,17 +700,20 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, int& group){ } } - if(minDiff > bdiffs){ success = bdiffs + 1; } - else if(minCount > 1) { success = bdiffs + 1; } - else{ + + if(minDiff > bdiffs) { success = minDiff; } //no good matches + else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes + else{ //use the best match group = minGroup; - seq.setUnaligned("*" + rawSequence.substr(minPos)); + seq.setUnaligned(rawSequence.substr(minPos)); success = minDiff; } if (alignment != NULL) { delete alignment; } } +// cout << success << endl; + return success; } @@ -724,7 +729,7 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, int& group){ int TrimSeqsCommand::stripForward(Sequence& seq){ try { string rawSequence = seq.getUnaligned(); - bool success = pdiffs + 1; //guilty until proven innocent + int success = pdiffs + 1; //guilty until proven innocent //can you find the primer for(int i=0;i pdiffs){ success = pdiffs + 1; } - else if(minCount > 1) { success = pdiffs + 1; } + if(minDiff > pdiffs) { success = minDiff; } + else if(minCount > 1) { success = pdiffs + 10; } else{ - seq.setUnaligned("*" + rawSequence.substr(minPos)); + seq.setUnaligned(rawSequence.substr(minPos)); success = minDiff; } @@ -992,16 +997,41 @@ int TrimSeqsCommand::countDiffs(string oligo, string seq){ bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){ try { +// string rawSequence = seq.getUnaligned(); +// int seqLength; // = rawSequence.length(); +// string name, temp, temp2; +// +// qFile >> name; +// +// //get rest of line +// temp = ""; +// while (!qFile.eof()) { +// char c = qFile.get(); +// if (c == 10 || c == 13){ break; } +// else { temp += c; } +// } +// +// int pos = temp.find("length"); +// if (pos == temp.npos) { m->mothurOut("Cannot find length in qfile for " + seq.getName()); m->mothurOutEndLine(); seqLength = 0; } +// else { +// string tempLength = temp.substr(pos); +// istringstream iss (tempLength,istringstream::in); +// iss >> temp; +// } +// +// splitAtEquals(temp2, temp); //separates length=242, temp=length, temp2=242 +// convert(temp, seqLength); //converts string to int +// +// if (name.length() != 0) { if(name.substr(1) != seq.getName()) { m->mothurOut("sequence name mismatch btwn fasta and qual file"); m->mothurOutEndLine(); } } + string rawSequence = seq.getUnaligned(); - int seqLength; // = rawSequence.length(); - string name, temp, temp2; + int seqLength = seq.getNumBases(); + bool success = 0; //guilty until proven innocent + string name; + + qFile >> name; + if (name[0] == '>') { if(name.substr(1) != seq.getName()) { m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); } } - qFile >> name >> temp; - - splitAtEquals(temp2, temp); //separates length=242, temp=length, temp2=242 - convert(temp, seqLength); //converts string to int - - if (name.length() != 0) { if(name.substr(1) != seq.getName()) { m->mothurOut("sequence name mismatch btwn fasta and qual file"); m->mothurOutEndLine(); } } while (!qFile.eof()) { char c = qFile.get(); if (c == 10 || c == 13){ break; } } int score; @@ -1010,7 +1040,7 @@ bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){ for(int i=0;i> score; - if(score <= qThreshold){ + if(score < qThreshold){ end = i; break; }