X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimoligos.cpp;h=0f2a9cbb0e584ddfa393a53015b02b37ccfa3903;hb=250e3b11b1c9c1e1ad458ab6c7e71ac2e67e11d9;hp=5d375a75150c1cc86c5b6c39e3ac21cdeeb7f110;hpb=859e3a473a3e63e0060c49be70b80f9289253da2;p=mothur.git diff --git a/trimoligos.cpp b/trimoligos.cpp index 5d375a7..0f2a9cb 100644 --- a/trimoligos.cpp +++ b/trimoligos.cpp @@ -368,6 +368,8 @@ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){ temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); + if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " aligned fragment =" + temp + ", barcode =" + oligo + ", numDiffs = " + toString(numDiff) + "\n"); } + if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; @@ -431,10 +433,10 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr break; } - if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) { + if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) { group = it->first; forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); - reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length()))); + reverseSeq.setUnaligned(rawRSequence.substr(roligo.length())); success = 0; break; } @@ -487,6 +489,8 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); + if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); } + if (alnLength == 0) { numDiff = bdiffs + 100; } //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; @@ -548,6 +552,9 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); + + if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); } + if (alnLength == 0) { numDiff = bdiffs + 100; } //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; @@ -577,19 +584,6 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr } - /*cout << minDiff << '\t' << minCount << '\t' << endl; - for (int i = 0; i < minFGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; } - cout << endl; - } - cout << endl; - for (int i = 0; i < minRGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; } - cout << endl; - } - cout << endl;*/ if(minDiff > bdiffs) { success = minDiff; } //no good matches else { bool foundMatch = false; @@ -654,12 +648,12 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality break; } - if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) { + if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) { group = it->first; forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); - reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length()))); + reverseSeq.setUnaligned(rawRSequence.substr(roligo.length())); forwardQual.trimQScores(foligo.length(), -1); - reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length()); + reverseQual.trimQScores(roligo.length(), -1); success = 0; break; } @@ -713,7 +707,7 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality int numDiff = countDiffs(oligo, temp); if (alnLength == 0) { numDiff = bdiffs + 100; } - //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; + if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); } if(numDiff < minDiff){ minDiff = numDiff; @@ -775,6 +769,8 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality int numDiff = countDiffs(oligo, temp); if (alnLength == 0) { numDiff = bdiffs + 100; } + if (m->debug) { m->mothurOut("[DEBUG]: reverse " + reverseSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); } + //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; if(numDiff < minDiff){ minDiff = numDiff; @@ -802,19 +798,6 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality } - /*cout << minDiff << '\t' << minCount << '\t' << endl; - for (int i = 0; i < minFGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; } - cout << endl; - } - cout << endl; - for (int i = 0; i < minRGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; } - cout << endl; - } - cout << endl;*/ if(minDiff > bdiffs) { success = minDiff; } //no good matches else { bool foundMatch = false; @@ -881,6 +864,8 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou break; } + if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; } + if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) { group = it->first; string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode @@ -1033,19 +1018,6 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou } - /*cout << minDiff << '\t' << minCount << '\t' << endl; - for (int i = 0; i < minFGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; } - cout << endl; - } - cout << endl; - for (int i = 0; i < minRGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; } - cout << endl; - } - cout << endl;*/ if(minDiff > bdiffs) { success = minDiff; } //no good matches else { bool foundMatch = false; @@ -1065,6 +1037,7 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou group = matches[0]; fStart = minFPos[0]; rStart = rawSeq.length() - minRPos[0]; + if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence } //we have an acceptable match for the forward and reverse, but do they match? @@ -1116,6 +1089,8 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou break; } + if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; } + if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) { group = it->first; if (!keepForward) { @@ -1165,7 +1140,7 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou success = pdiffs + 10; break; } - //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl; + //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl; //use needleman to align first barcode.length()+numdiffs of sequence to each barcode alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs)); oligo = alignment->getSeqAAln(); @@ -1270,19 +1245,6 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou } - /*cout << minDiff << '\t' << minCount << '\t' << endl; - for (int i = 0; i < minFGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; } - cout << endl; - } - cout << endl; - for (int i = 0; i < minRGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; } - cout << endl; - } - cout << endl;*/ if(minDiff > pdiffs) { success = minDiff; } //no good matches else { bool foundMatch = false; @@ -1302,6 +1264,7 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou group = matches[0]; fStart = minFPos[0]; rStart = rawSeq.length() - minRPos[0]; + if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence } //we have an acceptable match for the forward and reverse, but do they match? @@ -1355,12 +1318,12 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality break; } - if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) { + if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) { group = it->first; forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); - reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length()))); + reverseSeq.setUnaligned(rawRSequence.substr(roligo.length())); forwardQual.trimQScores(foligo.length(), -1); - reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length()); + reverseQual.trimQScores(roligo.length(), -1); success = 0; break; } @@ -1416,6 +1379,8 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality if (alnLength == 0) { numDiff = pdiffs + 100; } //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; + if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); } + if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; @@ -1476,6 +1441,8 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality int numDiff = countDiffs(oligo, temp); if (alnLength == 0) { numDiff = pdiffs + 100; } + if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); } + //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; if(numDiff < minDiff){ minDiff = numDiff; @@ -1503,19 +1470,6 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality } - /*cout << minDiff << '\t' << minCount << '\t' << endl; - for (int i = 0; i < minFGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; } - cout << endl; - } - cout << endl; - for (int i = 0; i < minRGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; } - cout << endl; - } - cout << endl;*/ if(minDiff > pdiffs) { success = minDiff; } //no good matches else { bool foundMatch = false; @@ -1585,7 +1539,7 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) { group = it->first; forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); - reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length()))); + reverseSeq.setUnaligned(rawRSequence.substr(roligo.length())); success = 0; break; } @@ -1638,6 +1592,8 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); + if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); } + if (alnLength == 0) { numDiff = pdiffs + 100; } //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; @@ -1699,6 +1655,9 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); + + if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); } + if (alnLength == 0) { numDiff = pdiffs + 100; } //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; @@ -1728,19 +1687,6 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr } - /*cout << minDiff << '\t' << minCount << '\t' << endl; - for (int i = 0; i < minFGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; } - cout << endl; - } - cout << endl; - for (int i = 0; i < minRGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; } - cout << endl; - } - cout << endl;*/ if(minDiff > pdiffs) { success = minDiff; } //no good matches else { bool foundMatch = false; @@ -1845,6 +1791,8 @@ int TrimOligos::stripBarcode(Sequence& seq, int& group){ int numDiff = countDiffs(oligo, temp); + if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " aligned fragment =" + temp + ", barcode =" + oligo + ", numDiffs = " + toString(numDiff) + "\n"); } + if(numDiff < minDiff){ minDiff = numDiff; minCount = 1;