X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=trimoligos.cpp;h=c7016d8b08a5804d1636a9ab8fde27b891f2a0a6;hp=5d375a75150c1cc86c5b6c39e3ac21cdeeb7f110;hb=fefd5ee1517abd3bc38b469cb2dffc85a1571c7e;hpb=196c22d0f93ba48e8ec54ab76608b6e3ba5e68cc diff --git a/trimoligos.cpp b/trimoligos.cpp index 5d375a7..c7016d8 100644 --- a/trimoligos.cpp +++ b/trimoligos.cpp @@ -368,6 +368,8 @@ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){ temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); + if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " aligned fragment =" + temp + ", barcode =" + oligo + ", numDiffs = " + toString(numDiff) + "\n"); } + if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; @@ -431,10 +433,10 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr break; } - if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) { + if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) { group = it->first; forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); - reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length()))); + reverseSeq.setUnaligned(rawRSequence.substr(roligo.length())); success = 0; break; } @@ -487,6 +489,8 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); + if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); } + if (alnLength == 0) { numDiff = bdiffs + 100; } //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; @@ -548,6 +552,9 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); + + if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); } + if (alnLength == 0) { numDiff = bdiffs + 100; } //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; @@ -577,19 +584,6 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr } - /*cout << minDiff << '\t' << minCount << '\t' << endl; - for (int i = 0; i < minFGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; } - cout << endl; - } - cout << endl; - for (int i = 0; i < minRGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; } - cout << endl; - } - cout << endl;*/ if(minDiff > bdiffs) { success = minDiff; } //no good matches else { bool foundMatch = false; @@ -654,12 +648,12 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality break; } - if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) { + if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) { group = it->first; forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); - reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length()))); + reverseSeq.setUnaligned(rawRSequence.substr(roligo.length())); forwardQual.trimQScores(foligo.length(), -1); - reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length()); + reverseQual.trimQScores(roligo.length(), -1); success = 0; break; } @@ -713,7 +707,7 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality int numDiff = countDiffs(oligo, temp); if (alnLength == 0) { numDiff = bdiffs + 100; } - //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; + if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); } if(numDiff < minDiff){ minDiff = numDiff; @@ -775,6 +769,8 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality int numDiff = countDiffs(oligo, temp); if (alnLength == 0) { numDiff = bdiffs + 100; } + if (m->debug) { m->mothurOut("[DEBUG]: reverse " + reverseSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); } + //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; if(numDiff < minDiff){ minDiff = numDiff; @@ -802,21 +798,16 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality } - /*cout << minDiff << '\t' << minCount << '\t' << endl; - for (int i = 0; i < minFGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; } - cout << endl; - } - cout << endl; - for (int i = 0; i < minRGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; } - cout << endl; - } - cout << endl;*/ if(minDiff > bdiffs) { success = minDiff; } //no good matches else { + if (m->debug) { + string fMatches = ""; + for (int i = 0; i < minFGroup.size(); i++) { for (int j = 0; j < minFGroup[i].size(); j++) { fMatches += toString(minFGroup[i][j]) + " "; } } + m->mothurOut("[DEBUG]: forward matches = " + fMatches + "\n"); + string rMatches = ""; + for (int i = 0; i < minRGroup.size(); i++) { for (int j = 0; j < minRGroup[i].size(); j++) { rMatches += toString(minRGroup[i][j]) + " "; } } + m->mothurOut("[DEBUG]: reverse matches = " + rMatches + "\n"); + } bool foundMatch = false; vector matches; for (int i = 0; i < minFGroup.size(); i++) { @@ -834,6 +825,9 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality group = matches[0]; fStart = minFPos[0]; rStart = minRPos[0]; + if (m->debug) { m->mothurOut("[DEBUG]: found match = " + toString(matches[0]) + ".\n"); } + }else { + if (m->debug) { m->mothurOut("[DEBUG]: failed too many matches.\n"); } } //we have an acceptable match for the forward and reverse, but do they match? @@ -881,6 +875,8 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou break; } + if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; } + if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) { group = it->first; string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode @@ -1033,19 +1029,6 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou } - /*cout << minDiff << '\t' << minCount << '\t' << endl; - for (int i = 0; i < minFGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; } - cout << endl; - } - cout << endl; - for (int i = 0; i < minRGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; } - cout << endl; - } - cout << endl;*/ if(minDiff > bdiffs) { success = minDiff; } //no good matches else { bool foundMatch = false; @@ -1065,6 +1048,7 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou group = matches[0]; fStart = minFPos[0]; rStart = rawSeq.length() - minRPos[0]; + if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence } //we have an acceptable match for the forward and reverse, but do they match? @@ -1092,6 +1076,222 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou exit(1); } +} +//*******************************************************************/ +int TrimOligos::stripPairedBarcode(Sequence& seq, int& group){ + try { + //look for forward barcode + string rawSeq = seq.getUnaligned(); + int success = bdiffs + 1; //guilty until proven innocent + //cout << seq.getName() << endl; + //can you find the forward barcode + for(map::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){ + string foligo = it->second.forward; + string roligo = it->second.reverse; + //cout << it->first << '\t' << foligo << '\t' << roligo << endl; + //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl; + if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + + if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; } + + if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) { + group = it->first; + string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode + seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode + success = 0; + break; + } + } + //cout << "success=" << success << endl; + //if you found the barcode or if you don't want to allow for diffs + if ((bdiffs == 0) || (success == 0)) { return success; } + else { //try aligning and see if you can find it + Alignment* alignment; + if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + vector< vector > minFGroup; + vector minFPos; + + //the pair can have duplicates, but we only want to search for the unique forward and reverses, example + /* + 1 Sarah Westcott + 2 John Westcott + 3 Anna Westcott + 4 Sarah Schloss + 5 Pat Schloss + 6 Gail Brown + 7 Pat Moore + only want to look for forward = Sarah, John, Anna, Pat, Gail + reverse = Westcott, Schloss, Brown, Moore + but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group. + */ + //cout << endl << forwardSeq.getName() << endl; + for(map >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){ + string oligo = it->first; + + if(rawSeq.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; + break; + } + //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl; + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + int numDiff = countDiffs(oligo, temp); + + if (alnLength == 0) { numDiff = bdiffs + 100; } + //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; + + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minFGroup.clear(); + minFGroup.push_back(it->second); + int tempminFPos = 0; + minFPos.clear(); + for(int i=0;isecond); + int tempminFPos = 0; + for(int i=0;i bdiffs) { success = minDiff; } //no good matches + else{ + //check for reverse match + if (alignment != NULL) { delete alignment; } + + if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + minDiff = 1e6; + minCount = 1; + vector< vector > minRGroup; + vector minRPos; + + string rawRSequence = reverseOligo(seq.getUnaligned()); + //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl; + for(map >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){ + string oligo = reverseOligo(it->first); + //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl; + if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; + break; + } + + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + int numDiff = countDiffs(oligo, temp); + if (alnLength == 0) { numDiff = bdiffs + 100; } + + //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl; + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minRGroup.clear(); + minRGroup.push_back(it->second); + int tempminRPos = 0; + minRPos.clear(); + for(int i=0;isecond); + } + + } + + if(minDiff > bdiffs) { success = minDiff; } //no good matches + else { + bool foundMatch = false; + vector matches; + for (int i = 0; i < minFGroup.size(); i++) { + for (int j = 0; j < minFGroup[i].size(); j++) { + for (int k = 0; k < minRGroup.size(); k++) { + if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); } + } + } + } + + int fStart = 0; + int rStart = 0; + if (matches.size() == 1) { + foundMatch = true; + group = matches[0]; + fStart = minFPos[0]; + rStart = rawSeq.length() - minRPos[0]; + if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence + } + + //we have an acceptable match for the forward and reverse, but do they match? + if (foundMatch) { + string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode + seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode + success = minDiff; + //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl; + }else { success = bdiffs + 100; } + } + } + + if (alignment != NULL) { delete alignment; } + } + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripPairedBarcode"); + exit(1); + } + } //*******************************************************************/ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){ @@ -1116,6 +1316,8 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou break; } + if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; } + if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) { group = it->first; if (!keepForward) { @@ -1165,7 +1367,7 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou success = pdiffs + 10; break; } - //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl; + //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl; //use needleman to align first barcode.length()+numdiffs of sequence to each barcode alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs)); oligo = alignment->getSeqAAln(); @@ -1270,19 +1472,6 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou } - /*cout << minDiff << '\t' << minCount << '\t' << endl; - for (int i = 0; i < minFGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; } - cout << endl; - } - cout << endl; - for (int i = 0; i < minRGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; } - cout << endl; - } - cout << endl;*/ if(minDiff > pdiffs) { success = minDiff; } //no good matches else { bool foundMatch = false; @@ -1302,6 +1491,7 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou group = matches[0]; fStart = minFPos[0]; rStart = rawSeq.length() - minRPos[0]; + if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence } //we have an acceptable match for the forward and reverse, but do they match? @@ -1332,7 +1522,223 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou } } - +//*******************************************************************/ +int TrimOligos::stripPairedPrimers(Sequence& seq, int& group){ + try { + //look for forward + string rawSeq = seq.getUnaligned(); + int success = pdiffs + 1; //guilty until proven innocent + //cout << seq.getName() << endl; + //can you find the forward + for(map::iterator it=ipprimers.begin();it!=ipprimers.end();it++){ + string foligo = it->second.forward; + string roligo = it->second.reverse; + + //cout << it->first << '\t' << foligo << '\t' << roligo << endl; + //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl; + if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + + if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; } + + if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) { + group = it->first; + string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode + seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode + success = 0; + break; + } + } + //cout << "success=" << success << endl; + //if you found the barcode or if you don't want to allow for diffs + if ((pdiffs == 0) || (success == 0)) { return success; } + else { //try aligning and see if you can find it + Alignment* alignment; + if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + vector< vector > minFGroup; + vector minFPos; + + //the pair can have duplicates, but we only want to search for the unique forward and reverses, example + /* + 1 Sarah Westcott + 2 John Westcott + 3 Anna Westcott + 4 Sarah Schloss + 5 Pat Schloss + 6 Gail Brown + 7 Pat Moore + only want to look for forward = Sarah, John, Anna, Pat, Gail + reverse = Westcott, Schloss, Brown, Moore + but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group. + */ + //cout << endl << forwardSeq.getName() << endl; + for(map >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){ + string oligo = it->first; + + if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; + break; + } + //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl; + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + int numDiff = countDiffs(oligo, temp); + + if (alnLength == 0) { numDiff = pdiffs + 100; } + //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; + + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minFGroup.clear(); + minFGroup.push_back(it->second); + int tempminFPos = 0; + minFPos.clear(); + for(int i=0;isecond); + int tempminFPos = 0; + for(int i=0;i pdiffs) { success = minDiff; } //no good matches + else{ + //check for reverse match + if (alignment != NULL) { delete alignment; } + + if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + minDiff = 1e6; + minCount = 1; + vector< vector > minRGroup; + vector minRPos; + + string rawRSequence = reverseOligo(seq.getUnaligned()); + + for(map >::iterator it=irprimers.begin();it!=irprimers.end();it++){ + string oligo = reverseOligo(it->first); + //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl; + if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; + break; + } + + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + int numDiff = countDiffs(oligo, temp); + if (alnLength == 0) { numDiff = pdiffs + 100; } + + //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl; + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minRGroup.clear(); + minRGroup.push_back(it->second); + int tempminRPos = 0; + minRPos.clear(); + for(int i=0;isecond); + } + + } + + if(minDiff > pdiffs) { success = minDiff; } //no good matches + else { + bool foundMatch = false; + vector matches; + for (int i = 0; i < minFGroup.size(); i++) { + for (int j = 0; j < minFGroup[i].size(); j++) { + for (int k = 0; k < minRGroup.size(); k++) { + if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); } + } + } + } + + int fStart = 0; + int rStart = 0; + if (matches.size() == 1) { + foundMatch = true; + group = matches[0]; + fStart = minFPos[0]; + rStart = rawSeq.length() - minRPos[0]; + if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence + } + + //we have an acceptable match for the forward and reverse, but do they match? + if (foundMatch) { + string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode + seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode + success = minDiff; + //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl; + }else { success = pdiffs + 100; } + } + } + + if (alignment != NULL) { delete alignment; } + } + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripPairedPrimers"); + exit(1); + } + +} //*******************************************************************/ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){ try { @@ -1355,12 +1761,12 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality break; } - if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) { + if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) { group = it->first; forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); - reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length()))); + reverseSeq.setUnaligned(rawRSequence.substr(roligo.length())); forwardQual.trimQScores(foligo.length(), -1); - reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length()); + reverseQual.trimQScores(roligo.length(), -1); success = 0; break; } @@ -1416,6 +1822,8 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality if (alnLength == 0) { numDiff = pdiffs + 100; } //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; + if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); } + if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; @@ -1476,6 +1884,8 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality int numDiff = countDiffs(oligo, temp); if (alnLength == 0) { numDiff = pdiffs + 100; } + if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); } + //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; if(numDiff < minDiff){ minDiff = numDiff; @@ -1503,19 +1913,6 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality } - /*cout << minDiff << '\t' << minCount << '\t' << endl; - for (int i = 0; i < minFGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; } - cout << endl; - } - cout << endl; - for (int i = 0; i < minRGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; } - cout << endl; - } - cout << endl;*/ if(minDiff > pdiffs) { success = minDiff; } //no good matches else { bool foundMatch = false; @@ -1585,7 +1982,7 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) { group = it->first; forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); - reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length()))); + reverseSeq.setUnaligned(rawRSequence.substr(roligo.length())); success = 0; break; } @@ -1638,6 +2035,8 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); + if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); } + if (alnLength == 0) { numDiff = pdiffs + 100; } //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; @@ -1699,6 +2098,9 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); + + if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); } + if (alnLength == 0) { numDiff = pdiffs + 100; } //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; @@ -1728,19 +2130,6 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr } - /*cout << minDiff << '\t' << minCount << '\t' << endl; - for (int i = 0; i < minFGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; } - cout << endl; - } - cout << endl; - for (int i = 0; i < minRGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; } - cout << endl; - } - cout << endl;*/ if(minDiff > pdiffs) { success = minDiff; } //no good matches else { bool foundMatch = false; @@ -1787,6 +2176,8 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr int TrimOligos::stripBarcode(Sequence& seq, int& group){ try { + if (paired) { int success = stripPairedBarcode(seq, group); return success; } + string rawSequence = seq.getUnaligned(); int success = bdiffs + 1; //guilty until proven innocent @@ -1845,6 +2236,8 @@ int TrimOligos::stripBarcode(Sequence& seq, int& group){ int numDiff = countDiffs(oligo, temp); + if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " aligned fragment =" + temp + ", barcode =" + oligo + ", numDiffs = " + toString(numDiff) + "\n"); } + if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; @@ -1888,6 +2281,8 @@ int TrimOligos::stripBarcode(Sequence& seq, int& group){ int TrimOligos::stripForward(Sequence& seq, int& group){ try { + if (paired) { int success = stripPairedPrimers(seq, group); return success; } + string rawSequence = seq.getUnaligned(); int success = pdiffs + 1; //guilty until proven innocent