X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=trimoligos.cpp;h=ddc1053f50d3eba28064907e4c6f0a60a958b3ee;hp=a53f4e85388f0f7bbb6fda40c2074b4f555e29d3;hb=050a3ff02473a3d4c0980964e1a9ebe52e55d6b8;hpb=7f1aca4ed9e6db70de82e80ca4771f7680b21d26 diff --git a/trimoligos.cpp b/trimoligos.cpp index a53f4e8..ddc1053 100644 --- a/trimoligos.cpp +++ b/trimoligos.cpp @@ -1,9 +1,9 @@ /* - * trimoligos.cpp - * Mothur + * trimoligos.cpp + * Mothur * - * Created by westcott on 9/1/11. - * Copyright 2011 Schloss Lab. All rights reserved. + * Created by westcott on 9/1/11. + * Copyright 2011 Schloss Lab. All rights reserved. * */ @@ -15,17 +15,18 @@ /********************************************************************/ //strip, pdiffs, bdiffs, primers, barcodes, revPrimers TrimOligos::TrimOligos(int p, int b, int l, int s, map pr, map br, vector r, vector lk, vector sp){ - try { - m = MothurOut::getInstance(); - - pdiffs = p; - bdiffs = b; + try { + m = MothurOut::getInstance(); + paired = false; + + pdiffs = p; + bdiffs = b; ldiffs = l; sdiffs = s; - - barcodes = br; - primers = pr; - revPrimer = r; + + barcodes = br; + primers = pr; + revPrimer = r; linker = lk; spacer = sp; maxFBarcodeLength = 0; @@ -35,7 +36,7 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map pr, map::iterator it; + map::iterator it; for(it=primers.begin();it!=primers.end();it++){ if(it->first.length() > maxFPrimerLength){ maxFPrimerLength = it->first.length(); @@ -55,42 +56,43 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map pr, maperrorOut(e, "TrimOligos", "TrimOligos"); - exit(1); - } + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "TrimOligos"); + exit(1); + } } /********************************************************************/ //strip, pdiffs, bdiffs, primers, barcodes, revPrimers TrimOligos::TrimOligos(int p, int b, int l, int s, map pr, map br){ - try { - m = MothurOut::getInstance(); - - pdiffs = p; - bdiffs = b; + try { + m = MothurOut::getInstance(); + + pdiffs = p; + bdiffs = b; ldiffs = l; sdiffs = s; - + paired = true; + maxFBarcodeLength = 0; for(map::iterator it=br.begin();it!=br.end();it++){ string forward = it->second.forward; map >::iterator itForward = ifbarcodes.find(forward); - if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length(); } + if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length(); } if (itForward == ifbarcodes.end()) { vector temp; temp.push_back(it->first); ifbarcodes[forward] = temp; }else { itForward->second.push_back(it->first); } } - + maxFPrimerLength = 0; for(map::iterator it=pr.begin();it!=pr.end();it++){ string forward = it->second.forward; map >::iterator itForward = ifprimers.find(forward); - if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length(); } + if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length(); } if (itForward == ifprimers.end()) { vector temp; temp.push_back(it->first); @@ -103,7 +105,7 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map pr, map< string forward = it->second.reverse; map >::iterator itForward = irbarcodes.find(forward); - if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length(); } + if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length(); } if (itForward == irbarcodes.end()) { vector temp; temp.push_back(it->first); @@ -116,34 +118,35 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map pr, map< string forward = it->second.reverse; map >::iterator itForward = irprimers.find(forward); - if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length(); } + if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length(); } if (itForward == irprimers.end()) { vector temp; temp.push_back(it->first); irprimers[forward] = temp; }else { itForward->second.push_back(it->first); } } - + ipbarcodes = br; ipprimers = pr; - } - catch(exception& e) { - m->errorOut(e, "TrimOligos", "TrimOligos"); - exit(1); - } + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "TrimOligos"); + exit(1); + } } /********************************************************************/ //strip, pdiffs, bdiffs, primers, barcodes, revPrimers TrimOligos::TrimOligos(int p, int b, map pr, map br, vector r){ - try { - m = MothurOut::getInstance(); - - pdiffs = p; - bdiffs = b; - - barcodes = br; - primers = pr; - revPrimer = r; + try { + m = MothurOut::getInstance(); + + pdiffs = p; + bdiffs = b; + + barcodes = br; + primers = pr; + revPrimer = r; + paired = false; maxFBarcodeLength = 0; for(map::iterator it=barcodes.begin();it!=barcodes.end();it++){ @@ -162,29 +165,29 @@ TrimOligos::TrimOligos(int p, int b, map pr, map br, v } } maxRPrimerLength = maxFPrimerLength; - } - catch(exception& e) { - m->errorOut(e, "TrimOligos", "TrimOligos"); - exit(1); - } + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "TrimOligos"); + exit(1); + } } /********************************************************************/ TrimOligos::~TrimOligos() {} //********************************************************************/ bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){ - try { - - string rawSequence = seq.getUnaligned(); - - for(map::iterator it=primers.begin();it!=primers.end();it++){ - string oligo = it->first; - - if(rawSequence.length() < oligo.length()) { break; } - - //search for primer + try { + + string rawSequence = seq.getUnaligned(); + + for(map::iterator it=primers.begin();it!=primers.end();it++){ + string oligo = it->first; + + if(rawSequence.length() < oligo.length()) { break; } + + //search for primer int olength = oligo.length(); for (int j = 0; j < rawSequence.length()-olength; j++){ - if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; } + if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; } string rawChunk = rawSequence.substr(j, olength); if(compareDNASeq(oligo, rawChunk)) { primerStart = j; @@ -193,22 +196,22 @@ bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){ } } - } - + } + primerStart = 0; primerEnd = 0; //if you don't want to allow for diffs - if (pdiffs == 0) { return false; } - else { //try aligning and see if you can find it - - //can you find the barcode - int minDiff = 1e6; - int minCount = 1; - + if (pdiffs == 0) { return false; } + else { //try aligning and see if you can find it + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + Alignment* alignment; - if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); } - else{ alignment = NULL; } + if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); } + else{ alignment = NULL; } - for(map::iterator it=primers.begin();it!=primers.end();it++){ + for(map::iterator it=primers.begin();it!=primers.end();it++){ string prim = it->first; //search for primer @@ -218,26 +221,26 @@ bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){ for (int j = 0; j < rawSequence.length()-olength; j++){ string oligo = it->first; - - if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; } - + + if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; } + string rawChunk = rawSequence.substr(j, olength+pdiffs); //use needleman to align first primer.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawChunk); + alignment->alignPrimer(oligo, rawChunk); oligo = alignment->getSeqAAln(); string temp = alignment->getSeqBAln(); - + int alnLength = oligo.length(); - + for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); - + int numDiff = countDiffs(oligo, temp); - + if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; @@ -246,38 +249,38 @@ bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){ }else if(numDiff == minDiff){ minCount++; } } } - } - - if (alignment != NULL) { delete alignment; } + } + + if (alignment != NULL) { delete alignment; } - if(minDiff > pdiffs) { primerStart = 0; primerEnd = 0; return false; } //no good matches - else if(minCount > 1) { primerStart = 0; primerEnd = 0; return false; } //can't tell the difference between multiple primers - else{ return true; } - } - + if(minDiff > pdiffs) { primerStart = 0; primerEnd = 0; return false; } //no good matches + else if(minCount > 1) { primerStart = 0; primerEnd = 0; return false; } //can't tell the difference between multiple primers + else{ return true; } + } + primerStart = 0; primerEnd = 0; - return false; - - } - catch(exception& e) { - m->errorOut(e, "TrimOligos", "stripForward"); - exit(1); - } + return false; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripForward"); + exit(1); + } } //******************************************************************/ bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){ - try { - - string rawSequence = seq.getUnaligned(); - - for(int i=0;i= 0; j--){ - if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; } + if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; } string rawChunk = rawSequence.substr(j, olength); if(compareDNASeq(oligo, rawChunk)) { @@ -287,239 +290,239 @@ bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){ } } - } - + } + primerStart = 0; primerEnd = 0; - return false; - } - catch(exception& e) { - m->errorOut(e, "PcrSeqsCommand", "findReverse"); - exit(1); - } + return false; + } + catch(exception& e) { + m->errorOut(e, "PcrSeqsCommand", "findReverse"); + exit(1); + } } //*******************************************************************/ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){ - try { - - string rawSequence = seq.getUnaligned(); - int success = bdiffs + 1; //guilty until proven innocent - - //can you find the barcode - for(map::iterator it=barcodes.begin();it!=barcodes.end();it++){ - string oligo = it->first; - if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length - success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out - break; - } - - if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ - group = it->second; - seq.setUnaligned(rawSequence.substr(oligo.length())); - - if(qual.getName() != ""){ - qual.trimQScores(oligo.length(), -1); - } - - success = 0; - break; - } - } - - //if you found the barcode or if you don't want to allow for diffs - if ((bdiffs == 0) || (success == 0)) { return success; } - - else { //try aligning and see if you can find it - Alignment* alignment; - if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); } - else{ alignment = NULL; } - - //can you find the barcode - int minDiff = 1e6; - int minCount = 1; - int minGroup = -1; - int minPos = 0; - - for(map::iterator it=barcodes.begin();it!=barcodes.end();it++){ - string oligo = it->first; - // int length = oligo.length(); - - if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length - success = bdiffs + 10; - break; - } - - //use needleman to align first barcode.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs)); - oligo = alignment->getSeqAAln(); - string temp = alignment->getSeqBAln(); - - int alnLength = oligo.length(); - - for(int i=oligo.length()-1;i>=0;i--){ - if(oligo[i] != '-'){ alnLength = i+1; break; } - } - oligo = oligo.substr(0,alnLength); - temp = temp.substr(0,alnLength); + try { + + if (paired) { int success = stripPairedBarcode(seq, qual, group); return success; } + + string rawSequence = seq.getUnaligned(); + int success = bdiffs + 1; //guilty until proven innocent + + //can you find the barcode + for(map::iterator it=barcodes.begin();it!=barcodes.end();it++){ + string oligo = it->first; + if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + + if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ + group = it->second; + seq.setUnaligned(rawSequence.substr(oligo.length())); + + if(qual.getName() != ""){ + qual.trimQScores(oligo.length(), -1); + } + + success = 0; + break; + } + } + + //if you found the barcode or if you don't want to allow for diffs + if ((bdiffs == 0) || (success == 0)) { return success; } + + else { //try aligning and see if you can find it + Alignment* alignment; + if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + int minGroup = -1; + int minPos = 0; + + for(map::iterator it=barcodes.begin();it!=barcodes.end();it++){ + string oligo = it->first; + // int length = oligo.length(); + + if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; + break; + } + + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ + if(oligo[i] != '-'){ alnLength = i+1; break; } + } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); - - if(numDiff < minDiff){ - minDiff = numDiff; - minCount = 1; - minGroup = it->second; - minPos = 0; - for(int i=0;i bdiffs) { success = minDiff; } //no good matches - else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes - else{ //use the best match - group = minGroup; - seq.setUnaligned(rawSequence.substr(minPos)); - - if(qual.getName() != ""){ - qual.trimQScores(minPos, -1); - } - success = minDiff; - } - - if (alignment != NULL) { delete alignment; } - - } - - return success; - - } - catch(exception& e) { - m->errorOut(e, "TrimOligos", "stripBarcode"); - exit(1); - } + + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minGroup = it->second; + minPos = 0; + for(int i=0;i bdiffs) { success = minDiff; } //no good matches + else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes + else{ //use the best match + group = minGroup; + seq.setUnaligned(rawSequence.substr(minPos)); + + if(qual.getName() != ""){ + qual.trimQScores(minPos, -1); + } + success = minDiff; + } + + if (alignment != NULL) { delete alignment; } + + } + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripBarcode"); + exit(1); + } } //*******************************************************************/ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){ - try { - //look for forward barcode - string rawFSequence = forwardSeq.getUnaligned(); + try { + //look for forward barcode + string rawFSequence = forwardSeq.getUnaligned(); string rawRSequence = reverseSeq.getUnaligned(); - int success = bdiffs + 1; //guilty until proven innocent - - //can you find the forward barcode - for(map::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){ - string foligo = it->second.forward; + int success = bdiffs + 1; //guilty until proven innocent + + //can you find the forward barcode + for(map::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){ + string foligo = it->second.forward; string roligo = it->second.reverse; - if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length - success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out - break; - } + if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length - success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out - break; - } - - if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) { - group = it->first; - forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); - reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length()))); - success = 0; - break; - } - } - - //if you found the barcode or if you don't want to allow for diffs - if ((bdiffs == 0) || (success == 0)) { return success; } - else { //try aligning and see if you can find it - Alignment* alignment; - if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); } - else{ alignment = NULL; } - - //can you find the barcode - int minDiff = 1e6; - int minCount = 1; - vector< vector > minFGroup; - vector minFPos; + success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + + if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) { + group = it->first; + forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); + reverseSeq.setUnaligned(rawRSequence.substr(roligo.length())); + success = 0; + break; + } + } + + //if you found the barcode or if you don't want to allow for diffs + if ((bdiffs == 0) || (success == 0)) { return success; } + else { //try aligning and see if you can find it + Alignment* alignment; + if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + vector< vector > minFGroup; + vector minFPos; //the pair can have duplicates, but we only want to search for the unique forward and reverses, example /* - 1 Sarah Westcott - 2 John Westcott - 3 Anna Westcott - 4 Sarah Schloss - 5 Pat Schloss - 6 Gail Brown - 7 Pat Moore - - only want to look for forward = Sarah, John, Anna, Pat, Gail - reverse = Westcott, Schloss, Brown, Moore - - but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group. + 1 Sarah Westcott + 2 John Westcott + 3 Anna Westcott + 4 Sarah Schloss + 5 Pat Schloss + 6 Gail Brown + 7 Pat Moore + only want to look for forward = Sarah, John, Anna, Pat, Gail + reverse = Westcott, Schloss, Brown, Moore + but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group. */ - //cout << endl << forwardSeq.getName() << endl; - for(map >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){ - string oligo = it->first; - - if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length - success = bdiffs + 10; - break; - } - //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl; - //use needleman to align first barcode.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs)); - oligo = alignment->getSeqAAln(); - string temp = alignment->getSeqBAln(); - - int alnLength = oligo.length(); - - for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } - oligo = oligo.substr(0,alnLength); - temp = temp.substr(0,alnLength); + //cout << endl << forwardSeq.getName() << endl; + for(map >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){ + string oligo = it->first; + + if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; + break; + } + //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl; + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); - + if (alnLength == 0) { numDiff = bdiffs + 100; } //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; - if(numDiff < minDiff){ - minDiff = numDiff; - minCount = 1; - minFGroup.clear(); + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minFGroup.clear(); minFGroup.push_back(it->second); - int tempminFPos = 0; + int tempminFPos = 0; minFPos.clear(); - for(int i=0;isecond); + }else if(numDiff == minDiff){ + minFGroup.push_back(it->second); int tempminFPos = 0; - for(int i=0;i bdiffs) { success = minDiff; } //no good matches - else{ + //cout << minDiff << '\t' << minCount << '\t' << endl; + if(minDiff > bdiffs) { success = minDiff; } //no good matches + else{ //check for reverse match - if (alignment != NULL) { delete alignment; } + if (alignment != NULL) { delete alignment; } - if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); } - else{ alignment = NULL; } + if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); } + else{ alignment = NULL; } //can you find the barcode minDiff = 1e6; @@ -536,7 +539,7 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr } //use needleman to align first barcode.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawRSequence.substr(0,oligo.length()+bdiffs)); + alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs)); oligo = alignment->getSeqAAln(); string temp = alignment->getSeqBAln(); @@ -560,7 +563,7 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr tempminRPos++; } } - minRPos.push_back(tempminRPos); + minRPos.push_back(tempminRPos); }else if(numDiff == minDiff){ int tempminRPos = 0; for(int i=0;isecond); } } - /*cout << minDiff << '\t' << minCount << '\t' << endl; - for (int i = 0; i < minFGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; } - cout << endl; - } - cout << endl; - for (int i = 0; i < minRGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; } - cout << endl; - } - cout << endl;*/ - if(minDiff > bdiffs) { success = minDiff; } //no good matches - else { + if(minDiff > bdiffs) { success = minDiff; } //no good matches + else { bool foundMatch = false; vector matches; for (int i = 0; i < minFGroup.size(); i++) { @@ -601,9 +591,9 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr int fStart = 0; int rStart = 0; - if (matches.size() == 1) { + if (matches.size() == 1) { foundMatch = true; - group = matches[0]; + group = matches[0]; fStart = minFPos[0]; rStart = minRPos[0]; } @@ -615,138 +605,136 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr success = minDiff; }else { success = bdiffs + 100; } } - } - - if (alignment != NULL) { delete alignment; } - } - - return success; - - } - catch(exception& e) { - m->errorOut(e, "TrimOligos", "stripIBarcode"); - exit(1); - } - + } + + if (alignment != NULL) { delete alignment; } + } + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripIBarcode"); + exit(1); + } + } //*******************************************************************/ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){ - try { - //look for forward barcode - string rawFSequence = forwardSeq.getUnaligned(); + try { + //look for forward barcode + string rawFSequence = forwardSeq.getUnaligned(); string rawRSequence = reverseSeq.getUnaligned(); - int success = bdiffs + 1; //guilty until proven innocent - - //can you find the forward barcode - for(map::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){ - string foligo = it->second.forward; + int success = bdiffs + 1; //guilty until proven innocent + + //can you find the forward barcode + for(map::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){ + string foligo = it->second.forward; string roligo = it->second.reverse; - if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length - success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out - break; - } + if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length - success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out - break; - } - - if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) { - group = it->first; - forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); - reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length()))); + success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + + if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) { + group = it->first; + forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); + reverseSeq.setUnaligned(rawRSequence.substr(roligo.length())); forwardQual.trimQScores(foligo.length(), -1); - reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length()); - success = 0; - break; - } - } - - //if you found the barcode or if you don't want to allow for diffs - if ((bdiffs == 0) || (success == 0)) { return success; } - else { //try aligning and see if you can find it - Alignment* alignment; - if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); } - else{ alignment = NULL; } - - //can you find the barcode - int minDiff = 1e6; - int minCount = 1; - vector< vector > minFGroup; - vector minFPos; + reverseQual.trimQScores(roligo.length(), -1); + success = 0; + break; + } + } + + //if you found the barcode or if you don't want to allow for diffs + if ((bdiffs == 0) || (success == 0)) { return success; } + else { //try aligning and see if you can find it + Alignment* alignment; + if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + vector< vector > minFGroup; + vector minFPos; //the pair can have duplicates, but we only want to search for the unique forward and reverses, example /* - 1 Sarah Westcott - 2 John Westcott - 3 Anna Westcott - 4 Sarah Schloss - 5 Pat Schloss - 6 Gail Brown - 7 Pat Moore - + 1 Sarah Westcott + 2 John Westcott + 3 Anna Westcott + 4 Sarah Schloss + 5 Pat Schloss + 6 Gail Brown + 7 Pat Moore only want to look for forward = Sarah, John, Anna, Pat, Gail reverse = Westcott, Schloss, Brown, Moore - - but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group. + but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group. */ - //cout << endl << forwardSeq.getName() << endl; - for(map >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){ - string oligo = it->first; - - if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length - success = bdiffs + 10; - break; - } - //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl; - //use needleman to align first barcode.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs)); - oligo = alignment->getSeqAAln(); - string temp = alignment->getSeqBAln(); - - int alnLength = oligo.length(); - - for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } - oligo = oligo.substr(0,alnLength); - temp = temp.substr(0,alnLength); + //cout << endl << forwardSeq.getName() << endl; + for(map >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){ + string oligo = it->first; + + if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; + break; + } + //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl; + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); - + if (alnLength == 0) { numDiff = bdiffs + 100; } //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; - if(numDiff < minDiff){ - minDiff = numDiff; - minCount = 1; - minFGroup.clear(); + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minFGroup.clear(); minFGroup.push_back(it->second); - int tempminFPos = 0; + int tempminFPos = 0; minFPos.clear(); - for(int i=0;isecond); + }else if(numDiff == minDiff){ + minFGroup.push_back(it->second); int tempminFPos = 0; - for(int i=0;i bdiffs) { success = minDiff; } //no good matches - else{ + //cout << minDiff << '\t' << minCount << '\t' << endl; + if(minDiff > bdiffs) { success = minDiff; } //no good matches + else{ //check for reverse match - if (alignment != NULL) { delete alignment; } + if (alignment != NULL) { delete alignment; } - if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); } - else{ alignment = NULL; } + if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); } + else{ alignment = NULL; } //can you find the barcode minDiff = 1e6; @@ -763,7 +751,7 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality } //use needleman to align first barcode.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawRSequence.substr(0,oligo.length()+bdiffs)); + alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs)); oligo = alignment->getSeqAAln(); string temp = alignment->getSeqBAln(); @@ -787,7 +775,7 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality tempminRPos++; } } - minRPos.push_back(tempminRPos); + minRPos.push_back(tempminRPos); }else if(numDiff == minDiff){ int tempminRPos = 0; for(int i=0;isecond); } } - /*cout << minDiff << '\t' << minCount << '\t' << endl; - for (int i = 0; i < minFGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; } - cout << endl; - } - cout << endl; - for (int i = 0; i < minRGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; } - cout << endl; - } - cout << endl;*/ - if(minDiff > bdiffs) { success = minDiff; } //no good matches - else { + if(minDiff > bdiffs) { success = minDiff; } //no good matches + else { bool foundMatch = false; vector matches; for (int i = 0; i < minFGroup.size(); i++) { @@ -828,9 +803,9 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality int fStart = 0; int rStart = 0; - if (matches.size() == 1) { + if (matches.size() == 1) { foundMatch = true; - group = matches[0]; + group = matches[0]; fStart = minFPos[0]; rStart = minRPos[0]; } @@ -844,138 +819,140 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality success = minDiff; }else { success = bdiffs + 100; } } - } - - if (alignment != NULL) { delete alignment; } - } - - return success; - - } - catch(exception& e) { - m->errorOut(e, "TrimOligos", "stripIBarcode"); - exit(1); - } - + } + + if (alignment != NULL) { delete alignment; } + } + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripIBarcode"); + exit(1); + } + } //*******************************************************************/ -int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){ - try { - //look for forward barcode - string rawFSequence = forwardSeq.getUnaligned(); - string rawRSequence = reverseSeq.getUnaligned(); - int success = pdiffs + 1; //guilty until proven innocent - - //can you find the forward barcode - for(map::iterator it=ipprimers.begin();it!=ipprimers.end();it++){ - string foligo = it->second.forward; +int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){ + try { + //look for forward barcode + string rawSeq = seq.getUnaligned(); + int success = bdiffs + 1; //guilty until proven innocent + //cout << seq.getName() << endl; + //can you find the forward barcode + for(map::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){ + string foligo = it->second.forward; string roligo = it->second.reverse; + //cout << it->first << '\t' << foligo << '\t' << roligo << endl; + //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl; + if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } - if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length - success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out - break; - } - if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length - success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out - break; - } - - if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) { - group = it->first; - forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); - reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length()))); - forwardQual.trimQScores(foligo.length(), -1); - reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length()); - success = 0; - break; - } - } - - //if you found the barcode or if you don't want to allow for diffs - if ((pdiffs == 0) || (success == 0)) { return success; } - else { //try aligning and see if you can find it - Alignment* alignment; - if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); } - else{ alignment = NULL; } - - //can you find the barcode - int minDiff = 1e6; - int minCount = 1; - vector< vector > minFGroup; - vector minFPos; + if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; } + + if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) { + group = it->first; + string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode + seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode + if(qual.getName() != ""){ + qual.trimQScores(-1, rawSeq.length()-roligo.length()); + qual.trimQScores(foligo.length(), -1); + } + success = 0; + break; + } + } + //cout << "success=" << success << endl; + //if you found the barcode or if you don't want to allow for diffs + if ((bdiffs == 0) || (success == 0)) { return success; } + else { //try aligning and see if you can find it + Alignment* alignment; + if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + vector< vector > minFGroup; + vector minFPos; //the pair can have duplicates, but we only want to search for the unique forward and reverses, example /* - 1 Sarah Westcott - 2 John Westcott - 3 Anna Westcott - 4 Sarah Schloss - 5 Pat Schloss - 6 Gail Brown - 7 Pat Moore - + 1 Sarah Westcott + 2 John Westcott + 3 Anna Westcott + 4 Sarah Schloss + 5 Pat Schloss + 6 Gail Brown + 7 Pat Moore only want to look for forward = Sarah, John, Anna, Pat, Gail reverse = Westcott, Schloss, Brown, Moore - - but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group. + but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group. */ - //cout << endl << forwardSeq.getName() << endl; - for(map >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){ - string oligo = it->first; - - if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length - success = pdiffs + 10; - break; - } - //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl; - //use needleman to align first barcode.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawFSequence.substr(0,oligo.length()+pdiffs)); - oligo = alignment->getSeqAAln(); - string temp = alignment->getSeqBAln(); - - int alnLength = oligo.length(); - - for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } - oligo = oligo.substr(0,alnLength); - temp = temp.substr(0,alnLength); + //cout << endl << forwardSeq.getName() << endl; + for(map >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){ + string oligo = it->first; + + if(rawSeq.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; + break; + } + //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl; + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); - - if (alnLength == 0) { numDiff = pdiffs + 100; } + + if (alnLength == 0) { numDiff = bdiffs + 100; } //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; - if(numDiff < minDiff){ - minDiff = numDiff; - minCount = 1; - minFGroup.clear(); + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minFGroup.clear(); minFGroup.push_back(it->second); - int tempminFPos = 0; + int tempminFPos = 0; minFPos.clear(); - for(int i=0;isecond); + }else if(numDiff == minDiff){ + minFGroup.push_back(it->second); int tempminFPos = 0; - for(int i=0;i pdiffs) { success = minDiff; } //no good matches - else{ + //cout << minDiff << '\t' << minCount << '\t' << endl; + if(minDiff > bdiffs) { success = minDiff; } //no good matches + else{ //check for reverse match - if (alignment != NULL) { delete alignment; } + if (alignment != NULL) { delete alignment; } - if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); } - else{ alignment = NULL; } + if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); } + else{ alignment = NULL; } //can you find the barcode minDiff = 1e6; @@ -983,16 +960,18 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality vector< vector > minRGroup; vector minRPos; - for(map >::iterator it=irprimers.begin();it!=irprimers.end();it++){ - string oligo = it->first; - //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl; - if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length - success = pdiffs + 10; + string rawRSequence = reverseOligo(seq.getUnaligned()); + //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl; + for(map >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){ + string oligo = reverseOligo(it->first); + //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl; + if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; break; } //use needleman to align first barcode.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawRSequence.substr(0,oligo.length()+pdiffs)); + alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs)); oligo = alignment->getSeqAAln(); string temp = alignment->getSeqBAln(); @@ -1001,9 +980,9 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); - if (alnLength == 0) { numDiff = pdiffs + 100; } + if (alnLength == 0) { numDiff = bdiffs + 100; } - //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; + //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl; if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; @@ -1016,7 +995,7 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality tempminRPos++; } } - minRPos.push_back(tempminRPos); + minRPos.push_back(tempminRPos); }else if(numDiff == minDiff){ int tempminRPos = 0; for(int i=0;isecond); } } - /*cout << minDiff << '\t' << minCount << '\t' << endl; - for (int i = 0; i < minFGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; } - cout << endl; - } - cout << endl; - for (int i = 0; i < minRGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; } - cout << endl; - } - cout << endl;*/ - if(minDiff > pdiffs) { success = minDiff; } //no good matches - else { + if(minDiff > bdiffs) { success = minDiff; } //no good matches + else { bool foundMatch = false; vector matches; for (int i = 0; i < minFGroup.size(); i++) { @@ -1057,152 +1023,163 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality int fStart = 0; int rStart = 0; - if (matches.size() == 1) { + if (matches.size() == 1) { foundMatch = true; - group = matches[0]; + group = matches[0]; fStart = minFPos[0]; - rStart = minRPos[0]; + rStart = rawSeq.length() - minRPos[0]; + if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence } //we have an acceptable match for the forward and reverse, but do they match? if (foundMatch) { - forwardSeq.setUnaligned(rawFSequence.substr(fStart)); - reverseSeq.setUnaligned(rawRSequence.substr(rStart)); - forwardQual.trimQScores(fStart, -1); - reverseQual.trimQScores(rStart, -1); + string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode + seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode + if(qual.getName() != ""){ + qual.trimQScores(-1, rStart); + qual.trimQScores(fStart, -1); + } success = minDiff; - }else { success = pdiffs + 100; } + //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl; + }else { success = bdiffs + 100; } } - } - - if (alignment != NULL) { delete alignment; } - } - - return success; - - } - catch(exception& e) { - m->errorOut(e, "TrimOligos", "stripIForward"); - exit(1); - } - + } + + if (alignment != NULL) { delete alignment; } + } + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripPairedBarcode"); + exit(1); + } + } //*******************************************************************/ -int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){ - try { - //look for forward barcode - string rawFSequence = forwardSeq.getUnaligned(); - string rawRSequence = reverseSeq.getUnaligned(); - int success = pdiffs + 1; //guilty until proven innocent - - //can you find the forward barcode - for(map::iterator it=ipprimers.begin();it!=ipprimers.end();it++){ - string foligo = it->second.forward; +int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){ + try { + //look for forward + string rawSeq = seq.getUnaligned(); + int success = pdiffs + 1; //guilty until proven innocent + //cout << seq.getName() << endl; + //can you find the forward + for(map::iterator it=ipprimers.begin();it!=ipprimers.end();it++){ + string foligo = it->second.forward; string roligo = it->second.reverse; - if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length - success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out - break; - } - if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length - success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out - break; - } - - if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) { - group = it->first; - forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); - reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length()))); - success = 0; - break; - } - } - - //if you found the barcode or if you don't want to allow for diffs - if ((pdiffs == 0) || (success == 0)) { return success; } - else { //try aligning and see if you can find it - Alignment* alignment; - if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); } - else{ alignment = NULL; } - - //can you find the barcode - int minDiff = 1e6; - int minCount = 1; - vector< vector > minFGroup; - vector minFPos; + //cout << it->first << '\t' << foligo << '\t' << roligo << endl; + //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl; + if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + + if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; } + + if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) { + group = it->first; + if (!keepForward) { + string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode + seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode + if(qual.getName() != ""){ + qual.trimQScores(-1, rawSeq.length()-roligo.length()); + qual.trimQScores(foligo.length(), -1); + } + } + success = 0; + break; + } + } + //cout << "success=" << success << endl; + //if you found the barcode or if you don't want to allow for diffs + if ((pdiffs == 0) || (success == 0)) { return success; } + else { //try aligning and see if you can find it + Alignment* alignment; + if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + vector< vector > minFGroup; + vector minFPos; //the pair can have duplicates, but we only want to search for the unique forward and reverses, example /* - 1 Sarah Westcott - 2 John Westcott - 3 Anna Westcott - 4 Sarah Schloss - 5 Pat Schloss - 6 Gail Brown - 7 Pat Moore - + 1 Sarah Westcott + 2 John Westcott + 3 Anna Westcott + 4 Sarah Schloss + 5 Pat Schloss + 6 Gail Brown + 7 Pat Moore only want to look for forward = Sarah, John, Anna, Pat, Gail reverse = Westcott, Schloss, Brown, Moore - - but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group. + but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group. */ - //cout << endl << forwardSeq.getName() << endl; - for(map >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){ - string oligo = it->first; - - if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length - success = pdiffs + 10; - break; - } - //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl; - //use needleman to align first barcode.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawFSequence.substr(0,oligo.length()+pdiffs)); - oligo = alignment->getSeqAAln(); - string temp = alignment->getSeqBAln(); - - int alnLength = oligo.length(); - - for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } - oligo = oligo.substr(0,alnLength); - temp = temp.substr(0,alnLength); + //cout << endl << forwardSeq.getName() << endl; + for(map >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){ + string oligo = it->first; + + if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; + break; + } + //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl; + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); - + if (alnLength == 0) { numDiff = pdiffs + 100; } //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; - if(numDiff < minDiff){ - minDiff = numDiff; - minCount = 1; - minFGroup.clear(); + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minFGroup.clear(); minFGroup.push_back(it->second); - int tempminFPos = 0; + int tempminFPos = 0; minFPos.clear(); - for(int i=0;isecond); + }else if(numDiff == minDiff){ + minFGroup.push_back(it->second); int tempminFPos = 0; - for(int i=0;i pdiffs) { success = minDiff; } //no good matches - else{ + //cout << minDiff << '\t' << minCount << '\t' << endl; + if(minDiff > pdiffs) { success = minDiff; } //no good matches + else{ //check for reverse match - if (alignment != NULL) { delete alignment; } + if (alignment != NULL) { delete alignment; } - if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); } - else{ alignment = NULL; } + if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); } + else{ alignment = NULL; } //can you find the barcode minDiff = 1e6; @@ -1210,16 +1187,18 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr vector< vector > minRGroup; vector minRPos; + string rawRSequence = reverseOligo(seq.getUnaligned()); + for(map >::iterator it=irprimers.begin();it!=irprimers.end();it++){ - string oligo = it->first; - //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl; + string oligo = reverseOligo(it->first); + //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl; if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length success = pdiffs + 10; break; } //use needleman to align first barcode.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawRSequence.substr(0,oligo.length()+pdiffs)); + alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs)); oligo = alignment->getSeqAAln(); string temp = alignment->getSeqBAln(); @@ -1230,7 +1209,7 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr int numDiff = countDiffs(oligo, temp); if (alnLength == 0) { numDiff = pdiffs + 100; } - //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; + //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl; if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; @@ -1243,7 +1222,7 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr tempminRPos++; } } - minRPos.push_back(tempminRPos); + minRPos.push_back(tempminRPos); }else if(numDiff == minDiff){ int tempminRPos = 0; for(int i=0;isecond); } } - /*cout << minDiff << '\t' << minCount << '\t' << endl; - for (int i = 0; i < minFGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; } - cout << endl; - } - cout << endl; - for (int i = 0; i < minRGroup.size(); i++) { - cout << i << '\t'; - for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; } - cout << endl; - } - cout << endl;*/ - if(minDiff > pdiffs) { success = minDiff; } //no good matches - else { + if(minDiff > pdiffs) { success = minDiff; } //no good matches + else { bool foundMatch = false; vector matches; for (int i = 0; i < minFGroup.size(); i++) { @@ -1284,860 +1250,1337 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr int fStart = 0; int rStart = 0; - if (matches.size() == 1) { + if (matches.size() == 1) { foundMatch = true; - group = matches[0]; + group = matches[0]; fStart = minFPos[0]; - rStart = minRPos[0]; + rStart = rawSeq.length() - minRPos[0]; + if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence } //we have an acceptable match for the forward and reverse, but do they match? if (foundMatch) { - forwardSeq.setUnaligned(rawFSequence.substr(fStart)); - reverseSeq.setUnaligned(rawRSequence.substr(rStart)); + if (!keepForward) { + string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode + seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode + if(qual.getName() != ""){ + qual.trimQScores(-1, rStart); + qual.trimQScores(fStart, -1); + } + } success = minDiff; + //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl; }else { success = pdiffs + 100; } } - } - - if (alignment != NULL) { delete alignment; } - } - - return success; - - } - catch(exception& e) { - m->errorOut(e, "TrimOligos", "stripIForward"); - exit(1); - } - -} -//*******************************************************************/ -int TrimOligos::stripBarcode(Sequence& seq, int& group){ - try { - - string rawSequence = seq.getUnaligned(); - int success = bdiffs + 1; //guilty until proven innocent - - //can you find the barcode - for(map::iterator it=barcodes.begin();it!=barcodes.end();it++){ - string oligo = it->first; - if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length - success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out - break; - } - - if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ - group = it->second; - seq.setUnaligned(rawSequence.substr(oligo.length())); - - success = 0; - break; - } - } - - //if you found the barcode or if you don't want to allow for diffs - if ((bdiffs == 0) || (success == 0)) { return success; } - - else { //try aligning and see if you can find it - Alignment* alignment; - if (barcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); } - else{ alignment = NULL; } - - //can you find the barcode - int minDiff = 1e6; - int minCount = 1; - int minGroup = -1; - int minPos = 0; - - for(map::iterator it=barcodes.begin();it!=barcodes.end();it++){ - string oligo = it->first; - // int length = oligo.length(); - - if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length - success = bdiffs + 10; - break; - } - - //use needleman to align first barcode.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs)); - oligo = alignment->getSeqAAln(); - string temp = alignment->getSeqBAln(); - - int alnLength = oligo.length(); - - for(int i=oligo.length()-1;i>=0;i--){ - if(oligo[i] != '-'){ alnLength = i+1; break; } - } - oligo = oligo.substr(0,alnLength); - temp = temp.substr(0,alnLength); - - int numDiff = countDiffs(oligo, temp); - - if(numDiff < minDiff){ - minDiff = numDiff; - minCount = 1; - minGroup = it->second; - minPos = 0; - for(int i=0;i bdiffs) { success = minDiff; } //no good matches - else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes - else{ //use the best match - group = minGroup; - seq.setUnaligned(rawSequence.substr(minPos)); - success = minDiff; - } - - if (alignment != NULL) { delete alignment; } - - } - - return success; - - } - catch(exception& e) { - m->errorOut(e, "TrimOligos", "stripBarcode"); - exit(1); - } - + } + + if (alignment != NULL) { delete alignment; } + } + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripPairedPrimers"); + exit(1); + } + } -/********************************************************************/ -int TrimOligos::stripForward(Sequence& seq, int& group){ - try { - - string rawSequence = seq.getUnaligned(); - int success = pdiffs + 1; //guilty until proven innocent - - //can you find the primer - for(map::iterator it=primers.begin();it!=primers.end();it++){ - string oligo = it->first; - if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length - success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out - break; - } - - if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ - group = it->second; - seq.setUnaligned(rawSequence.substr(oligo.length())); - success = 0; - break; - } - } - - //if you found the barcode or if you don't want to allow for diffs - if ((pdiffs == 0) || (success == 0)) { return success; } - - else { //try aligning and see if you can find it - Alignment* alignment; - if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); } - else{ alignment = NULL; } - - //can you find the barcode - int minDiff = 1e6; - int minCount = 1; - int minGroup = -1; - int minPos = 0; - - for(map::iterator it=primers.begin();it!=primers.end();it++){ - string oligo = it->first; - // int length = oligo.length(); - - if(rawSequence.length() < maxFPrimerLength){ - success = pdiffs + 100; - break; - } - - //use needleman to align first barcode.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs)); - oligo = alignment->getSeqAAln(); - string temp = alignment->getSeqBAln(); - - int alnLength = oligo.length(); - - for(int i=oligo.length()-1;i>=0;i--){ - if(oligo[i] != '-'){ alnLength = i+1; break; } - } - oligo = oligo.substr(0,alnLength); - temp = temp.substr(0,alnLength); - - int numDiff = countDiffs(oligo, temp); - - if(numDiff < minDiff){ - minDiff = numDiff; - minCount = 1; - minGroup = it->second; - minPos = 0; - for(int i=0;i pdiffs) { success = minDiff; } //no good matches - else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers - else{ //use the best match - group = minGroup; - seq.setUnaligned(rawSequence.substr(minPos)); - success = minDiff; - } - - if (alignment != NULL) { delete alignment; } - - } - - return success; - - } - catch(exception& e) { - m->errorOut(e, "TrimOligos", "stripForward"); - exit(1); - } -} //*******************************************************************/ -int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){ - try { - string rawSequence = seq.getUnaligned(); - int success = pdiffs + 1; //guilty until proven innocent - - //can you find the primer - for(map::iterator it=primers.begin();it!=primers.end();it++){ - string oligo = it->first; - if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length - success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out - break; - } - - if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ - group = it->second; - if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); } - if(qual.getName() != ""){ - if (!keepForward) { qual.trimQScores(oligo.length(), -1); } - } - success = 0; - break; - } - } - - //if you found the barcode or if you don't want to allow for diffs - if ((pdiffs == 0) || (success == 0)) { return success; } - - else { //try aligning and see if you can find it - Alignment* alignment; - if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); } - else{ alignment = NULL; } - - //can you find the barcode - int minDiff = 1e6; - int minCount = 1; - int minGroup = -1; - int minPos = 0; - - for(map::iterator it=primers.begin();it!=primers.end();it++){ - string oligo = it->first; - // int length = oligo.length(); - - if(rawSequence.length() < maxFPrimerLength){ - success = pdiffs + 100; - break; - } - - //use needleman to align first barcode.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs)); - oligo = alignment->getSeqAAln(); - string temp = alignment->getSeqBAln(); - - int alnLength = oligo.length(); - - for(int i=oligo.length()-1;i>=0;i--){ - if(oligo[i] != '-'){ alnLength = i+1; break; } - } - oligo = oligo.substr(0,alnLength); - temp = temp.substr(0,alnLength); - - int numDiff = countDiffs(oligo, temp); - - if(numDiff < minDiff){ - minDiff = numDiff; - minCount = 1; - minGroup = it->second; - minPos = 0; - for(int i=0;i pdiffs) { success = minDiff; } //no good matches - else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers - else{ //use the best match - group = minGroup; - if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); } - if(qual.getName() != ""){ - if (!keepForward) { qual.trimQScores(minPos, -1); } - } - success = minDiff; - } - - if (alignment != NULL) { delete alignment; } - - } - - return success; - - } - catch(exception& e) { - m->errorOut(e, "TrimOligos", "stripForward"); - exit(1); - } -} -//******************************************************************/ -bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){ - try { - string rawSequence = seq.getUnaligned(); - bool success = 0; //guilty until proven innocent - - for(int i=0;ierrorOut(e, "TrimOligos", "stripReverse"); - exit(1); - } -} -//******************************************************************/ -bool TrimOligos::stripReverse(Sequence& seq){ - try { - - string rawSequence = seq.getUnaligned(); - bool success = 0; //guilty until proven innocent - - for(int i=0;ierrorOut(e, "TrimOligos", "stripReverse"); - exit(1); - } -} -//******************************************************************/ -bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){ - try { - string rawSequence = seq.getUnaligned(); - bool success = ldiffs + 1; //guilty until proven innocent - - for(int i=0;i 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); } - else{ alignment = NULL; } - - //can you find the barcode - int minDiff = 1e6; - int minCount = 1; - int minPos = 0; - - for(int i = 0; i < linker.size(); i++){ - string oligo = linker[i]; - // int length = oligo.length(); - - if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length - success = ldiffs + 10; - break; - } - - //use needleman to align first barcode.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs)); - oligo = alignment->getSeqAAln(); - string temp = alignment->getSeqBAln(); - - int alnLength = oligo.length(); - - for(int i=oligo.length()-1;i>=0;i--){ - if(oligo[i] != '-'){ alnLength = i+1; break; } - } - oligo = oligo.substr(0,alnLength); - temp = temp.substr(0,alnLength); - - int numDiff = countDiffs(oligo, temp); - - if(numDiff < minDiff){ - minDiff = numDiff; - minCount = 1; - minPos = 0; - for(int i=0;i ldiffs) { success = minDiff; } //no good matches - else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes - else{ //use the best match - seq.setUnaligned(rawSequence.substr(minPos)); - - if(qual.getName() != ""){ - qual.trimQScores(minPos, -1); - } - success = minDiff; - } - - if (alignment != NULL) { delete alignment; } - - } - - - return success; - - } - catch(exception& e) { - m->errorOut(e, "TrimOligos", "stripLinker"); - exit(1); - } -} -//******************************************************************/ -bool TrimOligos::stripLinker(Sequence& seq){ - try { - - string rawSequence = seq.getUnaligned(); - bool success = ldiffs +1; //guilty until proven innocent - - for(int i=0;i 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); } - else{ alignment = NULL; } - - //can you find the barcode - int minDiff = 1e6; - int minCount = 1; - int minPos = 0; - - for(int i = 0; i < linker.size(); i++){ - string oligo = linker[i]; - // int length = oligo.length(); - - if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length - success = ldiffs + 10; - break; - } - - //use needleman to align first barcode.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs)); - oligo = alignment->getSeqAAln(); - string temp = alignment->getSeqBAln(); - - int alnLength = oligo.length(); - - for(int i=oligo.length()-1;i>=0;i--){ - if(oligo[i] != '-'){ alnLength = i+1; break; } - } - oligo = oligo.substr(0,alnLength); - temp = temp.substr(0,alnLength); - - int numDiff = countDiffs(oligo, temp); - - if(numDiff < minDiff){ - minDiff = numDiff; - minCount = 1; - minPos = 0; - for(int i=0;i ldiffs) { success = minDiff; } //no good matches - else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes - else{ //use the best match - seq.setUnaligned(rawSequence.substr(minPos)); - success = minDiff; - } - - if (alignment != NULL) { delete alignment; } - - } + //can you find the forward barcode + for(map::iterator it=ipprimers.begin();it!=ipprimers.end();it++){ + string foligo = it->second.forward; + string roligo = it->second.reverse; + + if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + + if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) { + group = it->first; + forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); + reverseSeq.setUnaligned(rawRSequence.substr(roligo.length())); + forwardQual.trimQScores(foligo.length(), -1); + reverseQual.trimQScores(roligo.length(), -1); + success = 0; + break; + } + } + + //if you found the barcode or if you don't want to allow for diffs + if ((pdiffs == 0) || (success == 0)) { return success; } + else { //try aligning and see if you can find it + Alignment* alignment; + if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + vector< vector > minFGroup; + vector minFPos; + + //the pair can have duplicates, but we only want to search for the unique forward and reverses, example + /* + 1 Sarah Westcott + 2 John Westcott + 3 Anna Westcott + 4 Sarah Schloss + 5 Pat Schloss + 6 Gail Brown + 7 Pat Moore + only want to look for forward = Sarah, John, Anna, Pat, Gail + reverse = Westcott, Schloss, Brown, Moore + but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group. + */ + //cout << endl << forwardSeq.getName() << endl; + for(map >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){ + string oligo = it->first; + + if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; + break; + } + //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl; + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + int numDiff = countDiffs(oligo, temp); + + if (alnLength == 0) { numDiff = pdiffs + 100; } + //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; + + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minFGroup.clear(); + minFGroup.push_back(it->second); + int tempminFPos = 0; + minFPos.clear(); + for(int i=0;isecond); + int tempminFPos = 0; + for(int i=0;i pdiffs) { success = minDiff; } //no good matches + else{ + //check for reverse match + if (alignment != NULL) { delete alignment; } + + if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + minDiff = 1e6; + minCount = 1; + vector< vector > minRGroup; + vector minRPos; + + for(map >::iterator it=irprimers.begin();it!=irprimers.end();it++){ + string oligo = it->first; + //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl; + if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; + break; + } + + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + int numDiff = countDiffs(oligo, temp); + if (alnLength == 0) { numDiff = pdiffs + 100; } + + //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minRGroup.clear(); + minRGroup.push_back(it->second); + int tempminRPos = 0; + minRPos.clear(); + for(int i=0;isecond); + } + + } + + if(minDiff > pdiffs) { success = minDiff; } //no good matches + else { + bool foundMatch = false; + vector matches; + for (int i = 0; i < minFGroup.size(); i++) { + for (int j = 0; j < minFGroup[i].size(); j++) { + for (int k = 0; k < minRGroup.size(); k++) { + if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); } + } + } + } + + int fStart = 0; + int rStart = 0; + if (matches.size() == 1) { + foundMatch = true; + group = matches[0]; + fStart = minFPos[0]; + rStart = minRPos[0]; + } + + //we have an acceptable match for the forward and reverse, but do they match? + if (foundMatch) { + forwardSeq.setUnaligned(rawFSequence.substr(fStart)); + reverseSeq.setUnaligned(rawRSequence.substr(rStart)); + forwardQual.trimQScores(fStart, -1); + reverseQual.trimQScores(rStart, -1); + success = minDiff; + }else { success = pdiffs + 100; } + } + } + + if (alignment != NULL) { delete alignment; } + } + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripIForward"); + exit(1); + } + +} +//*******************************************************************/ +int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){ + try { + //look for forward barcode + string rawFSequence = forwardSeq.getUnaligned(); + string rawRSequence = reverseSeq.getUnaligned(); + int success = pdiffs + 1; //guilty until proven innocent + + //can you find the forward barcode + for(map::iterator it=ipprimers.begin();it!=ipprimers.end();it++){ + string foligo = it->second.forward; + string roligo = it->second.reverse; + + if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + + if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) { + group = it->first; + forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); + reverseSeq.setUnaligned(rawRSequence.substr(roligo.length())); + success = 0; + break; + } + } + + //if you found the barcode or if you don't want to allow for diffs + if ((pdiffs == 0) || (success == 0)) { return success; } + else { //try aligning and see if you can find it + Alignment* alignment; + if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + vector< vector > minFGroup; + vector minFPos; + + //the pair can have duplicates, but we only want to search for the unique forward and reverses, example + /* + 1 Sarah Westcott + 2 John Westcott + 3 Anna Westcott + 4 Sarah Schloss + 5 Pat Schloss + 6 Gail Brown + 7 Pat Moore + only want to look for forward = Sarah, John, Anna, Pat, Gail + reverse = Westcott, Schloss, Brown, Moore + but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group. + */ + //cout << endl << forwardSeq.getName() << endl; + for(map >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){ + string oligo = it->first; + + if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; + break; + } + //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl; + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + int numDiff = countDiffs(oligo, temp); + + if (alnLength == 0) { numDiff = pdiffs + 100; } + //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; + + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minFGroup.clear(); + minFGroup.push_back(it->second); + int tempminFPos = 0; + minFPos.clear(); + for(int i=0;isecond); + int tempminFPos = 0; + for(int i=0;i pdiffs) { success = minDiff; } //no good matches + else{ + //check for reverse match + if (alignment != NULL) { delete alignment; } + + if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + minDiff = 1e6; + minCount = 1; + vector< vector > minRGroup; + vector minRPos; + + for(map >::iterator it=irprimers.begin();it!=irprimers.end();it++){ + string oligo = it->first; + //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl; + if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; + break; + } + + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + int numDiff = countDiffs(oligo, temp); + if (alnLength == 0) { numDiff = pdiffs + 100; } + + //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minRGroup.clear(); + minRGroup.push_back(it->second); + int tempminRPos = 0; + minRPos.clear(); + for(int i=0;isecond); + } + + } + + if(minDiff > pdiffs) { success = minDiff; } //no good matches + else { + bool foundMatch = false; + vector matches; + for (int i = 0; i < minFGroup.size(); i++) { + for (int j = 0; j < minFGroup[i].size(); j++) { + for (int k = 0; k < minRGroup.size(); k++) { + if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); } + } + } + } + + int fStart = 0; + int rStart = 0; + if (matches.size() == 1) { + foundMatch = true; + group = matches[0]; + fStart = minFPos[0]; + rStart = minRPos[0]; + } + + //we have an acceptable match for the forward and reverse, but do they match? + if (foundMatch) { + forwardSeq.setUnaligned(rawFSequence.substr(fStart)); + reverseSeq.setUnaligned(rawRSequence.substr(rStart)); + success = minDiff; + }else { success = pdiffs + 100; } + } + } + + if (alignment != NULL) { delete alignment; } + } + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripIForward"); + exit(1); + } + +} +//*******************************************************************/ +int TrimOligos::stripBarcode(Sequence& seq, int& group){ + try { + + string rawSequence = seq.getUnaligned(); + int success = bdiffs + 1; //guilty until proven innocent + + //can you find the barcode + for(map::iterator it=barcodes.begin();it!=barcodes.end();it++){ + string oligo = it->first; + if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + + if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ + group = it->second; + seq.setUnaligned(rawSequence.substr(oligo.length())); + + success = 0; + break; + } + } + + //if you found the barcode or if you don't want to allow for diffs + if ((bdiffs == 0) || (success == 0)) { return success; } + + else { //try aligning and see if you can find it + Alignment* alignment; + if (barcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + int minGroup = -1; + int minPos = 0; + + for(map::iterator it=barcodes.begin();it!=barcodes.end();it++){ + string oligo = it->first; + // int length = oligo.length(); + + if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; + break; + } + + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ + if(oligo[i] != '-'){ alnLength = i+1; break; } + } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + + int numDiff = countDiffs(oligo, temp); + + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minGroup = it->second; + minPos = 0; + for(int i=0;i bdiffs) { success = minDiff; } //no good matches + else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes + else{ //use the best match + group = minGroup; + seq.setUnaligned(rawSequence.substr(minPos)); + success = minDiff; + } + + if (alignment != NULL) { delete alignment; } + + } + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripBarcode"); + exit(1); + } + +} - return success; - - } - catch(exception& e) { - m->errorOut(e, "TrimOligos", "stripLinker"); - exit(1); - } +/********************************************************************/ +int TrimOligos::stripForward(Sequence& seq, int& group){ + try { + + string rawSequence = seq.getUnaligned(); + int success = pdiffs + 1; //guilty until proven innocent + + //can you find the primer + for(map::iterator it=primers.begin();it!=primers.end();it++){ + string oligo = it->first; + if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length + success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + + if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ + group = it->second; + seq.setUnaligned(rawSequence.substr(oligo.length())); + success = 0; + break; + } + } + + //if you found the barcode or if you don't want to allow for diffs + if ((pdiffs == 0) || (success == 0)) { return success; } + + else { //try aligning and see if you can find it + Alignment* alignment; + if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + int minGroup = -1; + int minPos = 0; + + for(map::iterator it=primers.begin();it!=primers.end();it++){ + string oligo = it->first; + // int length = oligo.length(); + + if(rawSequence.length() < maxFPrimerLength){ + success = pdiffs + 100; + break; + } + + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ + if(oligo[i] != '-'){ alnLength = i+1; break; } + } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + + int numDiff = countDiffs(oligo, temp); + + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minGroup = it->second; + minPos = 0; + for(int i=0;i pdiffs) { success = minDiff; } //no good matches + else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers + else{ //use the best match + group = minGroup; + seq.setUnaligned(rawSequence.substr(minPos)); + success = minDiff; + } + + if (alignment != NULL) { delete alignment; } + + } + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripForward"); + exit(1); + } +} +//*******************************************************************/ +int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){ + try { + + if (paired) { int success = stripPairedPrimers(seq, qual, group, keepForward); return success; } + + string rawSequence = seq.getUnaligned(); + int success = pdiffs + 1; //guilty until proven innocent + + //can you find the primer + for(map::iterator it=primers.begin();it!=primers.end();it++){ + string oligo = it->first; + if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length + success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + + if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ + group = it->second; + if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); } + if(qual.getName() != ""){ + if (!keepForward) { qual.trimQScores(oligo.length(), -1); } + } + success = 0; + break; + } + } + + //if you found the barcode or if you don't want to allow for diffs + if ((pdiffs == 0) || (success == 0)) { return success; } + + else { //try aligning and see if you can find it + Alignment* alignment; + if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + int minGroup = -1; + int minPos = 0; + + for(map::iterator it=primers.begin();it!=primers.end();it++){ + string oligo = it->first; + // int length = oligo.length(); + + if(rawSequence.length() < maxFPrimerLength){ + success = pdiffs + 100; + break; + } + + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ + if(oligo[i] != '-'){ alnLength = i+1; break; } + } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + + int numDiff = countDiffs(oligo, temp); + + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minGroup = it->second; + minPos = 0; + for(int i=0;i pdiffs) { success = minDiff; } //no good matches + else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers + else{ //use the best match + group = minGroup; + if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); } + if(qual.getName() != ""){ + if (!keepForward) { qual.trimQScores(minPos, -1); } + } + success = minDiff; + } + + if (alignment != NULL) { delete alignment; } + + } + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripForward"); + exit(1); + } +} +//******************************************************************/ +bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){ + try { + string rawSequence = seq.getUnaligned(); + bool success = 0; //guilty until proven innocent + + for(int i=0;ierrorOut(e, "TrimOligos", "stripReverse"); + exit(1); + } +} +//******************************************************************/ +bool TrimOligos::stripReverse(Sequence& seq){ + try { + + string rawSequence = seq.getUnaligned(); + bool success = 0; //guilty until proven innocent + + for(int i=0;ierrorOut(e, "TrimOligos", "stripReverse"); + exit(1); + } +} +//******************************************************************/ +bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){ + try { + string rawSequence = seq.getUnaligned(); + bool success = ldiffs + 1; //guilty until proven innocent + + for(int i=0;i 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + int minPos = 0; + + for(int i = 0; i < linker.size(); i++){ + string oligo = linker[i]; + // int length = oligo.length(); + + if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length + success = ldiffs + 10; + break; + } + + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ + if(oligo[i] != '-'){ alnLength = i+1; break; } + } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + + int numDiff = countDiffs(oligo, temp); + + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minPos = 0; + for(int i=0;i ldiffs) { success = minDiff; } //no good matches + else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes + else{ //use the best match + seq.setUnaligned(rawSequence.substr(minPos)); + + if(qual.getName() != ""){ + qual.trimQScores(minPos, -1); + } + success = minDiff; + } + + if (alignment != NULL) { delete alignment; } + + } + + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripLinker"); + exit(1); + } +} +//******************************************************************/ +bool TrimOligos::stripLinker(Sequence& seq){ + try { + + string rawSequence = seq.getUnaligned(); + bool success = ldiffs +1; //guilty until proven innocent + + for(int i=0;i 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + int minPos = 0; + + for(int i = 0; i < linker.size(); i++){ + string oligo = linker[i]; + // int length = oligo.length(); + + if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length + success = ldiffs + 10; + break; + } + + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ + if(oligo[i] != '-'){ alnLength = i+1; break; } + } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + + int numDiff = countDiffs(oligo, temp); + + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minPos = 0; + for(int i=0;i ldiffs) { success = minDiff; } //no good matches + else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes + else{ //use the best match + seq.setUnaligned(rawSequence.substr(minPos)); + success = minDiff; + } + + if (alignment != NULL) { delete alignment; } + + } + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripLinker"); + exit(1); + } } //******************************************************************/ bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){ - try { - string rawSequence = seq.getUnaligned(); - bool success = sdiffs+1; //guilty until proven innocent - - for(int i=0;i 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); } - else{ alignment = NULL; } - - //can you find the barcode - int minDiff = 1e6; - int minCount = 1; - int minPos = 0; - - for(int i = 0; i < spacer.size(); i++){ - string oligo = spacer[i]; - // int length = oligo.length(); - - if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length - success = sdiffs + 10; - break; - } - - //use needleman to align first barcode.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs)); - oligo = alignment->getSeqAAln(); - string temp = alignment->getSeqBAln(); - - int alnLength = oligo.length(); - - for(int i=oligo.length()-1;i>=0;i--){ - if(oligo[i] != '-'){ alnLength = i+1; break; } - } - oligo = oligo.substr(0,alnLength); - temp = temp.substr(0,alnLength); - - int numDiff = countDiffs(oligo, temp); - - if(numDiff < minDiff){ - minDiff = numDiff; - minCount = 1; - minPos = 0; - for(int i=0;i sdiffs) { success = minDiff; } //no good matches - else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes - else{ //use the best match - seq.setUnaligned(rawSequence.substr(minPos)); - - if(qual.getName() != ""){ - qual.trimQScores(minPos, -1); - } - success = minDiff; - } - - if (alignment != NULL) { delete alignment; } - - } + if ((sdiffs == 0) || (success == 0)) { return success; } - - return success; - - } - catch(exception& e) { - m->errorOut(e, "TrimOligos", "stripSpacer"); - exit(1); - } + else { //try aligning and see if you can find it + Alignment* alignment; + if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + int minPos = 0; + + for(int i = 0; i < spacer.size(); i++){ + string oligo = spacer[i]; + // int length = oligo.length(); + + if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length + success = sdiffs + 10; + break; + } + + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ + if(oligo[i] != '-'){ alnLength = i+1; break; } + } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + + int numDiff = countDiffs(oligo, temp); + + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minPos = 0; + for(int i=0;i sdiffs) { success = minDiff; } //no good matches + else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes + else{ //use the best match + seq.setUnaligned(rawSequence.substr(minPos)); + + if(qual.getName() != ""){ + qual.trimQScores(minPos, -1); + } + success = minDiff; + } + + if (alignment != NULL) { delete alignment; } + + } + + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripSpacer"); + exit(1); + } } //******************************************************************/ bool TrimOligos::stripSpacer(Sequence& seq){ - try { - - string rawSequence = seq.getUnaligned(); - bool success = sdiffs+1; //guilty until proven innocent - - for(int i=0;i 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); } - else{ alignment = NULL; } - - //can you find the barcode - int minDiff = 1e6; - int minCount = 1; - int minPos = 0; - - for(int i = 0; i < spacer.size(); i++){ - string oligo = spacer[i]; - // int length = oligo.length(); - - if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length - success = sdiffs + 10; - break; - } - - //use needleman to align first barcode.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs)); - oligo = alignment->getSeqAAln(); - string temp = alignment->getSeqBAln(); - - int alnLength = oligo.length(); - - for(int i=oligo.length()-1;i>=0;i--){ - if(oligo[i] != '-'){ alnLength = i+1; break; } - } - oligo = oligo.substr(0,alnLength); - temp = temp.substr(0,alnLength); - - int numDiff = countDiffs(oligo, temp); - - if(numDiff < minDiff){ - minDiff = numDiff; - minCount = 1; - minPos = 0; - for(int i=0;i sdiffs) { success = minDiff; } //no good matches - else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes - else{ //use the best match - seq.setUnaligned(rawSequence.substr(minPos)); - success = minDiff; - } - - if (alignment != NULL) { delete alignment; } - - } - - return success; - - } - catch(exception& e) { - m->errorOut(e, "TrimOligos", "stripSpacer"); - exit(1); - } + if ((sdiffs == 0) || (success == 0)) { return success; } + + else { //try aligning and see if you can find it + Alignment* alignment; + if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + int minPos = 0; + + for(int i = 0; i < spacer.size(); i++){ + string oligo = spacer[i]; + // int length = oligo.length(); + + if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length + success = sdiffs + 10; + break; + } + + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ + if(oligo[i] != '-'){ alnLength = i+1; break; } + } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + + int numDiff = countDiffs(oligo, temp); + + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minPos = 0; + for(int i=0;i sdiffs) { success = minDiff; } //no good matches + else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes + else{ //use the best match + seq.setUnaligned(rawSequence.substr(minPos)); + success = minDiff; + } + + if (alignment != NULL) { delete alignment; } + + } + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripSpacer"); + exit(1); + } } //******************************************************************/ bool TrimOligos::compareDNASeq(string oligo, string seq){ - try { - bool success = 1; - int length = oligo.length(); - - for(int i=0;ierrorOut(e, "TrimOligos", "compareDNASeq"); - exit(1); - } - + try { + bool success = 1; + int length = oligo.length(); + + for(int i=0;ierrorOut(e, "TrimOligos", "compareDNASeq"); + exit(1); + } + } //********************************************************************/ int TrimOligos::countDiffs(string oligo, string seq){ - try { - - int length = oligo.length(); - int countDiffs = 0; - - for(int i=0;ierrorOut(e, "TrimOligos", "countDiffs"); - exit(1); - } + try { + + int length = oligo.length(); + int countDiffs = 0; + + for(int i=0;ierrorOut(e, "TrimOligos", "countDiffs"); + exit(1); + } +} +//********************************************************************/ +string TrimOligos::reverseOligo(string oligo){ + try { + string reverse = ""; + + for(int i=oligo.length()-1;i>=0;i--){ + + if(oligo[i] == 'A') { reverse += 'T'; } + else if(oligo[i] == 'T'){ reverse += 'A'; } + else if(oligo[i] == 'U'){ reverse += 'A'; } + + else if(oligo[i] == 'G'){ reverse += 'C'; } + else if(oligo[i] == 'C'){ reverse += 'G'; } + + else if(oligo[i] == 'R'){ reverse += 'Y'; } + else if(oligo[i] == 'Y'){ reverse += 'R'; } + + else if(oligo[i] == 'M'){ reverse += 'K'; } + else if(oligo[i] == 'K'){ reverse += 'M'; } + + else if(oligo[i] == 'W'){ reverse += 'W'; } + else if(oligo[i] == 'S'){ reverse += 'S'; } + + else if(oligo[i] == 'B'){ reverse += 'V'; } + else if(oligo[i] == 'V'){ reverse += 'B'; } + + else if(oligo[i] == 'D'){ reverse += 'H'; } + else if(oligo[i] == 'H'){ reverse += 'D'; } + + else if(oligo[i] == '-'){ reverse += '-'; } + else { reverse += 'N'; } + } + + + return reverse; + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "reverseOligo"); + exit(1); + } } -/********************************************************************/ +/********************************************************************/