5 * Created by westcott on 9/1/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
19 m = MothurOut::getInstance();
32 maxFBarcodeLength = 0;
33 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
34 if(it->first.length() > maxFBarcodeLength){
35 maxFBarcodeLength = it->first.length();
39 map<string,int>::iterator it;
40 for(it=primers.begin();it!=primers.end();it++){
41 if(it->first.length() > maxFPrimerLength){
42 maxFPrimerLength = it->first.length();
47 for(int i = 0; i < linker.size(); i++){
48 if(linker[i].length() > maxLinkerLength){
49 maxLinkerLength = linker[i].length();
54 for(int i = 0; i < spacer.size(); i++){
55 if(spacer[i].length() > maxSpacerLength){
56 maxSpacerLength = spacer[i].length();
61 m->errorOut(e, "TrimOligos", "TrimOligos");
65 /********************************************************************/
66 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
67 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
69 m = MothurOut::getInstance();
77 maxFBarcodeLength = 0;
78 for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
79 string forward = it->second.forward;
80 map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
82 if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length(); }
84 if (itForward == ifbarcodes.end()) {
85 vector<int> temp; temp.push_back(it->first);
86 ifbarcodes[forward] = temp;
87 }else { itForward->second.push_back(it->first); }
91 for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
92 string forward = it->second.forward;
93 map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
95 if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length(); }
97 if (itForward == ifprimers.end()) {
98 vector<int> temp; temp.push_back(it->first);
99 ifprimers[forward] = temp;
100 }else { itForward->second.push_back(it->first); }
103 maxRBarcodeLength = 0;
104 for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
105 string forward = it->second.reverse;
106 map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
108 if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length(); }
110 if (itForward == irbarcodes.end()) {
111 vector<int> temp; temp.push_back(it->first);
112 irbarcodes[forward] = temp;
113 }else { itForward->second.push_back(it->first); }
116 maxRPrimerLength = 0;
117 for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
118 string forward = it->second.reverse;
119 map<string, vector<int> >::iterator itForward = irprimers.find(forward);
121 if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length(); }
123 if (itForward == irprimers.end()) {
124 vector<int> temp; temp.push_back(it->first);
125 irprimers[forward] = temp;
126 }else { itForward->second.push_back(it->first); }
132 catch(exception& e) {
133 m->errorOut(e, "TrimOligos", "TrimOligos");
137 /********************************************************************/
138 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
139 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
141 m = MothurOut::getInstance();
151 maxFBarcodeLength = 0;
152 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
153 string oligo = it->first;
154 if(oligo.length() > maxFBarcodeLength){
155 maxFBarcodeLength = oligo.length();
158 maxRBarcodeLength = maxFBarcodeLength;
160 maxFPrimerLength = 0;
161 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
162 string oligo = it->first;
163 if(oligo.length() > maxFPrimerLength){
164 maxFPrimerLength = oligo.length();
167 maxRPrimerLength = maxFPrimerLength;
169 catch(exception& e) {
170 m->errorOut(e, "TrimOligos", "TrimOligos");
174 /********************************************************************/
175 TrimOligos::~TrimOligos() {}
176 //********************************************************************/
177 bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
180 string rawSequence = seq.getUnaligned();
182 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
183 string oligo = it->first;
185 if(rawSequence.length() < oligo.length()) { break; }
188 int olength = oligo.length();
189 for (int j = 0; j < rawSequence.length()-olength; j++){
190 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
191 string rawChunk = rawSequence.substr(j, olength);
192 if(compareDNASeq(oligo, rawChunk)) {
194 primerEnd = primerStart + olength;
201 primerStart = 0; primerEnd = 0;
202 //if you don't want to allow for diffs
203 if (pdiffs == 0) { return false; }
204 else { //try aligning and see if you can find it
206 //can you find the barcode
210 Alignment* alignment;
211 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
212 else{ alignment = NULL; }
214 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
216 string prim = it->first;
218 int olength = prim.length();
219 if (rawSequence.length() < olength) {} //ignore primers too long for this seq
221 for (int j = 0; j < rawSequence.length()-olength; j++){
223 string oligo = it->first;
225 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
227 string rawChunk = rawSequence.substr(j, olength+pdiffs);
229 //use needleman to align first primer.length()+numdiffs of sequence to each barcode
230 alignment->alignPrimer(oligo, rawChunk);
231 oligo = alignment->getSeqAAln();
232 string temp = alignment->getSeqBAln();
234 int alnLength = oligo.length();
236 for(int i=oligo.length()-1;i>=0;i--){
237 if(oligo[i] != '-'){ alnLength = i+1; break; }
239 oligo = oligo.substr(0,alnLength);
240 temp = temp.substr(0,alnLength);
242 int numDiff = countDiffs(oligo, temp);
244 if(numDiff < minDiff){
248 primerEnd = primerStart + alnLength;
249 }else if(numDiff == minDiff){ minCount++; }
254 if (alignment != NULL) { delete alignment; }
256 if(minDiff > pdiffs) { primerStart = 0; primerEnd = 0; return false; } //no good matches
257 else if(minCount > 1) { primerStart = 0; primerEnd = 0; return false; } //can't tell the difference between multiple primers
261 primerStart = 0; primerEnd = 0;
265 catch(exception& e) {
266 m->errorOut(e, "TrimOligos", "stripForward");
270 //******************************************************************/
271 bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
274 string rawSequence = seq.getUnaligned();
276 for(int i=0;i<revPrimer.size();i++){
277 string oligo = revPrimer[i];
278 if(rawSequence.length() < oligo.length()) { break; }
281 int olength = oligo.length();
282 for (int j = rawSequence.length()-olength; j >= 0; j--){
283 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
284 string rawChunk = rawSequence.substr(j, olength);
286 if(compareDNASeq(oligo, rawChunk)) {
288 primerEnd = primerStart + olength;
295 primerStart = 0; primerEnd = 0;
298 catch(exception& e) {
299 m->errorOut(e, "PcrSeqsCommand", "findReverse");
303 //*******************************************************************/
305 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
308 if (paired) { int success = stripPairedBarcode(seq, qual, group); return success; }
310 string rawSequence = seq.getUnaligned();
311 int success = bdiffs + 1; //guilty until proven innocent
313 //can you find the barcode
314 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
315 string oligo = it->first;
316 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
317 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
321 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
323 seq.setUnaligned(rawSequence.substr(oligo.length()));
325 if(qual.getName() != ""){
326 qual.trimQScores(oligo.length(), -1);
334 //if you found the barcode or if you don't want to allow for diffs
335 if ((bdiffs == 0) || (success == 0)) { return success; }
337 else { //try aligning and see if you can find it
338 Alignment* alignment;
339 if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
340 else{ alignment = NULL; }
342 //can you find the barcode
348 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
349 string oligo = it->first;
350 // int length = oligo.length();
352 if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
353 success = bdiffs + 10;
357 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
358 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
359 oligo = alignment->getSeqAAln();
360 string temp = alignment->getSeqBAln();
362 int alnLength = oligo.length();
364 for(int i=oligo.length()-1;i>=0;i--){
365 if(oligo[i] != '-'){ alnLength = i+1; break; }
367 oligo = oligo.substr(0,alnLength);
368 temp = temp.substr(0,alnLength);
369 int numDiff = countDiffs(oligo, temp);
371 if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " aligned fragment =" + temp + ", barcode =" + oligo + ", numDiffs = " + toString(numDiff) + "\n"); }
373 if(numDiff < minDiff){
376 minGroup = it->second;
378 for(int i=0;i<alnLength;i++){
384 else if(numDiff == minDiff){
390 if(minDiff > bdiffs) { success = minDiff; } //no good matches
391 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
392 else{ //use the best match
394 seq.setUnaligned(rawSequence.substr(minPos));
396 if(qual.getName() != ""){
397 qual.trimQScores(minPos, -1);
402 if (alignment != NULL) { delete alignment; }
409 catch(exception& e) {
410 m->errorOut(e, "TrimOligos", "stripBarcode");
414 //*******************************************************************/
415 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
417 //look for forward barcode
418 string rawFSequence = forwardSeq.getUnaligned();
419 string rawRSequence = reverseSeq.getUnaligned();
420 int success = bdiffs + 1; //guilty until proven innocent
422 //can you find the forward barcode
423 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
424 string foligo = it->second.forward;
425 string roligo = it->second.reverse;
427 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
428 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
431 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
432 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
436 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
438 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
439 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
445 //if you found the barcode or if you don't want to allow for diffs
446 if ((bdiffs == 0) || (success == 0)) { return success; }
447 else { //try aligning and see if you can find it
448 Alignment* alignment;
449 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
450 else{ alignment = NULL; }
452 //can you find the barcode
455 vector< vector<int> > minFGroup;
458 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
467 only want to look for forward = Sarah, John, Anna, Pat, Gail
468 reverse = Westcott, Schloss, Brown, Moore
469 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
471 //cout << endl << forwardSeq.getName() << endl;
472 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
473 string oligo = it->first;
475 if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
476 success = bdiffs + 10;
479 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
480 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
481 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
482 oligo = alignment->getSeqAAln();
483 string temp = alignment->getSeqBAln();
485 int alnLength = oligo.length();
487 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
488 oligo = oligo.substr(0,alnLength);
489 temp = temp.substr(0,alnLength);
490 int numDiff = countDiffs(oligo, temp);
492 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
494 if (alnLength == 0) { numDiff = bdiffs + 100; }
495 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
497 if(numDiff < minDiff){
501 minFGroup.push_back(it->second);
504 for(int i=0;i<alnLength;i++){
509 minFPos.push_back(tempminFPos);
510 }else if(numDiff == minDiff){
511 minFGroup.push_back(it->second);
513 for(int i=0;i<alnLength;i++){
518 minFPos.push_back(tempminFPos);
522 //cout << minDiff << '\t' << minCount << '\t' << endl;
523 if(minDiff > bdiffs) { success = minDiff; } //no good matches
525 //check for reverse match
526 if (alignment != NULL) { delete alignment; }
528 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
529 else{ alignment = NULL; }
531 //can you find the barcode
534 vector< vector<int> > minRGroup;
537 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
538 string oligo = it->first;
539 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
540 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
541 success = bdiffs + 10;
545 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
546 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
547 oligo = alignment->getSeqAAln();
548 string temp = alignment->getSeqBAln();
550 int alnLength = oligo.length();
551 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
552 oligo = oligo.substr(0,alnLength);
553 temp = temp.substr(0,alnLength);
554 int numDiff = countDiffs(oligo, temp);
556 if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
558 if (alnLength == 0) { numDiff = bdiffs + 100; }
560 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
561 if(numDiff < minDiff){
565 minRGroup.push_back(it->second);
568 for(int i=0;i<alnLength;i++){
573 minRPos.push_back(tempminRPos);
574 }else if(numDiff == minDiff){
576 for(int i=0;i<alnLength;i++){
581 minRPos.push_back(tempminRPos);
582 minRGroup.push_back(it->second);
587 if(minDiff > bdiffs) { success = minDiff; } //no good matches
589 bool foundMatch = false;
591 for (int i = 0; i < minFGroup.size(); i++) {
592 for (int j = 0; j < minFGroup[i].size(); j++) {
593 for (int k = 0; k < minRGroup.size(); k++) {
594 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
601 if (matches.size() == 1) {
608 //we have an acceptable match for the forward and reverse, but do they match?
610 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
611 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
613 }else { success = bdiffs + 100; }
617 if (alignment != NULL) { delete alignment; }
623 catch(exception& e) {
624 m->errorOut(e, "TrimOligos", "stripIBarcode");
629 //*******************************************************************/
630 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
632 //look for forward barcode
633 string rawFSequence = forwardSeq.getUnaligned();
634 string rawRSequence = reverseSeq.getUnaligned();
635 int success = bdiffs + 1; //guilty until proven innocent
637 //can you find the forward barcode
638 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
639 string foligo = it->second.forward;
640 string roligo = it->second.reverse;
642 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
643 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
646 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
647 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
651 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
653 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
654 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
655 forwardQual.trimQScores(foligo.length(), -1);
656 reverseQual.trimQScores(roligo.length(), -1);
662 //if you found the barcode or if you don't want to allow for diffs
663 if ((bdiffs == 0) || (success == 0)) { return success; }
664 else { //try aligning and see if you can find it
665 Alignment* alignment;
666 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
667 else{ alignment = NULL; }
669 //can you find the barcode
672 vector< vector<int> > minFGroup;
675 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
684 only want to look for forward = Sarah, John, Anna, Pat, Gail
685 reverse = Westcott, Schloss, Brown, Moore
686 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
688 //cout << endl << forwardSeq.getName() << endl;
689 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
690 string oligo = it->first;
692 if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
693 success = bdiffs + 10;
696 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
697 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
698 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
699 oligo = alignment->getSeqAAln();
700 string temp = alignment->getSeqBAln();
702 int alnLength = oligo.length();
704 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
705 oligo = oligo.substr(0,alnLength);
706 temp = temp.substr(0,alnLength);
707 int numDiff = countDiffs(oligo, temp);
709 if (alnLength == 0) { numDiff = bdiffs + 100; }
710 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
712 if(numDiff < minDiff){
716 minFGroup.push_back(it->second);
719 for(int i=0;i<alnLength;i++){
724 minFPos.push_back(tempminFPos);
725 }else if(numDiff == minDiff){
726 minFGroup.push_back(it->second);
728 for(int i=0;i<alnLength;i++){
733 minFPos.push_back(tempminFPos);
737 //cout << minDiff << '\t' << minCount << '\t' << endl;
738 if(minDiff > bdiffs) { success = minDiff; } //no good matches
740 //check for reverse match
741 if (alignment != NULL) { delete alignment; }
743 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
744 else{ alignment = NULL; }
746 //can you find the barcode
749 vector< vector<int> > minRGroup;
752 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
753 string oligo = it->first;
754 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
755 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
756 success = bdiffs + 10;
760 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
761 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
762 oligo = alignment->getSeqAAln();
763 string temp = alignment->getSeqBAln();
765 int alnLength = oligo.length();
766 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
767 oligo = oligo.substr(0,alnLength);
768 temp = temp.substr(0,alnLength);
769 int numDiff = countDiffs(oligo, temp);
770 if (alnLength == 0) { numDiff = bdiffs + 100; }
772 if (m->debug) { m->mothurOut("[DEBUG]: reverse " + reverseSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
774 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
775 if(numDiff < minDiff){
779 minRGroup.push_back(it->second);
782 for(int i=0;i<alnLength;i++){
787 minRPos.push_back(tempminRPos);
788 }else if(numDiff == minDiff){
790 for(int i=0;i<alnLength;i++){
795 minRPos.push_back(tempminRPos);
796 minRGroup.push_back(it->second);
801 if(minDiff > bdiffs) { success = minDiff; } //no good matches
803 bool foundMatch = false;
805 for (int i = 0; i < minFGroup.size(); i++) {
806 for (int j = 0; j < minFGroup[i].size(); j++) {
807 for (int k = 0; k < minRGroup.size(); k++) {
808 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
815 if (matches.size() == 1) {
822 //we have an acceptable match for the forward and reverse, but do they match?
824 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
825 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
826 forwardQual.trimQScores(fStart, -1);
827 reverseQual.trimQScores(rStart, -1);
829 }else { success = bdiffs + 100; }
833 if (alignment != NULL) { delete alignment; }
839 catch(exception& e) {
840 m->errorOut(e, "TrimOligos", "stripIBarcode");
845 //*******************************************************************/
846 int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
848 m->debugOut("Stripping paired barcode");
849 //look for forward barcode
850 string rawSeq = seq.getUnaligned();
851 int success = bdiffs + 1; //guilty until proven innocent
852 //cout << seq.getName() << endl;
853 //can you find the forward barcode
854 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
855 string foligo = it->second.forward;
856 string roligo = it->second.reverse;
857 //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
858 //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
859 if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
860 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
863 if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
864 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
868 if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; }
870 if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
872 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
873 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
874 if(qual.getName() != ""){
875 qual.trimQScores(-1, rawSeq.length()-roligo.length());
876 qual.trimQScores(foligo.length(), -1);
882 //cout << "success=" << success << endl;
883 //if you found the barcode or if you don't want to allow for diffs
884 if ((bdiffs == 0) || (success == 0)) { return success; }
885 else { //try aligning and see if you can find it
886 Alignment* alignment;
887 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
888 else{ alignment = NULL; }
890 //can you find the barcode
893 vector< vector<int> > minFGroup;
896 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
905 only want to look for forward = Sarah, John, Anna, Pat, Gail
906 reverse = Westcott, Schloss, Brown, Moore
907 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
909 //cout << endl << forwardSeq.getName() << endl;
910 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
911 string oligo = it->first;
913 if(rawSeq.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
914 success = bdiffs + 10;
917 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
918 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
919 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
920 oligo = alignment->getSeqAAln();
921 string temp = alignment->getSeqBAln();
923 int alnLength = oligo.length();
925 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
926 oligo = oligo.substr(0,alnLength);
927 temp = temp.substr(0,alnLength);
928 int numDiff = countDiffs(oligo, temp);
930 if (alnLength == 0) { numDiff = bdiffs + 100; }
931 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
933 if(numDiff < minDiff){
937 minFGroup.push_back(it->second);
940 for(int i=0;i<alnLength;i++){
945 minFPos.push_back(tempminFPos);
946 }else if(numDiff == minDiff){
947 minFGroup.push_back(it->second);
949 for(int i=0;i<alnLength;i++){
954 minFPos.push_back(tempminFPos);
958 //cout << minDiff << '\t' << minCount << '\t' << endl;
959 if(minDiff > bdiffs) { success = minDiff; } //no good matches
961 //check for reverse match
962 if (alignment != NULL) { delete alignment; }
964 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
965 else{ alignment = NULL; }
967 //can you find the barcode
970 vector< vector<int> > minRGroup;
973 string rawRSequence = reverseOligo(seq.getUnaligned());
974 //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl;
975 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
976 string oligo = reverseOligo(it->first);
977 //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
978 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
979 success = bdiffs + 10;
983 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
984 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
985 oligo = alignment->getSeqAAln();
986 string temp = alignment->getSeqBAln();
988 int alnLength = oligo.length();
989 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
990 oligo = oligo.substr(0,alnLength);
991 temp = temp.substr(0,alnLength);
992 int numDiff = countDiffs(oligo, temp);
993 if (alnLength == 0) { numDiff = bdiffs + 100; }
995 //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
996 if(numDiff < minDiff){
1000 minRGroup.push_back(it->second);
1001 int tempminRPos = 0;
1003 for(int i=0;i<alnLength;i++){
1008 minRPos.push_back(tempminRPos);
1009 }else if(numDiff == minDiff){
1010 int tempminRPos = 0;
1011 for(int i=0;i<alnLength;i++){
1016 minRPos.push_back(tempminRPos);
1017 minRGroup.push_back(it->second);
1022 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1024 bool foundMatch = false;
1025 vector<int> matches;
1026 for (int i = 0; i < minFGroup.size(); i++) {
1027 for (int j = 0; j < minFGroup[i].size(); j++) {
1028 for (int k = 0; k < minRGroup.size(); k++) {
1029 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1036 if (matches.size() == 1) {
1039 fStart = minFPos[0];
1040 rStart = rawSeq.length() - minRPos[0];
1041 if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence
1044 //we have an acceptable match for the forward and reverse, but do they match?
1046 string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1047 seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1048 if(qual.getName() != ""){
1049 qual.trimQScores(-1, rStart);
1050 qual.trimQScores(fStart, -1);
1053 //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
1054 }else { success = bdiffs + 100; }
1058 if (alignment != NULL) { delete alignment; }
1064 catch(exception& e) {
1065 m->errorOut(e, "TrimOligos", "stripPairedBarcode");
1070 //*******************************************************************/
1071 int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1074 string rawSeq = seq.getUnaligned();
1075 int success = pdiffs + 1; //guilty until proven innocent
1076 //cout << seq.getName() << endl;
1077 //can you find the forward
1078 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1079 string foligo = it->second.forward;
1080 string roligo = it->second.reverse;
1082 //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
1083 //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
1084 if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1085 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1088 if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1089 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1093 if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; }
1095 if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
1098 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
1099 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
1100 if(qual.getName() != ""){
1101 qual.trimQScores(-1, rawSeq.length()-roligo.length());
1102 qual.trimQScores(foligo.length(), -1);
1109 //cout << "success=" << success << endl;
1110 //if you found the barcode or if you don't want to allow for diffs
1111 if ((pdiffs == 0) || (success == 0)) { return success; }
1112 else { //try aligning and see if you can find it
1113 Alignment* alignment;
1114 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1115 else{ alignment = NULL; }
1117 //can you find the barcode
1120 vector< vector<int> > minFGroup;
1121 vector<int> minFPos;
1123 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1132 only want to look for forward = Sarah, John, Anna, Pat, Gail
1133 reverse = Westcott, Schloss, Brown, Moore
1134 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1136 //cout << endl << forwardSeq.getName() << endl;
1137 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1138 string oligo = it->first;
1140 if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1141 success = pdiffs + 10;
1144 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl;
1145 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1146 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
1147 oligo = alignment->getSeqAAln();
1148 string temp = alignment->getSeqBAln();
1150 int alnLength = oligo.length();
1152 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1153 oligo = oligo.substr(0,alnLength);
1154 temp = temp.substr(0,alnLength);
1155 int numDiff = countDiffs(oligo, temp);
1157 if (alnLength == 0) { numDiff = pdiffs + 100; }
1158 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1160 if(numDiff < minDiff){
1164 minFGroup.push_back(it->second);
1165 int tempminFPos = 0;
1167 for(int i=0;i<alnLength;i++){
1172 minFPos.push_back(tempminFPos);
1173 }else if(numDiff == minDiff){
1174 minFGroup.push_back(it->second);
1175 int tempminFPos = 0;
1176 for(int i=0;i<alnLength;i++){
1181 minFPos.push_back(tempminFPos);
1185 //cout << minDiff << '\t' << minCount << '\t' << endl;
1186 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1188 //check for reverse match
1189 if (alignment != NULL) { delete alignment; }
1191 if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1192 else{ alignment = NULL; }
1194 //can you find the barcode
1197 vector< vector<int> > minRGroup;
1198 vector<int> minRPos;
1200 string rawRSequence = reverseOligo(seq.getUnaligned());
1202 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1203 string oligo = reverseOligo(it->first);
1204 //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
1205 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1206 success = pdiffs + 10;
1210 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1211 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1212 oligo = alignment->getSeqAAln();
1213 string temp = alignment->getSeqBAln();
1215 int alnLength = oligo.length();
1216 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1217 oligo = oligo.substr(0,alnLength);
1218 temp = temp.substr(0,alnLength);
1219 int numDiff = countDiffs(oligo, temp);
1220 if (alnLength == 0) { numDiff = pdiffs + 100; }
1222 //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1223 if(numDiff < minDiff){
1227 minRGroup.push_back(it->second);
1228 int tempminRPos = 0;
1230 for(int i=0;i<alnLength;i++){
1235 minRPos.push_back(tempminRPos);
1236 }else if(numDiff == minDiff){
1237 int tempminRPos = 0;
1238 for(int i=0;i<alnLength;i++){
1243 minRPos.push_back(tempminRPos);
1244 minRGroup.push_back(it->second);
1249 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1251 bool foundMatch = false;
1252 vector<int> matches;
1253 for (int i = 0; i < minFGroup.size(); i++) {
1254 for (int j = 0; j < minFGroup[i].size(); j++) {
1255 for (int k = 0; k < minRGroup.size(); k++) {
1256 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1263 if (matches.size() == 1) {
1266 fStart = minFPos[0];
1267 rStart = rawSeq.length() - minRPos[0];
1268 if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence
1271 //we have an acceptable match for the forward and reverse, but do they match?
1274 string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1275 seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1276 if(qual.getName() != ""){
1277 qual.trimQScores(-1, rStart);
1278 qual.trimQScores(fStart, -1);
1282 //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
1283 }else { success = pdiffs + 100; }
1287 if (alignment != NULL) { delete alignment; }
1293 catch(exception& e) {
1294 m->errorOut(e, "TrimOligos", "stripPairedPrimers");
1300 //*******************************************************************/
1301 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
1303 //look for forward barcode
1304 string rawFSequence = forwardSeq.getUnaligned();
1305 string rawRSequence = reverseSeq.getUnaligned();
1306 int success = pdiffs + 1; //guilty until proven innocent
1308 //can you find the forward barcode
1309 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1310 string foligo = it->second.forward;
1311 string roligo = it->second.reverse;
1313 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1314 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1317 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1318 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1322 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
1324 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1325 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1326 forwardQual.trimQScores(foligo.length(), -1);
1327 reverseQual.trimQScores(roligo.length(), -1);
1333 //if you found the barcode or if you don't want to allow for diffs
1334 if ((pdiffs == 0) || (success == 0)) { return success; }
1335 else { //try aligning and see if you can find it
1336 Alignment* alignment;
1337 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1338 else{ alignment = NULL; }
1340 //can you find the barcode
1343 vector< vector<int> > minFGroup;
1344 vector<int> minFPos;
1346 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1355 only want to look for forward = Sarah, John, Anna, Pat, Gail
1356 reverse = Westcott, Schloss, Brown, Moore
1357 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1359 //cout << endl << forwardSeq.getName() << endl;
1360 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1361 string oligo = it->first;
1363 if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1364 success = pdiffs + 10;
1367 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1368 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1369 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1370 oligo = alignment->getSeqAAln();
1371 string temp = alignment->getSeqBAln();
1373 int alnLength = oligo.length();
1375 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1376 oligo = oligo.substr(0,alnLength);
1377 temp = temp.substr(0,alnLength);
1378 int numDiff = countDiffs(oligo, temp);
1380 if (alnLength == 0) { numDiff = pdiffs + 100; }
1381 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1383 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
1385 if(numDiff < minDiff){
1389 minFGroup.push_back(it->second);
1390 int tempminFPos = 0;
1392 for(int i=0;i<alnLength;i++){
1397 minFPos.push_back(tempminFPos);
1398 }else if(numDiff == minDiff){
1399 minFGroup.push_back(it->second);
1400 int tempminFPos = 0;
1401 for(int i=0;i<alnLength;i++){
1406 minFPos.push_back(tempminFPos);
1410 //cout << minDiff << '\t' << minCount << '\t' << endl;
1411 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1413 //check for reverse match
1414 if (alignment != NULL) { delete alignment; }
1416 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1417 else{ alignment = NULL; }
1419 //can you find the barcode
1422 vector< vector<int> > minRGroup;
1423 vector<int> minRPos;
1425 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1426 string oligo = it->first;
1427 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1428 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1429 success = pdiffs + 10;
1433 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1434 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1435 oligo = alignment->getSeqAAln();
1436 string temp = alignment->getSeqBAln();
1438 int alnLength = oligo.length();
1439 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1440 oligo = oligo.substr(0,alnLength);
1441 temp = temp.substr(0,alnLength);
1442 int numDiff = countDiffs(oligo, temp);
1443 if (alnLength == 0) { numDiff = pdiffs + 100; }
1445 if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
1447 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1448 if(numDiff < minDiff){
1452 minRGroup.push_back(it->second);
1453 int tempminRPos = 0;
1455 for(int i=0;i<alnLength;i++){
1460 minRPos.push_back(tempminRPos);
1461 }else if(numDiff == minDiff){
1462 int tempminRPos = 0;
1463 for(int i=0;i<alnLength;i++){
1468 minRPos.push_back(tempminRPos);
1469 minRGroup.push_back(it->second);
1474 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1476 bool foundMatch = false;
1477 vector<int> matches;
1478 for (int i = 0; i < minFGroup.size(); i++) {
1479 for (int j = 0; j < minFGroup[i].size(); j++) {
1480 for (int k = 0; k < minRGroup.size(); k++) {
1481 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1488 if (matches.size() == 1) {
1491 fStart = minFPos[0];
1492 rStart = minRPos[0];
1495 //we have an acceptable match for the forward and reverse, but do they match?
1497 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1498 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1499 forwardQual.trimQScores(fStart, -1);
1500 reverseQual.trimQScores(rStart, -1);
1502 }else { success = pdiffs + 100; }
1506 if (alignment != NULL) { delete alignment; }
1512 catch(exception& e) {
1513 m->errorOut(e, "TrimOligos", "stripIForward");
1518 //*******************************************************************/
1519 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
1521 //look for forward barcode
1522 string rawFSequence = forwardSeq.getUnaligned();
1523 string rawRSequence = reverseSeq.getUnaligned();
1524 int success = pdiffs + 1; //guilty until proven innocent
1526 //can you find the forward barcode
1527 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1528 string foligo = it->second.forward;
1529 string roligo = it->second.reverse;
1531 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1532 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1535 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1536 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1540 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1542 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1543 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1549 //if you found the barcode or if you don't want to allow for diffs
1550 if ((pdiffs == 0) || (success == 0)) { return success; }
1551 else { //try aligning and see if you can find it
1552 Alignment* alignment;
1553 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1554 else{ alignment = NULL; }
1556 //can you find the barcode
1559 vector< vector<int> > minFGroup;
1560 vector<int> minFPos;
1562 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1571 only want to look for forward = Sarah, John, Anna, Pat, Gail
1572 reverse = Westcott, Schloss, Brown, Moore
1573 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1575 //cout << endl << forwardSeq.getName() << endl;
1576 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1577 string oligo = it->first;
1579 if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1580 success = pdiffs + 10;
1583 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1584 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1585 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1586 oligo = alignment->getSeqAAln();
1587 string temp = alignment->getSeqBAln();
1589 int alnLength = oligo.length();
1591 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1592 oligo = oligo.substr(0,alnLength);
1593 temp = temp.substr(0,alnLength);
1594 int numDiff = countDiffs(oligo, temp);
1596 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
1598 if (alnLength == 0) { numDiff = pdiffs + 100; }
1599 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1601 if(numDiff < minDiff){
1605 minFGroup.push_back(it->second);
1606 int tempminFPos = 0;
1608 for(int i=0;i<alnLength;i++){
1613 minFPos.push_back(tempminFPos);
1614 }else if(numDiff == minDiff){
1615 minFGroup.push_back(it->second);
1616 int tempminFPos = 0;
1617 for(int i=0;i<alnLength;i++){
1622 minFPos.push_back(tempminFPos);
1626 //cout << minDiff << '\t' << minCount << '\t' << endl;
1627 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1629 //check for reverse match
1630 if (alignment != NULL) { delete alignment; }
1632 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1633 else{ alignment = NULL; }
1635 //can you find the barcode
1638 vector< vector<int> > minRGroup;
1639 vector<int> minRPos;
1641 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1642 string oligo = it->first;
1643 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1644 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1645 success = pdiffs + 10;
1649 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1650 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1651 oligo = alignment->getSeqAAln();
1652 string temp = alignment->getSeqBAln();
1654 int alnLength = oligo.length();
1655 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1656 oligo = oligo.substr(0,alnLength);
1657 temp = temp.substr(0,alnLength);
1658 int numDiff = countDiffs(oligo, temp);
1660 if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
1662 if (alnLength == 0) { numDiff = pdiffs + 100; }
1664 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1665 if(numDiff < minDiff){
1669 minRGroup.push_back(it->second);
1670 int tempminRPos = 0;
1672 for(int i=0;i<alnLength;i++){
1677 minRPos.push_back(tempminRPos);
1678 }else if(numDiff == minDiff){
1679 int tempminRPos = 0;
1680 for(int i=0;i<alnLength;i++){
1685 minRPos.push_back(tempminRPos);
1686 minRGroup.push_back(it->second);
1691 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1693 bool foundMatch = false;
1694 vector<int> matches;
1695 for (int i = 0; i < minFGroup.size(); i++) {
1696 for (int j = 0; j < minFGroup[i].size(); j++) {
1697 for (int k = 0; k < minRGroup.size(); k++) {
1698 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1705 if (matches.size() == 1) {
1708 fStart = minFPos[0];
1709 rStart = minRPos[0];
1712 //we have an acceptable match for the forward and reverse, but do they match?
1714 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1715 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1717 }else { success = pdiffs + 100; }
1721 if (alignment != NULL) { delete alignment; }
1727 catch(exception& e) {
1728 m->errorOut(e, "TrimOligos", "stripIForward");
1733 //*******************************************************************/
1734 int TrimOligos::stripBarcode(Sequence& seq, int& group){
1737 string rawSequence = seq.getUnaligned();
1738 int success = bdiffs + 1; //guilty until proven innocent
1740 //can you find the barcode
1741 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1742 string oligo = it->first;
1743 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
1744 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
1748 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1750 seq.setUnaligned(rawSequence.substr(oligo.length()));
1757 //if you found the barcode or if you don't want to allow for diffs
1758 if ((bdiffs == 0) || (success == 0)) { return success; }
1760 else { //try aligning and see if you can find it
1761 Alignment* alignment;
1762 if (barcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
1763 else{ alignment = NULL; }
1765 //can you find the barcode
1771 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1772 string oligo = it->first;
1773 // int length = oligo.length();
1775 if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
1776 success = bdiffs + 10;
1780 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1781 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1782 oligo = alignment->getSeqAAln();
1783 string temp = alignment->getSeqBAln();
1785 int alnLength = oligo.length();
1787 for(int i=oligo.length()-1;i>=0;i--){
1788 if(oligo[i] != '-'){ alnLength = i+1; break; }
1790 oligo = oligo.substr(0,alnLength);
1791 temp = temp.substr(0,alnLength);
1793 int numDiff = countDiffs(oligo, temp);
1795 if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " aligned fragment =" + temp + ", barcode =" + oligo + ", numDiffs = " + toString(numDiff) + "\n"); }
1797 if(numDiff < minDiff){
1800 minGroup = it->second;
1802 for(int i=0;i<alnLength;i++){
1808 else if(numDiff == minDiff){
1814 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1815 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
1816 else{ //use the best match
1818 seq.setUnaligned(rawSequence.substr(minPos));
1822 if (alignment != NULL) { delete alignment; }
1829 catch(exception& e) {
1830 m->errorOut(e, "TrimOligos", "stripBarcode");
1836 /********************************************************************/
1837 int TrimOligos::stripForward(Sequence& seq, int& group){
1840 string rawSequence = seq.getUnaligned();
1841 int success = pdiffs + 1; //guilty until proven innocent
1843 //can you find the primer
1844 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1845 string oligo = it->first;
1846 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1847 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1851 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1853 seq.setUnaligned(rawSequence.substr(oligo.length()));
1859 //if you found the barcode or if you don't want to allow for diffs
1860 if ((pdiffs == 0) || (success == 0)) { return success; }
1862 else { //try aligning and see if you can find it
1863 Alignment* alignment;
1864 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1865 else{ alignment = NULL; }
1867 //can you find the barcode
1873 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1874 string oligo = it->first;
1875 // int length = oligo.length();
1877 if(rawSequence.length() < maxFPrimerLength){
1878 success = pdiffs + 100;
1882 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1883 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1884 oligo = alignment->getSeqAAln();
1885 string temp = alignment->getSeqBAln();
1887 int alnLength = oligo.length();
1889 for(int i=oligo.length()-1;i>=0;i--){
1890 if(oligo[i] != '-'){ alnLength = i+1; break; }
1892 oligo = oligo.substr(0,alnLength);
1893 temp = temp.substr(0,alnLength);
1895 int numDiff = countDiffs(oligo, temp);
1897 if(numDiff < minDiff){
1900 minGroup = it->second;
1902 for(int i=0;i<alnLength;i++){
1908 else if(numDiff == minDiff){
1914 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1915 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1916 else{ //use the best match
1918 seq.setUnaligned(rawSequence.substr(minPos));
1922 if (alignment != NULL) { delete alignment; }
1929 catch(exception& e) {
1930 m->errorOut(e, "TrimOligos", "stripForward");
1934 //*******************************************************************/
1935 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1938 if (paired) { int success = stripPairedPrimers(seq, qual, group, keepForward); return success; }
1940 string rawSequence = seq.getUnaligned();
1941 int success = pdiffs + 1; //guilty until proven innocent
1943 //can you find the primer
1944 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1945 string oligo = it->first;
1946 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1947 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1951 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1953 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
1954 if(qual.getName() != ""){
1955 if (!keepForward) { qual.trimQScores(oligo.length(), -1); }
1962 //if you found the barcode or if you don't want to allow for diffs
1963 if ((pdiffs == 0) || (success == 0)) { return success; }
1965 else { //try aligning and see if you can find it
1966 Alignment* alignment;
1967 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1968 else{ alignment = NULL; }
1970 //can you find the barcode
1976 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1977 string oligo = it->first;
1978 // int length = oligo.length();
1980 if(rawSequence.length() < maxFPrimerLength){
1981 success = pdiffs + 100;
1985 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1986 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1987 oligo = alignment->getSeqAAln();
1988 string temp = alignment->getSeqBAln();
1990 int alnLength = oligo.length();
1992 for(int i=oligo.length()-1;i>=0;i--){
1993 if(oligo[i] != '-'){ alnLength = i+1; break; }
1995 oligo = oligo.substr(0,alnLength);
1996 temp = temp.substr(0,alnLength);
1998 int numDiff = countDiffs(oligo, temp);
2000 if(numDiff < minDiff){
2003 minGroup = it->second;
2005 for(int i=0;i<alnLength;i++){
2011 else if(numDiff == minDiff){
2017 if(minDiff > pdiffs) { success = minDiff; } //no good matches
2018 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
2019 else{ //use the best match
2021 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
2022 if(qual.getName() != ""){
2023 if (!keepForward) { qual.trimQScores(minPos, -1); }
2028 if (alignment != NULL) { delete alignment; }
2035 catch(exception& e) {
2036 m->errorOut(e, "TrimOligos", "stripForward");
2040 //******************************************************************/
2041 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
2043 string rawSequence = seq.getUnaligned();
2044 bool success = 0; //guilty until proven innocent
2046 for(int i=0;i<revPrimer.size();i++){
2047 string oligo = revPrimer[i];
2049 if(rawSequence.length() < oligo.length()){
2054 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2055 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2056 if(qual.getName() != ""){
2057 qual.trimQScores(-1, rawSequence.length()-oligo.length());
2066 catch(exception& e) {
2067 m->errorOut(e, "TrimOligos", "stripReverse");
2071 //******************************************************************/
2072 bool TrimOligos::stripReverse(Sequence& seq){
2075 string rawSequence = seq.getUnaligned();
2076 bool success = 0; //guilty until proven innocent
2078 for(int i=0;i<revPrimer.size();i++){
2079 string oligo = revPrimer[i];
2081 if(rawSequence.length() < oligo.length()){
2086 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2087 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2096 catch(exception& e) {
2097 m->errorOut(e, "TrimOligos", "stripReverse");
2101 //******************************************************************/
2102 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
2104 string rawSequence = seq.getUnaligned();
2105 bool success = ldiffs + 1; //guilty until proven innocent
2107 for(int i=0;i<linker.size();i++){
2108 string oligo = linker[i];
2110 if(rawSequence.length() < oligo.length()){
2111 success = ldiffs + 10;
2115 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2116 seq.setUnaligned(rawSequence.substr(oligo.length()));
2117 if(qual.getName() != ""){
2118 qual.trimQScores(oligo.length(), -1);
2125 //if you found the linker or if you don't want to allow for diffs
2126 if ((ldiffs == 0) || (success == 0)) { return success; }
2128 else { //try aligning and see if you can find it
2129 Alignment* alignment;
2130 if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2131 else{ alignment = NULL; }
2133 //can you find the barcode
2138 for(int i = 0; i < linker.size(); i++){
2139 string oligo = linker[i];
2140 // int length = oligo.length();
2142 if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length
2143 success = ldiffs + 10;
2147 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2148 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2149 oligo = alignment->getSeqAAln();
2150 string temp = alignment->getSeqBAln();
2152 int alnLength = oligo.length();
2154 for(int i=oligo.length()-1;i>=0;i--){
2155 if(oligo[i] != '-'){ alnLength = i+1; break; }
2157 oligo = oligo.substr(0,alnLength);
2158 temp = temp.substr(0,alnLength);
2160 int numDiff = countDiffs(oligo, temp);
2162 if(numDiff < minDiff){
2166 for(int i=0;i<alnLength;i++){
2172 else if(numDiff == minDiff){
2178 if(minDiff > ldiffs) { success = minDiff; } //no good matches
2179 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
2180 else{ //use the best match
2181 seq.setUnaligned(rawSequence.substr(minPos));
2183 if(qual.getName() != ""){
2184 qual.trimQScores(minPos, -1);
2189 if (alignment != NULL) { delete alignment; }
2197 catch(exception& e) {
2198 m->errorOut(e, "TrimOligos", "stripLinker");
2202 //******************************************************************/
2203 bool TrimOligos::stripLinker(Sequence& seq){
2206 string rawSequence = seq.getUnaligned();
2207 bool success = ldiffs +1; //guilty until proven innocent
2209 for(int i=0;i<linker.size();i++){
2210 string oligo = linker[i];
2212 if(rawSequence.length() < oligo.length()){
2213 success = ldiffs +10;
2217 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2218 seq.setUnaligned(rawSequence.substr(oligo.length()));
2224 //if you found the linker or if you don't want to allow for diffs
2225 if ((ldiffs == 0) || (success == 0)) { return success; }
2227 else { //try aligning and see if you can find it
2228 Alignment* alignment;
2229 if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2230 else{ alignment = NULL; }
2232 //can you find the barcode
2237 for(int i = 0; i < linker.size(); i++){
2238 string oligo = linker[i];
2239 // int length = oligo.length();
2241 if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length
2242 success = ldiffs + 10;
2246 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2247 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2248 oligo = alignment->getSeqAAln();
2249 string temp = alignment->getSeqBAln();
2251 int alnLength = oligo.length();
2253 for(int i=oligo.length()-1;i>=0;i--){
2254 if(oligo[i] != '-'){ alnLength = i+1; break; }
2256 oligo = oligo.substr(0,alnLength);
2257 temp = temp.substr(0,alnLength);
2259 int numDiff = countDiffs(oligo, temp);
2261 if(numDiff < minDiff){
2265 for(int i=0;i<alnLength;i++){
2271 else if(numDiff == minDiff){
2277 if(minDiff > ldiffs) { success = minDiff; } //no good matches
2278 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
2279 else{ //use the best match
2280 seq.setUnaligned(rawSequence.substr(minPos));
2284 if (alignment != NULL) { delete alignment; }
2291 catch(exception& e) {
2292 m->errorOut(e, "TrimOligos", "stripLinker");
2297 //******************************************************************/
2298 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
2300 string rawSequence = seq.getUnaligned();
2301 bool success = sdiffs+1; //guilty until proven innocent
2303 for(int i=0;i<spacer.size();i++){
2304 string oligo = spacer[i];
2306 if(rawSequence.length() < oligo.length()){
2307 success = sdiffs+10;
2311 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2312 seq.setUnaligned(rawSequence.substr(oligo.length()));
2313 if(qual.getName() != ""){
2314 qual.trimQScores(oligo.length(), -1);
2321 //if you found the spacer or if you don't want to allow for diffs
2322 if ((sdiffs == 0) || (success == 0)) { return success; }
2324 else { //try aligning and see if you can find it
2325 Alignment* alignment;
2326 if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2327 else{ alignment = NULL; }
2329 //can you find the barcode
2334 for(int i = 0; i < spacer.size(); i++){
2335 string oligo = spacer[i];
2336 // int length = oligo.length();
2338 if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length
2339 success = sdiffs + 10;
2343 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2344 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2345 oligo = alignment->getSeqAAln();
2346 string temp = alignment->getSeqBAln();
2348 int alnLength = oligo.length();
2350 for(int i=oligo.length()-1;i>=0;i--){
2351 if(oligo[i] != '-'){ alnLength = i+1; break; }
2353 oligo = oligo.substr(0,alnLength);
2354 temp = temp.substr(0,alnLength);
2356 int numDiff = countDiffs(oligo, temp);
2358 if(numDiff < minDiff){
2362 for(int i=0;i<alnLength;i++){
2368 else if(numDiff == minDiff){
2374 if(minDiff > sdiffs) { success = minDiff; } //no good matches
2375 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
2376 else{ //use the best match
2377 seq.setUnaligned(rawSequence.substr(minPos));
2379 if(qual.getName() != ""){
2380 qual.trimQScores(minPos, -1);
2385 if (alignment != NULL) { delete alignment; }
2393 catch(exception& e) {
2394 m->errorOut(e, "TrimOligos", "stripSpacer");
2398 //******************************************************************/
2399 bool TrimOligos::stripSpacer(Sequence& seq){
2402 string rawSequence = seq.getUnaligned();
2403 bool success = sdiffs+1; //guilty until proven innocent
2405 for(int i=0;i<spacer.size();i++){
2406 string oligo = spacer[i];
2408 if(rawSequence.length() < oligo.length()){
2409 success = sdiffs+10;
2413 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2414 seq.setUnaligned(rawSequence.substr(oligo.length()));
2420 //if you found the spacer or if you don't want to allow for diffs
2421 if ((sdiffs == 0) || (success == 0)) { return success; }
2423 else { //try aligning and see if you can find it
2424 Alignment* alignment;
2425 if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2426 else{ alignment = NULL; }
2428 //can you find the barcode
2433 for(int i = 0; i < spacer.size(); i++){
2434 string oligo = spacer[i];
2435 // int length = oligo.length();
2437 if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length
2438 success = sdiffs + 10;
2442 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2443 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2444 oligo = alignment->getSeqAAln();
2445 string temp = alignment->getSeqBAln();
2447 int alnLength = oligo.length();
2449 for(int i=oligo.length()-1;i>=0;i--){
2450 if(oligo[i] != '-'){ alnLength = i+1; break; }
2452 oligo = oligo.substr(0,alnLength);
2453 temp = temp.substr(0,alnLength);
2455 int numDiff = countDiffs(oligo, temp);
2457 if(numDiff < minDiff){
2461 for(int i=0;i<alnLength;i++){
2467 else if(numDiff == minDiff){
2473 if(minDiff > sdiffs) { success = minDiff; } //no good matches
2474 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
2475 else{ //use the best match
2476 seq.setUnaligned(rawSequence.substr(minPos));
2480 if (alignment != NULL) { delete alignment; }
2487 catch(exception& e) {
2488 m->errorOut(e, "TrimOligos", "stripSpacer");
2493 //******************************************************************/
2494 bool TrimOligos::compareDNASeq(string oligo, string seq){
2497 int length = oligo.length();
2499 for(int i=0;i<length;i++){
2501 if(oligo[i] != seq[i]){
2502 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
2503 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
2504 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
2505 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
2506 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
2507 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2508 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
2509 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
2510 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2511 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2512 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
2513 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
2515 if(success == 0) { break; }
2524 catch(exception& e) {
2525 m->errorOut(e, "TrimOligos", "compareDNASeq");
2530 //********************************************************************/
2531 int TrimOligos::countDiffs(string oligo, string seq){
2534 int length = oligo.length();
2537 for(int i=0;i<length;i++){
2539 if(oligo[i] != seq[i]){
2540 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
2541 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
2542 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
2543 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
2544 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
2545 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2546 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
2547 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
2548 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2549 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2550 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
2551 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
2558 catch(exception& e) {
2559 m->errorOut(e, "TrimOligos", "countDiffs");
2563 //********************************************************************/
2564 string TrimOligos::reverseOligo(string oligo){
2566 string reverse = "";
2568 for(int i=oligo.length()-1;i>=0;i--){
2570 if(oligo[i] == 'A') { reverse += 'T'; }
2571 else if(oligo[i] == 'T'){ reverse += 'A'; }
2572 else if(oligo[i] == 'U'){ reverse += 'A'; }
2574 else if(oligo[i] == 'G'){ reverse += 'C'; }
2575 else if(oligo[i] == 'C'){ reverse += 'G'; }
2577 else if(oligo[i] == 'R'){ reverse += 'Y'; }
2578 else if(oligo[i] == 'Y'){ reverse += 'R'; }
2580 else if(oligo[i] == 'M'){ reverse += 'K'; }
2581 else if(oligo[i] == 'K'){ reverse += 'M'; }
2583 else if(oligo[i] == 'W'){ reverse += 'W'; }
2584 else if(oligo[i] == 'S'){ reverse += 'S'; }
2586 else if(oligo[i] == 'B'){ reverse += 'V'; }
2587 else if(oligo[i] == 'V'){ reverse += 'B'; }
2589 else if(oligo[i] == 'D'){ reverse += 'H'; }
2590 else if(oligo[i] == 'H'){ reverse += 'D'; }
2592 else if(oligo[i] == '-'){ reverse += '-'; }
2593 else { reverse += 'N'; }
2599 catch(exception& e) {
2600 m->errorOut(e, "TrimOligos", "reverseOligo");
2605 /********************************************************************/