5 * Created by westcott on 9/1/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
19 m = MothurOut::getInstance();
32 maxFBarcodeLength = 0;
33 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
34 if(it->first.length() > maxFBarcodeLength){
35 maxFBarcodeLength = it->first.length();
39 map<string,int>::iterator it;
40 for(it=primers.begin();it!=primers.end();it++){
41 if(it->first.length() > maxFPrimerLength){
42 maxFPrimerLength = it->first.length();
47 for(int i = 0; i < linker.size(); i++){
48 if(linker[i].length() > maxLinkerLength){
49 maxLinkerLength = linker[i].length();
54 for(int i = 0; i < spacer.size(); i++){
55 if(spacer[i].length() > maxSpacerLength){
56 maxSpacerLength = spacer[i].length();
61 m->errorOut(e, "TrimOligos", "TrimOligos");
65 /********************************************************************/
66 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
67 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
69 m = MothurOut::getInstance();
77 maxFBarcodeLength = 0;
78 for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
79 string forward = it->second.forward;
80 map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
82 if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length(); }
84 if (itForward == ifbarcodes.end()) {
85 vector<int> temp; temp.push_back(it->first);
86 ifbarcodes[forward] = temp;
87 }else { itForward->second.push_back(it->first); }
91 for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
92 string forward = it->second.forward;
93 map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
95 if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length(); }
97 if (itForward == ifprimers.end()) {
98 vector<int> temp; temp.push_back(it->first);
99 ifprimers[forward] = temp;
100 }else { itForward->second.push_back(it->first); }
103 maxRBarcodeLength = 0;
104 for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
105 string forward = it->second.reverse;
106 map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
108 if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length(); }
110 if (itForward == irbarcodes.end()) {
111 vector<int> temp; temp.push_back(it->first);
112 irbarcodes[forward] = temp;
113 }else { itForward->second.push_back(it->first); }
116 maxRPrimerLength = 0;
117 for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
118 string forward = it->second.reverse;
119 map<string, vector<int> >::iterator itForward = irprimers.find(forward);
121 if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length(); }
123 if (itForward == irprimers.end()) {
124 vector<int> temp; temp.push_back(it->first);
125 irprimers[forward] = temp;
126 }else { itForward->second.push_back(it->first); }
132 catch(exception& e) {
133 m->errorOut(e, "TrimOligos", "TrimOligos");
137 /********************************************************************/
138 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
139 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
141 m = MothurOut::getInstance();
151 maxFBarcodeLength = 0;
152 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
153 string oligo = it->first;
154 if(oligo.length() > maxFBarcodeLength){
155 maxFBarcodeLength = oligo.length();
158 maxRBarcodeLength = maxFBarcodeLength;
160 maxFPrimerLength = 0;
161 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
162 string oligo = it->first;
163 if(oligo.length() > maxFPrimerLength){
164 maxFPrimerLength = oligo.length();
167 maxRPrimerLength = maxFPrimerLength;
169 catch(exception& e) {
170 m->errorOut(e, "TrimOligos", "TrimOligos");
174 /********************************************************************/
175 TrimOligos::~TrimOligos() {}
176 //********************************************************************/
177 bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
180 string rawSequence = seq.getUnaligned();
182 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
183 string oligo = it->first;
185 if(rawSequence.length() < oligo.length()) { break; }
188 int olength = oligo.length();
189 for (int j = 0; j < rawSequence.length()-olength; j++){
190 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
191 string rawChunk = rawSequence.substr(j, olength);
192 if(compareDNASeq(oligo, rawChunk)) {
194 primerEnd = primerStart + olength;
201 primerStart = 0; primerEnd = 0;
202 //if you don't want to allow for diffs
203 if (pdiffs == 0) { return false; }
204 else { //try aligning and see if you can find it
206 //can you find the barcode
210 Alignment* alignment;
211 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
212 else{ alignment = NULL; }
214 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
216 string prim = it->first;
218 int olength = prim.length();
219 if (rawSequence.length() < olength) {} //ignore primers too long for this seq
221 for (int j = 0; j < rawSequence.length()-olength; j++){
223 string oligo = it->first;
225 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
227 string rawChunk = rawSequence.substr(j, olength+pdiffs);
229 //use needleman to align first primer.length()+numdiffs of sequence to each barcode
230 alignment->alignPrimer(oligo, rawChunk);
231 oligo = alignment->getSeqAAln();
232 string temp = alignment->getSeqBAln();
234 int alnLength = oligo.length();
236 for(int i=oligo.length()-1;i>=0;i--){
237 if(oligo[i] != '-'){ alnLength = i+1; break; }
239 oligo = oligo.substr(0,alnLength);
240 temp = temp.substr(0,alnLength);
242 int numDiff = countDiffs(oligo, temp);
244 if(numDiff < minDiff){
248 primerEnd = primerStart + alnLength;
249 }else if(numDiff == minDiff){ minCount++; }
254 if (alignment != NULL) { delete alignment; }
256 if(minDiff > pdiffs) { primerStart = 0; primerEnd = 0; return false; } //no good matches
257 else if(minCount > 1) { primerStart = 0; primerEnd = 0; return false; } //can't tell the difference between multiple primers
261 primerStart = 0; primerEnd = 0;
265 catch(exception& e) {
266 m->errorOut(e, "TrimOligos", "stripForward");
270 //******************************************************************/
271 bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
274 string rawSequence = seq.getUnaligned();
276 for(int i=0;i<revPrimer.size();i++){
277 string oligo = revPrimer[i];
278 if(rawSequence.length() < oligo.length()) { break; }
281 int olength = oligo.length();
282 for (int j = rawSequence.length()-olength; j >= 0; j--){
283 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
284 string rawChunk = rawSequence.substr(j, olength);
286 if(compareDNASeq(oligo, rawChunk)) {
288 primerEnd = primerStart + olength;
295 primerStart = 0; primerEnd = 0;
298 catch(exception& e) {
299 m->errorOut(e, "PcrSeqsCommand", "findReverse");
303 //*******************************************************************/
305 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
308 if (paired) { int success = stripPairedBarcode(seq, qual, group); return success; }
310 string rawSequence = seq.getUnaligned();
311 int success = bdiffs + 1; //guilty until proven innocent
313 //can you find the barcode
314 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
315 string oligo = it->first;
316 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
317 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
321 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
323 seq.setUnaligned(rawSequence.substr(oligo.length()));
325 if(qual.getName() != ""){
326 qual.trimQScores(oligo.length(), -1);
334 //if you found the barcode or if you don't want to allow for diffs
335 if ((bdiffs == 0) || (success == 0)) { return success; }
337 else { //try aligning and see if you can find it
338 Alignment* alignment;
339 if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
340 else{ alignment = NULL; }
342 //can you find the barcode
348 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
349 string oligo = it->first;
350 // int length = oligo.length();
352 if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
353 success = bdiffs + 10;
357 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
358 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
359 oligo = alignment->getSeqAAln();
360 string temp = alignment->getSeqBAln();
362 int alnLength = oligo.length();
364 for(int i=oligo.length()-1;i>=0;i--){
365 if(oligo[i] != '-'){ alnLength = i+1; break; }
367 oligo = oligo.substr(0,alnLength);
368 temp = temp.substr(0,alnLength);
369 int numDiff = countDiffs(oligo, temp);
371 if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " aligned fragment =" + temp + ", barcode =" + oligo + ", numDiffs = " + toString(numDiff) + "\n"); }
373 if(numDiff < minDiff){
376 minGroup = it->second;
378 for(int i=0;i<alnLength;i++){
384 else if(numDiff == minDiff){
390 if(minDiff > bdiffs) { success = minDiff; } //no good matches
391 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
392 else{ //use the best match
394 seq.setUnaligned(rawSequence.substr(minPos));
396 if(qual.getName() != ""){
397 qual.trimQScores(minPos, -1);
402 if (alignment != NULL) { delete alignment; }
409 catch(exception& e) {
410 m->errorOut(e, "TrimOligos", "stripBarcode");
414 //*******************************************************************/
415 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
417 //look for forward barcode
418 string rawFSequence = forwardSeq.getUnaligned();
419 string rawRSequence = reverseSeq.getUnaligned();
420 int success = bdiffs + 1; //guilty until proven innocent
422 //can you find the forward barcode
423 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
424 string foligo = it->second.forward;
425 string roligo = it->second.reverse;
427 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
428 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
431 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
432 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
436 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
438 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
439 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
445 //if you found the barcode or if you don't want to allow for diffs
446 if ((bdiffs == 0) || (success == 0)) { return success; }
447 else { //try aligning and see if you can find it
448 Alignment* alignment;
449 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
450 else{ alignment = NULL; }
452 //can you find the barcode
455 vector< vector<int> > minFGroup;
458 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
467 only want to look for forward = Sarah, John, Anna, Pat, Gail
468 reverse = Westcott, Schloss, Brown, Moore
469 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
471 //cout << endl << forwardSeq.getName() << endl;
472 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
473 string oligo = it->first;
475 if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
476 success = bdiffs + 10;
479 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
480 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
481 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
482 oligo = alignment->getSeqAAln();
483 string temp = alignment->getSeqBAln();
485 int alnLength = oligo.length();
487 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
488 oligo = oligo.substr(0,alnLength);
489 temp = temp.substr(0,alnLength);
490 int numDiff = countDiffs(oligo, temp);
492 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
494 if (alnLength == 0) { numDiff = bdiffs + 100; }
495 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
497 if(numDiff < minDiff){
501 minFGroup.push_back(it->second);
504 for(int i=0;i<alnLength;i++){
509 minFPos.push_back(tempminFPos);
510 }else if(numDiff == minDiff){
511 minFGroup.push_back(it->second);
513 for(int i=0;i<alnLength;i++){
518 minFPos.push_back(tempminFPos);
522 //cout << minDiff << '\t' << minCount << '\t' << endl;
523 if(minDiff > bdiffs) { success = minDiff; } //no good matches
525 //check for reverse match
526 if (alignment != NULL) { delete alignment; }
528 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
529 else{ alignment = NULL; }
531 //can you find the barcode
534 vector< vector<int> > minRGroup;
537 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
538 string oligo = it->first;
539 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
540 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
541 success = bdiffs + 10;
545 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
546 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
547 oligo = alignment->getSeqAAln();
548 string temp = alignment->getSeqBAln();
550 int alnLength = oligo.length();
551 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
552 oligo = oligo.substr(0,alnLength);
553 temp = temp.substr(0,alnLength);
554 int numDiff = countDiffs(oligo, temp);
556 if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
558 if (alnLength == 0) { numDiff = bdiffs + 100; }
560 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
561 if(numDiff < minDiff){
565 minRGroup.push_back(it->second);
568 for(int i=0;i<alnLength;i++){
573 minRPos.push_back(tempminRPos);
574 }else if(numDiff == minDiff){
576 for(int i=0;i<alnLength;i++){
581 minRPos.push_back(tempminRPos);
582 minRGroup.push_back(it->second);
587 if(minDiff > bdiffs) { success = minDiff; } //no good matches
589 bool foundMatch = false;
591 for (int i = 0; i < minFGroup.size(); i++) {
592 for (int j = 0; j < minFGroup[i].size(); j++) {
593 for (int k = 0; k < minRGroup.size(); k++) {
594 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
601 if (matches.size() == 1) {
608 //we have an acceptable match for the forward and reverse, but do they match?
610 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
611 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
613 }else { success = bdiffs + 100; }
617 if (alignment != NULL) { delete alignment; }
623 catch(exception& e) {
624 m->errorOut(e, "TrimOligos", "stripIBarcode");
629 //*******************************************************************/
630 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
632 //look for forward barcode
633 string rawFSequence = forwardSeq.getUnaligned();
634 string rawRSequence = reverseSeq.getUnaligned();
635 int success = bdiffs + 1; //guilty until proven innocent
637 //can you find the forward barcode
638 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
639 string foligo = it->second.forward;
640 string roligo = it->second.reverse;
642 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
643 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
646 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
647 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
651 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
653 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
654 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
655 forwardQual.trimQScores(foligo.length(), -1);
656 reverseQual.trimQScores(roligo.length(), -1);
662 //if you found the barcode or if you don't want to allow for diffs
663 if ((bdiffs == 0) || (success == 0)) { return success; }
664 else { //try aligning and see if you can find it
665 Alignment* alignment;
666 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
667 else{ alignment = NULL; }
669 //can you find the barcode
672 vector< vector<int> > minFGroup;
675 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
684 only want to look for forward = Sarah, John, Anna, Pat, Gail
685 reverse = Westcott, Schloss, Brown, Moore
686 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
688 //cout << endl << forwardSeq.getName() << endl;
689 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
690 string oligo = it->first;
692 if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
693 success = bdiffs + 10;
696 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
697 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
698 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
699 oligo = alignment->getSeqAAln();
700 string temp = alignment->getSeqBAln();
702 int alnLength = oligo.length();
704 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
705 oligo = oligo.substr(0,alnLength);
706 temp = temp.substr(0,alnLength);
707 int numDiff = countDiffs(oligo, temp);
709 if (alnLength == 0) { numDiff = bdiffs + 100; }
710 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
712 if(numDiff < minDiff){
716 minFGroup.push_back(it->second);
719 for(int i=0;i<alnLength;i++){
724 minFPos.push_back(tempminFPos);
725 }else if(numDiff == minDiff){
726 minFGroup.push_back(it->second);
728 for(int i=0;i<alnLength;i++){
733 minFPos.push_back(tempminFPos);
737 //cout << minDiff << '\t' << minCount << '\t' << endl;
738 if(minDiff > bdiffs) { success = minDiff; } //no good matches
740 //check for reverse match
741 if (alignment != NULL) { delete alignment; }
743 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
744 else{ alignment = NULL; }
746 //can you find the barcode
749 vector< vector<int> > minRGroup;
752 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
753 string oligo = it->first;
754 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
755 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
756 success = bdiffs + 10;
760 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
761 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
762 oligo = alignment->getSeqAAln();
763 string temp = alignment->getSeqBAln();
765 int alnLength = oligo.length();
766 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
767 oligo = oligo.substr(0,alnLength);
768 temp = temp.substr(0,alnLength);
769 int numDiff = countDiffs(oligo, temp);
770 if (alnLength == 0) { numDiff = bdiffs + 100; }
772 if (m->debug) { m->mothurOut("[DEBUG]: reverse " + reverseSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
774 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
775 if(numDiff < minDiff){
779 minRGroup.push_back(it->second);
782 for(int i=0;i<alnLength;i++){
787 minRPos.push_back(tempminRPos);
788 }else if(numDiff == minDiff){
790 for(int i=0;i<alnLength;i++){
795 minRPos.push_back(tempminRPos);
796 minRGroup.push_back(it->second);
801 if(minDiff > bdiffs) { success = minDiff; } //no good matches
803 bool foundMatch = false;
805 for (int i = 0; i < minFGroup.size(); i++) {
806 for (int j = 0; j < minFGroup[i].size(); j++) {
807 for (int k = 0; k < minRGroup.size(); k++) {
808 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
815 if (matches.size() == 1) {
822 //we have an acceptable match for the forward and reverse, but do they match?
824 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
825 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
826 forwardQual.trimQScores(fStart, -1);
827 reverseQual.trimQScores(rStart, -1);
829 }else { success = bdiffs + 100; }
833 if (alignment != NULL) { delete alignment; }
839 catch(exception& e) {
840 m->errorOut(e, "TrimOligos", "stripIBarcode");
845 //*******************************************************************/
846 int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
848 //look for forward barcode
849 string rawSeq = seq.getUnaligned();
850 int success = bdiffs + 1; //guilty until proven innocent
851 //cout << seq.getName() << endl;
852 //can you find the forward barcode
853 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
854 string foligo = it->second.forward;
855 string roligo = it->second.reverse;
856 //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
857 //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
858 if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
859 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
862 if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
863 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
867 if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; }
869 if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
871 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
872 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
873 if(qual.getName() != ""){
874 qual.trimQScores(-1, rawSeq.length()-roligo.length());
875 qual.trimQScores(foligo.length(), -1);
881 //cout << "success=" << success << endl;
882 //if you found the barcode or if you don't want to allow for diffs
883 if ((bdiffs == 0) || (success == 0)) { return success; }
884 else { //try aligning and see if you can find it
885 Alignment* alignment;
886 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
887 else{ alignment = NULL; }
889 //can you find the barcode
892 vector< vector<int> > minFGroup;
895 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
904 only want to look for forward = Sarah, John, Anna, Pat, Gail
905 reverse = Westcott, Schloss, Brown, Moore
906 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
908 //cout << endl << forwardSeq.getName() << endl;
909 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
910 string oligo = it->first;
912 if(rawSeq.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
913 success = bdiffs + 10;
916 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
917 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
918 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
919 oligo = alignment->getSeqAAln();
920 string temp = alignment->getSeqBAln();
922 int alnLength = oligo.length();
924 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
925 oligo = oligo.substr(0,alnLength);
926 temp = temp.substr(0,alnLength);
927 int numDiff = countDiffs(oligo, temp);
929 if (alnLength == 0) { numDiff = bdiffs + 100; }
930 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
932 if(numDiff < minDiff){
936 minFGroup.push_back(it->second);
939 for(int i=0;i<alnLength;i++){
944 minFPos.push_back(tempminFPos);
945 }else if(numDiff == minDiff){
946 minFGroup.push_back(it->second);
948 for(int i=0;i<alnLength;i++){
953 minFPos.push_back(tempminFPos);
957 //cout << minDiff << '\t' << minCount << '\t' << endl;
958 if(minDiff > bdiffs) { success = minDiff; } //no good matches
960 //check for reverse match
961 if (alignment != NULL) { delete alignment; }
963 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
964 else{ alignment = NULL; }
966 //can you find the barcode
969 vector< vector<int> > minRGroup;
972 string rawRSequence = reverseOligo(seq.getUnaligned());
973 //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl;
974 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
975 string oligo = reverseOligo(it->first);
976 //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
977 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
978 success = bdiffs + 10;
982 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
983 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
984 oligo = alignment->getSeqAAln();
985 string temp = alignment->getSeqBAln();
987 int alnLength = oligo.length();
988 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
989 oligo = oligo.substr(0,alnLength);
990 temp = temp.substr(0,alnLength);
991 int numDiff = countDiffs(oligo, temp);
992 if (alnLength == 0) { numDiff = bdiffs + 100; }
994 //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
995 if(numDiff < minDiff){
999 minRGroup.push_back(it->second);
1000 int tempminRPos = 0;
1002 for(int i=0;i<alnLength;i++){
1007 minRPos.push_back(tempminRPos);
1008 }else if(numDiff == minDiff){
1009 int tempminRPos = 0;
1010 for(int i=0;i<alnLength;i++){
1015 minRPos.push_back(tempminRPos);
1016 minRGroup.push_back(it->second);
1021 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1023 bool foundMatch = false;
1024 vector<int> matches;
1025 for (int i = 0; i < minFGroup.size(); i++) {
1026 for (int j = 0; j < minFGroup[i].size(); j++) {
1027 for (int k = 0; k < minRGroup.size(); k++) {
1028 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1035 if (matches.size() == 1) {
1038 fStart = minFPos[0];
1039 rStart = rawSeq.length() - minRPos[0];
1040 if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence
1043 //we have an acceptable match for the forward and reverse, but do they match?
1045 string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1046 seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1047 if(qual.getName() != ""){
1048 qual.trimQScores(-1, rStart);
1049 qual.trimQScores(fStart, -1);
1052 //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
1053 }else { success = bdiffs + 100; }
1057 if (alignment != NULL) { delete alignment; }
1063 catch(exception& e) {
1064 m->errorOut(e, "TrimOligos", "stripPairedBarcode");
1069 //*******************************************************************/
1070 int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1073 string rawSeq = seq.getUnaligned();
1074 int success = pdiffs + 1; //guilty until proven innocent
1075 //cout << seq.getName() << endl;
1076 //can you find the forward
1077 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1078 string foligo = it->second.forward;
1079 string roligo = it->second.reverse;
1081 //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
1082 //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
1083 if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1084 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1087 if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1088 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1092 if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; }
1094 if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
1097 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
1098 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
1099 if(qual.getName() != ""){
1100 qual.trimQScores(-1, rawSeq.length()-roligo.length());
1101 qual.trimQScores(foligo.length(), -1);
1108 //cout << "success=" << success << endl;
1109 //if you found the barcode or if you don't want to allow for diffs
1110 if ((pdiffs == 0) || (success == 0)) { return success; }
1111 else { //try aligning and see if you can find it
1112 Alignment* alignment;
1113 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1114 else{ alignment = NULL; }
1116 //can you find the barcode
1119 vector< vector<int> > minFGroup;
1120 vector<int> minFPos;
1122 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1131 only want to look for forward = Sarah, John, Anna, Pat, Gail
1132 reverse = Westcott, Schloss, Brown, Moore
1133 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1135 //cout << endl << forwardSeq.getName() << endl;
1136 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1137 string oligo = it->first;
1139 if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1140 success = pdiffs + 10;
1143 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl;
1144 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1145 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
1146 oligo = alignment->getSeqAAln();
1147 string temp = alignment->getSeqBAln();
1149 int alnLength = oligo.length();
1151 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1152 oligo = oligo.substr(0,alnLength);
1153 temp = temp.substr(0,alnLength);
1154 int numDiff = countDiffs(oligo, temp);
1156 if (alnLength == 0) { numDiff = pdiffs + 100; }
1157 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1159 if(numDiff < minDiff){
1163 minFGroup.push_back(it->second);
1164 int tempminFPos = 0;
1166 for(int i=0;i<alnLength;i++){
1171 minFPos.push_back(tempminFPos);
1172 }else if(numDiff == minDiff){
1173 minFGroup.push_back(it->second);
1174 int tempminFPos = 0;
1175 for(int i=0;i<alnLength;i++){
1180 minFPos.push_back(tempminFPos);
1184 //cout << minDiff << '\t' << minCount << '\t' << endl;
1185 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1187 //check for reverse match
1188 if (alignment != NULL) { delete alignment; }
1190 if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1191 else{ alignment = NULL; }
1193 //can you find the barcode
1196 vector< vector<int> > minRGroup;
1197 vector<int> minRPos;
1199 string rawRSequence = reverseOligo(seq.getUnaligned());
1201 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1202 string oligo = reverseOligo(it->first);
1203 //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
1204 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1205 success = pdiffs + 10;
1209 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1210 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1211 oligo = alignment->getSeqAAln();
1212 string temp = alignment->getSeqBAln();
1214 int alnLength = oligo.length();
1215 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1216 oligo = oligo.substr(0,alnLength);
1217 temp = temp.substr(0,alnLength);
1218 int numDiff = countDiffs(oligo, temp);
1219 if (alnLength == 0) { numDiff = pdiffs + 100; }
1221 //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1222 if(numDiff < minDiff){
1226 minRGroup.push_back(it->second);
1227 int tempminRPos = 0;
1229 for(int i=0;i<alnLength;i++){
1234 minRPos.push_back(tempminRPos);
1235 }else if(numDiff == minDiff){
1236 int tempminRPos = 0;
1237 for(int i=0;i<alnLength;i++){
1242 minRPos.push_back(tempminRPos);
1243 minRGroup.push_back(it->second);
1248 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1250 bool foundMatch = false;
1251 vector<int> matches;
1252 for (int i = 0; i < minFGroup.size(); i++) {
1253 for (int j = 0; j < minFGroup[i].size(); j++) {
1254 for (int k = 0; k < minRGroup.size(); k++) {
1255 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1262 if (matches.size() == 1) {
1265 fStart = minFPos[0];
1266 rStart = rawSeq.length() - minRPos[0];
1267 if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence
1270 //we have an acceptable match for the forward and reverse, but do they match?
1273 string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1274 seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1275 if(qual.getName() != ""){
1276 qual.trimQScores(-1, rStart);
1277 qual.trimQScores(fStart, -1);
1281 //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
1282 }else { success = pdiffs + 100; }
1286 if (alignment != NULL) { delete alignment; }
1292 catch(exception& e) {
1293 m->errorOut(e, "TrimOligos", "stripPairedPrimers");
1299 //*******************************************************************/
1300 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
1302 //look for forward barcode
1303 string rawFSequence = forwardSeq.getUnaligned();
1304 string rawRSequence = reverseSeq.getUnaligned();
1305 int success = pdiffs + 1; //guilty until proven innocent
1307 //can you find the forward barcode
1308 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1309 string foligo = it->second.forward;
1310 string roligo = it->second.reverse;
1312 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1313 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1316 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1317 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1321 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
1323 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1324 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1325 forwardQual.trimQScores(foligo.length(), -1);
1326 reverseQual.trimQScores(roligo.length(), -1);
1332 //if you found the barcode or if you don't want to allow for diffs
1333 if ((pdiffs == 0) || (success == 0)) { return success; }
1334 else { //try aligning and see if you can find it
1335 Alignment* alignment;
1336 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1337 else{ alignment = NULL; }
1339 //can you find the barcode
1342 vector< vector<int> > minFGroup;
1343 vector<int> minFPos;
1345 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1354 only want to look for forward = Sarah, John, Anna, Pat, Gail
1355 reverse = Westcott, Schloss, Brown, Moore
1356 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1358 //cout << endl << forwardSeq.getName() << endl;
1359 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1360 string oligo = it->first;
1362 if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1363 success = pdiffs + 10;
1366 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1367 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1368 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1369 oligo = alignment->getSeqAAln();
1370 string temp = alignment->getSeqBAln();
1372 int alnLength = oligo.length();
1374 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1375 oligo = oligo.substr(0,alnLength);
1376 temp = temp.substr(0,alnLength);
1377 int numDiff = countDiffs(oligo, temp);
1379 if (alnLength == 0) { numDiff = pdiffs + 100; }
1380 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1382 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
1384 if(numDiff < minDiff){
1388 minFGroup.push_back(it->second);
1389 int tempminFPos = 0;
1391 for(int i=0;i<alnLength;i++){
1396 minFPos.push_back(tempminFPos);
1397 }else if(numDiff == minDiff){
1398 minFGroup.push_back(it->second);
1399 int tempminFPos = 0;
1400 for(int i=0;i<alnLength;i++){
1405 minFPos.push_back(tempminFPos);
1409 //cout << minDiff << '\t' << minCount << '\t' << endl;
1410 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1412 //check for reverse match
1413 if (alignment != NULL) { delete alignment; }
1415 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1416 else{ alignment = NULL; }
1418 //can you find the barcode
1421 vector< vector<int> > minRGroup;
1422 vector<int> minRPos;
1424 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1425 string oligo = it->first;
1426 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1427 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1428 success = pdiffs + 10;
1432 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1433 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1434 oligo = alignment->getSeqAAln();
1435 string temp = alignment->getSeqBAln();
1437 int alnLength = oligo.length();
1438 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1439 oligo = oligo.substr(0,alnLength);
1440 temp = temp.substr(0,alnLength);
1441 int numDiff = countDiffs(oligo, temp);
1442 if (alnLength == 0) { numDiff = pdiffs + 100; }
1444 if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
1446 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1447 if(numDiff < minDiff){
1451 minRGroup.push_back(it->second);
1452 int tempminRPos = 0;
1454 for(int i=0;i<alnLength;i++){
1459 minRPos.push_back(tempminRPos);
1460 }else if(numDiff == minDiff){
1461 int tempminRPos = 0;
1462 for(int i=0;i<alnLength;i++){
1467 minRPos.push_back(tempminRPos);
1468 minRGroup.push_back(it->second);
1473 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1475 bool foundMatch = false;
1476 vector<int> matches;
1477 for (int i = 0; i < minFGroup.size(); i++) {
1478 for (int j = 0; j < minFGroup[i].size(); j++) {
1479 for (int k = 0; k < minRGroup.size(); k++) {
1480 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1487 if (matches.size() == 1) {
1490 fStart = minFPos[0];
1491 rStart = minRPos[0];
1494 //we have an acceptable match for the forward and reverse, but do they match?
1496 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1497 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1498 forwardQual.trimQScores(fStart, -1);
1499 reverseQual.trimQScores(rStart, -1);
1501 }else { success = pdiffs + 100; }
1505 if (alignment != NULL) { delete alignment; }
1511 catch(exception& e) {
1512 m->errorOut(e, "TrimOligos", "stripIForward");
1517 //*******************************************************************/
1518 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
1520 //look for forward barcode
1521 string rawFSequence = forwardSeq.getUnaligned();
1522 string rawRSequence = reverseSeq.getUnaligned();
1523 int success = pdiffs + 1; //guilty until proven innocent
1525 //can you find the forward barcode
1526 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1527 string foligo = it->second.forward;
1528 string roligo = it->second.reverse;
1530 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1531 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1534 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1535 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1539 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1541 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1542 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1548 //if you found the barcode or if you don't want to allow for diffs
1549 if ((pdiffs == 0) || (success == 0)) { return success; }
1550 else { //try aligning and see if you can find it
1551 Alignment* alignment;
1552 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1553 else{ alignment = NULL; }
1555 //can you find the barcode
1558 vector< vector<int> > minFGroup;
1559 vector<int> minFPos;
1561 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1570 only want to look for forward = Sarah, John, Anna, Pat, Gail
1571 reverse = Westcott, Schloss, Brown, Moore
1572 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1574 //cout << endl << forwardSeq.getName() << endl;
1575 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1576 string oligo = it->first;
1578 if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1579 success = pdiffs + 10;
1582 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1583 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1584 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1585 oligo = alignment->getSeqAAln();
1586 string temp = alignment->getSeqBAln();
1588 int alnLength = oligo.length();
1590 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1591 oligo = oligo.substr(0,alnLength);
1592 temp = temp.substr(0,alnLength);
1593 int numDiff = countDiffs(oligo, temp);
1595 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
1597 if (alnLength == 0) { numDiff = pdiffs + 100; }
1598 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1600 if(numDiff < minDiff){
1604 minFGroup.push_back(it->second);
1605 int tempminFPos = 0;
1607 for(int i=0;i<alnLength;i++){
1612 minFPos.push_back(tempminFPos);
1613 }else if(numDiff == minDiff){
1614 minFGroup.push_back(it->second);
1615 int tempminFPos = 0;
1616 for(int i=0;i<alnLength;i++){
1621 minFPos.push_back(tempminFPos);
1625 //cout << minDiff << '\t' << minCount << '\t' << endl;
1626 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1628 //check for reverse match
1629 if (alignment != NULL) { delete alignment; }
1631 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1632 else{ alignment = NULL; }
1634 //can you find the barcode
1637 vector< vector<int> > minRGroup;
1638 vector<int> minRPos;
1640 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1641 string oligo = it->first;
1642 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1643 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1644 success = pdiffs + 10;
1648 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1649 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1650 oligo = alignment->getSeqAAln();
1651 string temp = alignment->getSeqBAln();
1653 int alnLength = oligo.length();
1654 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1655 oligo = oligo.substr(0,alnLength);
1656 temp = temp.substr(0,alnLength);
1657 int numDiff = countDiffs(oligo, temp);
1659 if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
1661 if (alnLength == 0) { numDiff = pdiffs + 100; }
1663 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1664 if(numDiff < minDiff){
1668 minRGroup.push_back(it->second);
1669 int tempminRPos = 0;
1671 for(int i=0;i<alnLength;i++){
1676 minRPos.push_back(tempminRPos);
1677 }else if(numDiff == minDiff){
1678 int tempminRPos = 0;
1679 for(int i=0;i<alnLength;i++){
1684 minRPos.push_back(tempminRPos);
1685 minRGroup.push_back(it->second);
1690 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1692 bool foundMatch = false;
1693 vector<int> matches;
1694 for (int i = 0; i < minFGroup.size(); i++) {
1695 for (int j = 0; j < minFGroup[i].size(); j++) {
1696 for (int k = 0; k < minRGroup.size(); k++) {
1697 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1704 if (matches.size() == 1) {
1707 fStart = minFPos[0];
1708 rStart = minRPos[0];
1711 //we have an acceptable match for the forward and reverse, but do they match?
1713 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1714 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1716 }else { success = pdiffs + 100; }
1720 if (alignment != NULL) { delete alignment; }
1726 catch(exception& e) {
1727 m->errorOut(e, "TrimOligos", "stripIForward");
1732 //*******************************************************************/
1733 int TrimOligos::stripBarcode(Sequence& seq, int& group){
1736 string rawSequence = seq.getUnaligned();
1737 int success = bdiffs + 1; //guilty until proven innocent
1739 //can you find the barcode
1740 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1741 string oligo = it->first;
1742 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
1743 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
1747 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1749 seq.setUnaligned(rawSequence.substr(oligo.length()));
1756 //if you found the barcode or if you don't want to allow for diffs
1757 if ((bdiffs == 0) || (success == 0)) { return success; }
1759 else { //try aligning and see if you can find it
1760 Alignment* alignment;
1761 if (barcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
1762 else{ alignment = NULL; }
1764 //can you find the barcode
1770 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1771 string oligo = it->first;
1772 // int length = oligo.length();
1774 if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
1775 success = bdiffs + 10;
1779 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1780 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1781 oligo = alignment->getSeqAAln();
1782 string temp = alignment->getSeqBAln();
1784 int alnLength = oligo.length();
1786 for(int i=oligo.length()-1;i>=0;i--){
1787 if(oligo[i] != '-'){ alnLength = i+1; break; }
1789 oligo = oligo.substr(0,alnLength);
1790 temp = temp.substr(0,alnLength);
1792 int numDiff = countDiffs(oligo, temp);
1794 if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " aligned fragment =" + temp + ", barcode =" + oligo + ", numDiffs = " + toString(numDiff) + "\n"); }
1796 if(numDiff < minDiff){
1799 minGroup = it->second;
1801 for(int i=0;i<alnLength;i++){
1807 else if(numDiff == minDiff){
1813 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1814 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
1815 else{ //use the best match
1817 seq.setUnaligned(rawSequence.substr(minPos));
1821 if (alignment != NULL) { delete alignment; }
1828 catch(exception& e) {
1829 m->errorOut(e, "TrimOligos", "stripBarcode");
1835 /********************************************************************/
1836 int TrimOligos::stripForward(Sequence& seq, int& group){
1839 string rawSequence = seq.getUnaligned();
1840 int success = pdiffs + 1; //guilty until proven innocent
1842 //can you find the primer
1843 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1844 string oligo = it->first;
1845 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1846 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1850 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1852 seq.setUnaligned(rawSequence.substr(oligo.length()));
1858 //if you found the barcode or if you don't want to allow for diffs
1859 if ((pdiffs == 0) || (success == 0)) { return success; }
1861 else { //try aligning and see if you can find it
1862 Alignment* alignment;
1863 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1864 else{ alignment = NULL; }
1866 //can you find the barcode
1872 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1873 string oligo = it->first;
1874 // int length = oligo.length();
1876 if(rawSequence.length() < maxFPrimerLength){
1877 success = pdiffs + 100;
1881 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1882 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1883 oligo = alignment->getSeqAAln();
1884 string temp = alignment->getSeqBAln();
1886 int alnLength = oligo.length();
1888 for(int i=oligo.length()-1;i>=0;i--){
1889 if(oligo[i] != '-'){ alnLength = i+1; break; }
1891 oligo = oligo.substr(0,alnLength);
1892 temp = temp.substr(0,alnLength);
1894 int numDiff = countDiffs(oligo, temp);
1896 if(numDiff < minDiff){
1899 minGroup = it->second;
1901 for(int i=0;i<alnLength;i++){
1907 else if(numDiff == minDiff){
1913 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1914 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1915 else{ //use the best match
1917 seq.setUnaligned(rawSequence.substr(minPos));
1921 if (alignment != NULL) { delete alignment; }
1928 catch(exception& e) {
1929 m->errorOut(e, "TrimOligos", "stripForward");
1933 //*******************************************************************/
1934 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1937 if (paired) { int success = stripPairedPrimers(seq, qual, group, keepForward); return success; }
1939 string rawSequence = seq.getUnaligned();
1940 int success = pdiffs + 1; //guilty until proven innocent
1942 //can you find the primer
1943 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1944 string oligo = it->first;
1945 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1946 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1950 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1952 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
1953 if(qual.getName() != ""){
1954 if (!keepForward) { qual.trimQScores(oligo.length(), -1); }
1961 //if you found the barcode or if you don't want to allow for diffs
1962 if ((pdiffs == 0) || (success == 0)) { return success; }
1964 else { //try aligning and see if you can find it
1965 Alignment* alignment;
1966 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1967 else{ alignment = NULL; }
1969 //can you find the barcode
1975 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1976 string oligo = it->first;
1977 // int length = oligo.length();
1979 if(rawSequence.length() < maxFPrimerLength){
1980 success = pdiffs + 100;
1984 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1985 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1986 oligo = alignment->getSeqAAln();
1987 string temp = alignment->getSeqBAln();
1989 int alnLength = oligo.length();
1991 for(int i=oligo.length()-1;i>=0;i--){
1992 if(oligo[i] != '-'){ alnLength = i+1; break; }
1994 oligo = oligo.substr(0,alnLength);
1995 temp = temp.substr(0,alnLength);
1997 int numDiff = countDiffs(oligo, temp);
1999 if(numDiff < minDiff){
2002 minGroup = it->second;
2004 for(int i=0;i<alnLength;i++){
2010 else if(numDiff == minDiff){
2016 if(minDiff > pdiffs) { success = minDiff; } //no good matches
2017 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
2018 else{ //use the best match
2020 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
2021 if(qual.getName() != ""){
2022 if (!keepForward) { qual.trimQScores(minPos, -1); }
2027 if (alignment != NULL) { delete alignment; }
2034 catch(exception& e) {
2035 m->errorOut(e, "TrimOligos", "stripForward");
2039 //******************************************************************/
2040 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
2042 string rawSequence = seq.getUnaligned();
2043 bool success = 0; //guilty until proven innocent
2045 for(int i=0;i<revPrimer.size();i++){
2046 string oligo = revPrimer[i];
2048 if(rawSequence.length() < oligo.length()){
2053 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2054 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2055 if(qual.getName() != ""){
2056 qual.trimQScores(-1, rawSequence.length()-oligo.length());
2065 catch(exception& e) {
2066 m->errorOut(e, "TrimOligos", "stripReverse");
2070 //******************************************************************/
2071 bool TrimOligos::stripReverse(Sequence& seq){
2074 string rawSequence = seq.getUnaligned();
2075 bool success = 0; //guilty until proven innocent
2077 for(int i=0;i<revPrimer.size();i++){
2078 string oligo = revPrimer[i];
2080 if(rawSequence.length() < oligo.length()){
2085 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2086 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2095 catch(exception& e) {
2096 m->errorOut(e, "TrimOligos", "stripReverse");
2100 //******************************************************************/
2101 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
2103 string rawSequence = seq.getUnaligned();
2104 bool success = ldiffs + 1; //guilty until proven innocent
2106 for(int i=0;i<linker.size();i++){
2107 string oligo = linker[i];
2109 if(rawSequence.length() < oligo.length()){
2110 success = ldiffs + 10;
2114 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2115 seq.setUnaligned(rawSequence.substr(oligo.length()));
2116 if(qual.getName() != ""){
2117 qual.trimQScores(oligo.length(), -1);
2124 //if you found the linker or if you don't want to allow for diffs
2125 if ((ldiffs == 0) || (success == 0)) { return success; }
2127 else { //try aligning and see if you can find it
2128 Alignment* alignment;
2129 if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2130 else{ alignment = NULL; }
2132 //can you find the barcode
2137 for(int i = 0; i < linker.size(); i++){
2138 string oligo = linker[i];
2139 // int length = oligo.length();
2141 if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length
2142 success = ldiffs + 10;
2146 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2147 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2148 oligo = alignment->getSeqAAln();
2149 string temp = alignment->getSeqBAln();
2151 int alnLength = oligo.length();
2153 for(int i=oligo.length()-1;i>=0;i--){
2154 if(oligo[i] != '-'){ alnLength = i+1; break; }
2156 oligo = oligo.substr(0,alnLength);
2157 temp = temp.substr(0,alnLength);
2159 int numDiff = countDiffs(oligo, temp);
2161 if(numDiff < minDiff){
2165 for(int i=0;i<alnLength;i++){
2171 else if(numDiff == minDiff){
2177 if(minDiff > ldiffs) { success = minDiff; } //no good matches
2178 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
2179 else{ //use the best match
2180 seq.setUnaligned(rawSequence.substr(minPos));
2182 if(qual.getName() != ""){
2183 qual.trimQScores(minPos, -1);
2188 if (alignment != NULL) { delete alignment; }
2196 catch(exception& e) {
2197 m->errorOut(e, "TrimOligos", "stripLinker");
2201 //******************************************************************/
2202 bool TrimOligos::stripLinker(Sequence& seq){
2205 string rawSequence = seq.getUnaligned();
2206 bool success = ldiffs +1; //guilty until proven innocent
2208 for(int i=0;i<linker.size();i++){
2209 string oligo = linker[i];
2211 if(rawSequence.length() < oligo.length()){
2212 success = ldiffs +10;
2216 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2217 seq.setUnaligned(rawSequence.substr(oligo.length()));
2223 //if you found the linker or if you don't want to allow for diffs
2224 if ((ldiffs == 0) || (success == 0)) { return success; }
2226 else { //try aligning and see if you can find it
2227 Alignment* alignment;
2228 if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2229 else{ alignment = NULL; }
2231 //can you find the barcode
2236 for(int i = 0; i < linker.size(); i++){
2237 string oligo = linker[i];
2238 // int length = oligo.length();
2240 if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length
2241 success = ldiffs + 10;
2245 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2246 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2247 oligo = alignment->getSeqAAln();
2248 string temp = alignment->getSeqBAln();
2250 int alnLength = oligo.length();
2252 for(int i=oligo.length()-1;i>=0;i--){
2253 if(oligo[i] != '-'){ alnLength = i+1; break; }
2255 oligo = oligo.substr(0,alnLength);
2256 temp = temp.substr(0,alnLength);
2258 int numDiff = countDiffs(oligo, temp);
2260 if(numDiff < minDiff){
2264 for(int i=0;i<alnLength;i++){
2270 else if(numDiff == minDiff){
2276 if(minDiff > ldiffs) { success = minDiff; } //no good matches
2277 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
2278 else{ //use the best match
2279 seq.setUnaligned(rawSequence.substr(minPos));
2283 if (alignment != NULL) { delete alignment; }
2290 catch(exception& e) {
2291 m->errorOut(e, "TrimOligos", "stripLinker");
2296 //******************************************************************/
2297 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
2299 string rawSequence = seq.getUnaligned();
2300 bool success = sdiffs+1; //guilty until proven innocent
2302 for(int i=0;i<spacer.size();i++){
2303 string oligo = spacer[i];
2305 if(rawSequence.length() < oligo.length()){
2306 success = sdiffs+10;
2310 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2311 seq.setUnaligned(rawSequence.substr(oligo.length()));
2312 if(qual.getName() != ""){
2313 qual.trimQScores(oligo.length(), -1);
2320 //if you found the spacer or if you don't want to allow for diffs
2321 if ((sdiffs == 0) || (success == 0)) { return success; }
2323 else { //try aligning and see if you can find it
2324 Alignment* alignment;
2325 if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2326 else{ alignment = NULL; }
2328 //can you find the barcode
2333 for(int i = 0; i < spacer.size(); i++){
2334 string oligo = spacer[i];
2335 // int length = oligo.length();
2337 if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length
2338 success = sdiffs + 10;
2342 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2343 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2344 oligo = alignment->getSeqAAln();
2345 string temp = alignment->getSeqBAln();
2347 int alnLength = oligo.length();
2349 for(int i=oligo.length()-1;i>=0;i--){
2350 if(oligo[i] != '-'){ alnLength = i+1; break; }
2352 oligo = oligo.substr(0,alnLength);
2353 temp = temp.substr(0,alnLength);
2355 int numDiff = countDiffs(oligo, temp);
2357 if(numDiff < minDiff){
2361 for(int i=0;i<alnLength;i++){
2367 else if(numDiff == minDiff){
2373 if(minDiff > sdiffs) { success = minDiff; } //no good matches
2374 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
2375 else{ //use the best match
2376 seq.setUnaligned(rawSequence.substr(minPos));
2378 if(qual.getName() != ""){
2379 qual.trimQScores(minPos, -1);
2384 if (alignment != NULL) { delete alignment; }
2392 catch(exception& e) {
2393 m->errorOut(e, "TrimOligos", "stripSpacer");
2397 //******************************************************************/
2398 bool TrimOligos::stripSpacer(Sequence& seq){
2401 string rawSequence = seq.getUnaligned();
2402 bool success = sdiffs+1; //guilty until proven innocent
2404 for(int i=0;i<spacer.size();i++){
2405 string oligo = spacer[i];
2407 if(rawSequence.length() < oligo.length()){
2408 success = sdiffs+10;
2412 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2413 seq.setUnaligned(rawSequence.substr(oligo.length()));
2419 //if you found the spacer or if you don't want to allow for diffs
2420 if ((sdiffs == 0) || (success == 0)) { return success; }
2422 else { //try aligning and see if you can find it
2423 Alignment* alignment;
2424 if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2425 else{ alignment = NULL; }
2427 //can you find the barcode
2432 for(int i = 0; i < spacer.size(); i++){
2433 string oligo = spacer[i];
2434 // int length = oligo.length();
2436 if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length
2437 success = sdiffs + 10;
2441 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2442 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2443 oligo = alignment->getSeqAAln();
2444 string temp = alignment->getSeqBAln();
2446 int alnLength = oligo.length();
2448 for(int i=oligo.length()-1;i>=0;i--){
2449 if(oligo[i] != '-'){ alnLength = i+1; break; }
2451 oligo = oligo.substr(0,alnLength);
2452 temp = temp.substr(0,alnLength);
2454 int numDiff = countDiffs(oligo, temp);
2456 if(numDiff < minDiff){
2460 for(int i=0;i<alnLength;i++){
2466 else if(numDiff == minDiff){
2472 if(minDiff > sdiffs) { success = minDiff; } //no good matches
2473 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
2474 else{ //use the best match
2475 seq.setUnaligned(rawSequence.substr(minPos));
2479 if (alignment != NULL) { delete alignment; }
2486 catch(exception& e) {
2487 m->errorOut(e, "TrimOligos", "stripSpacer");
2492 //******************************************************************/
2493 bool TrimOligos::compareDNASeq(string oligo, string seq){
2496 int length = oligo.length();
2498 for(int i=0;i<length;i++){
2500 if(oligo[i] != seq[i]){
2501 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
2502 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
2503 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
2504 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
2505 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
2506 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2507 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
2508 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
2509 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2510 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2511 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
2512 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
2514 if(success == 0) { break; }
2523 catch(exception& e) {
2524 m->errorOut(e, "TrimOligos", "compareDNASeq");
2529 //********************************************************************/
2530 int TrimOligos::countDiffs(string oligo, string seq){
2533 int length = oligo.length();
2536 for(int i=0;i<length;i++){
2538 if(oligo[i] != seq[i]){
2539 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
2540 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
2541 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
2542 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
2543 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
2544 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2545 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
2546 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
2547 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2548 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2549 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
2550 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
2557 catch(exception& e) {
2558 m->errorOut(e, "TrimOligos", "countDiffs");
2562 //********************************************************************/
2563 string TrimOligos::reverseOligo(string oligo){
2565 string reverse = "";
2567 for(int i=oligo.length()-1;i>=0;i--){
2569 if(oligo[i] == 'A') { reverse += 'T'; }
2570 else if(oligo[i] == 'T'){ reverse += 'A'; }
2571 else if(oligo[i] == 'U'){ reverse += 'A'; }
2573 else if(oligo[i] == 'G'){ reverse += 'C'; }
2574 else if(oligo[i] == 'C'){ reverse += 'G'; }
2576 else if(oligo[i] == 'R'){ reverse += 'Y'; }
2577 else if(oligo[i] == 'Y'){ reverse += 'R'; }
2579 else if(oligo[i] == 'M'){ reverse += 'K'; }
2580 else if(oligo[i] == 'K'){ reverse += 'M'; }
2582 else if(oligo[i] == 'W'){ reverse += 'W'; }
2583 else if(oligo[i] == 'S'){ reverse += 'S'; }
2585 else if(oligo[i] == 'B'){ reverse += 'V'; }
2586 else if(oligo[i] == 'V'){ reverse += 'B'; }
2588 else if(oligo[i] == 'D'){ reverse += 'H'; }
2589 else if(oligo[i] == 'H'){ reverse += 'D'; }
2591 else if(oligo[i] == '-'){ reverse += '-'; }
2592 else { reverse += 'N'; }
2598 catch(exception& e) {
2599 m->errorOut(e, "TrimOligos", "reverseOligo");
2604 /********************************************************************/