5 * Created by westcott on 9/1/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
19 m = MothurOut::getInstance();
32 maxFBarcodeLength = 0;
33 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
34 if(it->first.length() > maxFBarcodeLength){
35 maxFBarcodeLength = it->first.length();
39 map<string,int>::iterator it;
40 for(it=primers.begin();it!=primers.end();it++){
41 if(it->first.length() > maxFPrimerLength){
42 maxFPrimerLength = it->first.length();
47 for(int i = 0; i < linker.size(); i++){
48 if(linker[i].length() > maxLinkerLength){
49 maxLinkerLength = linker[i].length();
54 for(int i = 0; i < spacer.size(); i++){
55 if(spacer[i].length() > maxSpacerLength){
56 maxSpacerLength = spacer[i].length();
61 m->errorOut(e, "TrimOligos", "TrimOligos");
65 /********************************************************************/
66 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
67 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
69 m = MothurOut::getInstance();
77 maxFBarcodeLength = 0;
78 for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
79 string forward = it->second.forward;
80 map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
82 if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length(); }
84 if (itForward == ifbarcodes.end()) {
85 vector<int> temp; temp.push_back(it->first);
86 ifbarcodes[forward] = temp;
87 }else { itForward->second.push_back(it->first); }
91 for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
92 string forward = it->second.forward;
93 map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
95 if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length(); }
97 if (itForward == ifprimers.end()) {
98 vector<int> temp; temp.push_back(it->first);
99 ifprimers[forward] = temp;
100 }else { itForward->second.push_back(it->first); }
103 maxRBarcodeLength = 0;
104 for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
105 string forward = it->second.reverse;
106 map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
108 if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length(); }
110 if (itForward == irbarcodes.end()) {
111 vector<int> temp; temp.push_back(it->first);
112 irbarcodes[forward] = temp;
113 }else { itForward->second.push_back(it->first); }
116 maxRPrimerLength = 0;
117 for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
118 string forward = it->second.reverse;
119 map<string, vector<int> >::iterator itForward = irprimers.find(forward);
121 if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length(); }
123 if (itForward == irprimers.end()) {
124 vector<int> temp; temp.push_back(it->first);
125 irprimers[forward] = temp;
126 }else { itForward->second.push_back(it->first); }
132 catch(exception& e) {
133 m->errorOut(e, "TrimOligos", "TrimOligos");
137 /********************************************************************/
138 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
139 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
141 m = MothurOut::getInstance();
151 maxFBarcodeLength = 0;
152 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
153 string oligo = it->first;
154 if(oligo.length() > maxFBarcodeLength){
155 maxFBarcodeLength = oligo.length();
158 maxRBarcodeLength = maxFBarcodeLength;
160 maxFPrimerLength = 0;
161 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
162 string oligo = it->first;
163 if(oligo.length() > maxFPrimerLength){
164 maxFPrimerLength = oligo.length();
167 maxRPrimerLength = maxFPrimerLength;
169 catch(exception& e) {
170 m->errorOut(e, "TrimOligos", "TrimOligos");
174 /********************************************************************/
175 TrimOligos::~TrimOligos() {}
176 //********************************************************************/
177 bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
180 string rawSequence = seq.getUnaligned();
182 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
183 string oligo = it->first;
185 if(rawSequence.length() < oligo.length()) { break; }
188 int olength = oligo.length();
189 for (int j = 0; j < rawSequence.length()-olength; j++){
190 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
191 string rawChunk = rawSequence.substr(j, olength);
192 if(compareDNASeq(oligo, rawChunk)) {
194 primerEnd = primerStart + olength;
201 primerStart = 0; primerEnd = 0;
202 //if you don't want to allow for diffs
203 if (pdiffs == 0) { return false; }
204 else { //try aligning and see if you can find it
206 //can you find the barcode
210 Alignment* alignment;
211 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
212 else{ alignment = NULL; }
214 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
216 string prim = it->first;
218 int olength = prim.length();
219 if (rawSequence.length() < olength) {} //ignore primers too long for this seq
221 for (int j = 0; j < rawSequence.length()-olength; j++){
223 string oligo = it->first;
225 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
227 string rawChunk = rawSequence.substr(j, olength+pdiffs);
229 //use needleman to align first primer.length()+numdiffs of sequence to each barcode
230 alignment->alignPrimer(oligo, rawChunk);
231 oligo = alignment->getSeqAAln();
232 string temp = alignment->getSeqBAln();
234 int alnLength = oligo.length();
236 for(int i=oligo.length()-1;i>=0;i--){
237 if(oligo[i] != '-'){ alnLength = i+1; break; }
239 oligo = oligo.substr(0,alnLength);
240 temp = temp.substr(0,alnLength);
242 int numDiff = countDiffs(oligo, temp);
244 if(numDiff < minDiff){
248 primerEnd = primerStart + alnLength;
249 }else if(numDiff == minDiff){ minCount++; }
254 if (alignment != NULL) { delete alignment; }
256 if(minDiff > pdiffs) { primerStart = 0; primerEnd = 0; return false; } //no good matches
257 else if(minCount > 1) { primerStart = 0; primerEnd = 0; return false; } //can't tell the difference between multiple primers
261 primerStart = 0; primerEnd = 0;
265 catch(exception& e) {
266 m->errorOut(e, "TrimOligos", "stripForward");
270 //******************************************************************/
271 bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
274 string rawSequence = seq.getUnaligned();
276 for(int i=0;i<revPrimer.size();i++){
277 string oligo = revPrimer[i];
278 if(rawSequence.length() < oligo.length()) { break; }
281 int olength = oligo.length();
282 for (int j = rawSequence.length()-olength; j >= 0; j--){
283 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
284 string rawChunk = rawSequence.substr(j, olength);
286 if(compareDNASeq(oligo, rawChunk)) {
288 primerEnd = primerStart + olength;
295 primerStart = 0; primerEnd = 0;
298 catch(exception& e) {
299 m->errorOut(e, "PcrSeqsCommand", "findReverse");
303 //*******************************************************************/
305 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
308 if (paired) { int success = stripPairedBarcode(seq, qual, group); return success; }
310 string rawSequence = seq.getUnaligned();
311 int success = bdiffs + 1; //guilty until proven innocent
313 //can you find the barcode
314 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
315 string oligo = it->first;
316 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
317 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
321 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
323 seq.setUnaligned(rawSequence.substr(oligo.length()));
325 if(qual.getName() != ""){
326 qual.trimQScores(oligo.length(), -1);
334 //if you found the barcode or if you don't want to allow for diffs
335 if ((bdiffs == 0) || (success == 0)) { return success; }
337 else { //try aligning and see if you can find it
338 Alignment* alignment;
339 if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
340 else{ alignment = NULL; }
342 //can you find the barcode
348 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
349 string oligo = it->first;
350 // int length = oligo.length();
352 if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
353 success = bdiffs + 10;
357 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
358 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
359 oligo = alignment->getSeqAAln();
360 string temp = alignment->getSeqBAln();
362 int alnLength = oligo.length();
364 for(int i=oligo.length()-1;i>=0;i--){
365 if(oligo[i] != '-'){ alnLength = i+1; break; }
367 oligo = oligo.substr(0,alnLength);
368 temp = temp.substr(0,alnLength);
369 int numDiff = countDiffs(oligo, temp);
371 if(numDiff < minDiff){
374 minGroup = it->second;
376 for(int i=0;i<alnLength;i++){
382 else if(numDiff == minDiff){
388 if(minDiff > bdiffs) { success = minDiff; } //no good matches
389 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
390 else{ //use the best match
392 seq.setUnaligned(rawSequence.substr(minPos));
394 if(qual.getName() != ""){
395 qual.trimQScores(minPos, -1);
400 if (alignment != NULL) { delete alignment; }
407 catch(exception& e) {
408 m->errorOut(e, "TrimOligos", "stripBarcode");
412 //*******************************************************************/
413 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
415 //look for forward barcode
416 string rawFSequence = forwardSeq.getUnaligned();
417 string rawRSequence = reverseSeq.getUnaligned();
418 int success = bdiffs + 1; //guilty until proven innocent
420 //can you find the forward barcode
421 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
422 string foligo = it->second.forward;
423 string roligo = it->second.reverse;
425 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
426 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
429 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
430 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
434 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
436 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
437 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
443 //if you found the barcode or if you don't want to allow for diffs
444 if ((bdiffs == 0) || (success == 0)) { return success; }
445 else { //try aligning and see if you can find it
446 Alignment* alignment;
447 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
448 else{ alignment = NULL; }
450 //can you find the barcode
453 vector< vector<int> > minFGroup;
456 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
465 only want to look for forward = Sarah, John, Anna, Pat, Gail
466 reverse = Westcott, Schloss, Brown, Moore
467 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
469 //cout << endl << forwardSeq.getName() << endl;
470 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
471 string oligo = it->first;
473 if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
474 success = bdiffs + 10;
477 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
478 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
479 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
480 oligo = alignment->getSeqAAln();
481 string temp = alignment->getSeqBAln();
483 int alnLength = oligo.length();
485 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
486 oligo = oligo.substr(0,alnLength);
487 temp = temp.substr(0,alnLength);
488 int numDiff = countDiffs(oligo, temp);
490 if (alnLength == 0) { numDiff = bdiffs + 100; }
491 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
493 if(numDiff < minDiff){
497 minFGroup.push_back(it->second);
500 for(int i=0;i<alnLength;i++){
505 minFPos.push_back(tempminFPos);
506 }else if(numDiff == minDiff){
507 minFGroup.push_back(it->second);
509 for(int i=0;i<alnLength;i++){
514 minFPos.push_back(tempminFPos);
518 //cout << minDiff << '\t' << minCount << '\t' << endl;
519 if(minDiff > bdiffs) { success = minDiff; } //no good matches
521 //check for reverse match
522 if (alignment != NULL) { delete alignment; }
524 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
525 else{ alignment = NULL; }
527 //can you find the barcode
530 vector< vector<int> > minRGroup;
533 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
534 string oligo = it->first;
535 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
536 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
537 success = bdiffs + 10;
541 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
542 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
543 oligo = alignment->getSeqAAln();
544 string temp = alignment->getSeqBAln();
546 int alnLength = oligo.length();
547 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
548 oligo = oligo.substr(0,alnLength);
549 temp = temp.substr(0,alnLength);
550 int numDiff = countDiffs(oligo, temp);
551 if (alnLength == 0) { numDiff = bdiffs + 100; }
553 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
554 if(numDiff < minDiff){
558 minRGroup.push_back(it->second);
561 for(int i=0;i<alnLength;i++){
566 minRPos.push_back(tempminRPos);
567 }else if(numDiff == minDiff){
569 for(int i=0;i<alnLength;i++){
574 minRPos.push_back(tempminRPos);
575 minRGroup.push_back(it->second);
580 if(minDiff > bdiffs) { success = minDiff; } //no good matches
582 bool foundMatch = false;
584 for (int i = 0; i < minFGroup.size(); i++) {
585 for (int j = 0; j < minFGroup[i].size(); j++) {
586 for (int k = 0; k < minRGroup.size(); k++) {
587 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
594 if (matches.size() == 1) {
601 //we have an acceptable match for the forward and reverse, but do they match?
603 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
604 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
606 }else { success = bdiffs + 100; }
610 if (alignment != NULL) { delete alignment; }
616 catch(exception& e) {
617 m->errorOut(e, "TrimOligos", "stripIBarcode");
622 //*******************************************************************/
623 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
625 //look for forward barcode
626 string rawFSequence = forwardSeq.getUnaligned();
627 string rawRSequence = reverseSeq.getUnaligned();
628 int success = bdiffs + 1; //guilty until proven innocent
630 //can you find the forward barcode
631 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
632 string foligo = it->second.forward;
633 string roligo = it->second.reverse;
635 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
636 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
639 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
640 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
644 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
646 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
647 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
648 forwardQual.trimQScores(foligo.length(), -1);
649 reverseQual.trimQScores(roligo.length(), -1);
655 //if you found the barcode or if you don't want to allow for diffs
656 if ((bdiffs == 0) || (success == 0)) { return success; }
657 else { //try aligning and see if you can find it
658 Alignment* alignment;
659 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
660 else{ alignment = NULL; }
662 //can you find the barcode
665 vector< vector<int> > minFGroup;
668 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
677 only want to look for forward = Sarah, John, Anna, Pat, Gail
678 reverse = Westcott, Schloss, Brown, Moore
679 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
681 //cout << endl << forwardSeq.getName() << endl;
682 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
683 string oligo = it->first;
685 if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
686 success = bdiffs + 10;
689 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
690 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
691 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
692 oligo = alignment->getSeqAAln();
693 string temp = alignment->getSeqBAln();
695 int alnLength = oligo.length();
697 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
698 oligo = oligo.substr(0,alnLength);
699 temp = temp.substr(0,alnLength);
700 int numDiff = countDiffs(oligo, temp);
702 if (alnLength == 0) { numDiff = bdiffs + 100; }
703 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
705 if(numDiff < minDiff){
709 minFGroup.push_back(it->second);
712 for(int i=0;i<alnLength;i++){
717 minFPos.push_back(tempminFPos);
718 }else if(numDiff == minDiff){
719 minFGroup.push_back(it->second);
721 for(int i=0;i<alnLength;i++){
726 minFPos.push_back(tempminFPos);
730 //cout << minDiff << '\t' << minCount << '\t' << endl;
731 if(minDiff > bdiffs) { success = minDiff; } //no good matches
733 //check for reverse match
734 if (alignment != NULL) { delete alignment; }
736 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
737 else{ alignment = NULL; }
739 //can you find the barcode
742 vector< vector<int> > minRGroup;
745 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
746 string oligo = it->first;
747 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
748 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
749 success = bdiffs + 10;
753 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
754 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
755 oligo = alignment->getSeqAAln();
756 string temp = alignment->getSeqBAln();
758 int alnLength = oligo.length();
759 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
760 oligo = oligo.substr(0,alnLength);
761 temp = temp.substr(0,alnLength);
762 int numDiff = countDiffs(oligo, temp);
763 if (alnLength == 0) { numDiff = bdiffs + 100; }
765 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
766 if(numDiff < minDiff){
770 minRGroup.push_back(it->second);
773 for(int i=0;i<alnLength;i++){
778 minRPos.push_back(tempminRPos);
779 }else if(numDiff == minDiff){
781 for(int i=0;i<alnLength;i++){
786 minRPos.push_back(tempminRPos);
787 minRGroup.push_back(it->second);
792 if(minDiff > bdiffs) { success = minDiff; } //no good matches
794 bool foundMatch = false;
796 for (int i = 0; i < minFGroup.size(); i++) {
797 for (int j = 0; j < minFGroup[i].size(); j++) {
798 for (int k = 0; k < minRGroup.size(); k++) {
799 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
806 if (matches.size() == 1) {
813 //we have an acceptable match for the forward and reverse, but do they match?
815 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
816 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
817 forwardQual.trimQScores(fStart, -1);
818 reverseQual.trimQScores(rStart, -1);
820 }else { success = bdiffs + 100; }
824 if (alignment != NULL) { delete alignment; }
830 catch(exception& e) {
831 m->errorOut(e, "TrimOligos", "stripIBarcode");
836 //*******************************************************************/
837 int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
839 //look for forward barcode
840 string rawSeq = seq.getUnaligned();
841 int success = bdiffs + 1; //guilty until proven innocent
842 //cout << seq.getName() << endl;
843 //can you find the forward barcode
844 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
845 string foligo = it->second.forward;
846 string roligo = it->second.reverse;
847 //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
848 //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
849 if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
850 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
853 if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
854 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
858 if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
860 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
861 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
862 if(qual.getName() != ""){
863 qual.trimQScores(-1, rawSeq.length()-roligo.length());
864 qual.trimQScores(foligo.length(), -1);
870 //cout << "success=" << success << endl;
871 //if you found the barcode or if you don't want to allow for diffs
872 if ((bdiffs == 0) || (success == 0)) { return success; }
873 else { //try aligning and see if you can find it
874 Alignment* alignment;
875 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
876 else{ alignment = NULL; }
878 //can you find the barcode
881 vector< vector<int> > minFGroup;
884 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
893 only want to look for forward = Sarah, John, Anna, Pat, Gail
894 reverse = Westcott, Schloss, Brown, Moore
895 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
897 //cout << endl << forwardSeq.getName() << endl;
898 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
899 string oligo = it->first;
901 if(rawSeq.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
902 success = bdiffs + 10;
905 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
906 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
907 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
908 oligo = alignment->getSeqAAln();
909 string temp = alignment->getSeqBAln();
911 int alnLength = oligo.length();
913 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
914 oligo = oligo.substr(0,alnLength);
915 temp = temp.substr(0,alnLength);
916 int numDiff = countDiffs(oligo, temp);
918 if (alnLength == 0) { numDiff = bdiffs + 100; }
919 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
921 if(numDiff < minDiff){
925 minFGroup.push_back(it->second);
928 for(int i=0;i<alnLength;i++){
933 minFPos.push_back(tempminFPos);
934 }else if(numDiff == minDiff){
935 minFGroup.push_back(it->second);
937 for(int i=0;i<alnLength;i++){
942 minFPos.push_back(tempminFPos);
946 //cout << minDiff << '\t' << minCount << '\t' << endl;
947 if(minDiff > bdiffs) { success = minDiff; } //no good matches
949 //check for reverse match
950 if (alignment != NULL) { delete alignment; }
952 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
953 else{ alignment = NULL; }
955 //can you find the barcode
958 vector< vector<int> > minRGroup;
961 string rawRSequence = reverseOligo(seq.getUnaligned());
962 //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl;
963 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
964 string oligo = reverseOligo(it->first);
965 //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
966 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
967 success = bdiffs + 10;
971 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
972 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
973 oligo = alignment->getSeqAAln();
974 string temp = alignment->getSeqBAln();
976 int alnLength = oligo.length();
977 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
978 oligo = oligo.substr(0,alnLength);
979 temp = temp.substr(0,alnLength);
980 int numDiff = countDiffs(oligo, temp);
981 if (alnLength == 0) { numDiff = bdiffs + 100; }
983 //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
984 if(numDiff < minDiff){
988 minRGroup.push_back(it->second);
991 for(int i=0;i<alnLength;i++){
996 minRPos.push_back(tempminRPos);
997 }else if(numDiff == minDiff){
999 for(int i=0;i<alnLength;i++){
1004 minRPos.push_back(tempminRPos);
1005 minRGroup.push_back(it->second);
1010 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1012 bool foundMatch = false;
1013 vector<int> matches;
1014 for (int i = 0; i < minFGroup.size(); i++) {
1015 for (int j = 0; j < minFGroup[i].size(); j++) {
1016 for (int k = 0; k < minRGroup.size(); k++) {
1017 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1024 if (matches.size() == 1) {
1027 fStart = minFPos[0];
1028 rStart = rawSeq.length() - minRPos[0];
1029 if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence
1032 //we have an acceptable match for the forward and reverse, but do they match?
1034 string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1035 seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1036 if(qual.getName() != ""){
1037 qual.trimQScores(-1, rStart);
1038 qual.trimQScores(fStart, -1);
1041 //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
1042 }else { success = bdiffs + 100; }
1046 if (alignment != NULL) { delete alignment; }
1052 catch(exception& e) {
1053 m->errorOut(e, "TrimOligos", "stripPairedBarcode");
1058 //*******************************************************************/
1059 int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1062 string rawSeq = seq.getUnaligned();
1063 int success = pdiffs + 1; //guilty until proven innocent
1064 //cout << seq.getName() << endl;
1065 //can you find the forward
1066 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1067 string foligo = it->second.forward;
1068 string roligo = it->second.reverse;
1070 //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
1071 //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
1072 if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1073 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1076 if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1077 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1081 if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
1084 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
1085 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
1086 if(qual.getName() != ""){
1087 qual.trimQScores(-1, rawSeq.length()-roligo.length());
1088 qual.trimQScores(foligo.length(), -1);
1095 //cout << "success=" << success << endl;
1096 //if you found the barcode or if you don't want to allow for diffs
1097 if ((pdiffs == 0) || (success == 0)) { return success; }
1098 else { //try aligning and see if you can find it
1099 Alignment* alignment;
1100 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1101 else{ alignment = NULL; }
1103 //can you find the barcode
1106 vector< vector<int> > minFGroup;
1107 vector<int> minFPos;
1109 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1118 only want to look for forward = Sarah, John, Anna, Pat, Gail
1119 reverse = Westcott, Schloss, Brown, Moore
1120 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1122 //cout << endl << forwardSeq.getName() << endl;
1123 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1124 string oligo = it->first;
1126 if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1127 success = pdiffs + 10;
1130 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl;
1131 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1132 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
1133 oligo = alignment->getSeqAAln();
1134 string temp = alignment->getSeqBAln();
1136 int alnLength = oligo.length();
1138 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1139 oligo = oligo.substr(0,alnLength);
1140 temp = temp.substr(0,alnLength);
1141 int numDiff = countDiffs(oligo, temp);
1143 if (alnLength == 0) { numDiff = pdiffs + 100; }
1144 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1146 if(numDiff < minDiff){
1150 minFGroup.push_back(it->second);
1151 int tempminFPos = 0;
1153 for(int i=0;i<alnLength;i++){
1158 minFPos.push_back(tempminFPos);
1159 }else if(numDiff == minDiff){
1160 minFGroup.push_back(it->second);
1161 int tempminFPos = 0;
1162 for(int i=0;i<alnLength;i++){
1167 minFPos.push_back(tempminFPos);
1171 //cout << minDiff << '\t' << minCount << '\t' << endl;
1172 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1174 //check for reverse match
1175 if (alignment != NULL) { delete alignment; }
1177 if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1178 else{ alignment = NULL; }
1180 //can you find the barcode
1183 vector< vector<int> > minRGroup;
1184 vector<int> minRPos;
1186 string rawRSequence = reverseOligo(seq.getUnaligned());
1188 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1189 string oligo = reverseOligo(it->first);
1190 //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
1191 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1192 success = pdiffs + 10;
1196 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1197 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1198 oligo = alignment->getSeqAAln();
1199 string temp = alignment->getSeqBAln();
1201 int alnLength = oligo.length();
1202 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1203 oligo = oligo.substr(0,alnLength);
1204 temp = temp.substr(0,alnLength);
1205 int numDiff = countDiffs(oligo, temp);
1206 if (alnLength == 0) { numDiff = pdiffs + 100; }
1208 //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1209 if(numDiff < minDiff){
1213 minRGroup.push_back(it->second);
1214 int tempminRPos = 0;
1216 for(int i=0;i<alnLength;i++){
1221 minRPos.push_back(tempminRPos);
1222 }else if(numDiff == minDiff){
1223 int tempminRPos = 0;
1224 for(int i=0;i<alnLength;i++){
1229 minRPos.push_back(tempminRPos);
1230 minRGroup.push_back(it->second);
1235 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1237 bool foundMatch = false;
1238 vector<int> matches;
1239 for (int i = 0; i < minFGroup.size(); i++) {
1240 for (int j = 0; j < minFGroup[i].size(); j++) {
1241 for (int k = 0; k < minRGroup.size(); k++) {
1242 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1249 if (matches.size() == 1) {
1252 fStart = minFPos[0];
1253 rStart = rawSeq.length() - minRPos[0];
1254 if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence
1257 //we have an acceptable match for the forward and reverse, but do they match?
1260 string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1261 seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1262 if(qual.getName() != ""){
1263 qual.trimQScores(-1, rStart);
1264 qual.trimQScores(fStart, -1);
1268 //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
1269 }else { success = pdiffs + 100; }
1273 if (alignment != NULL) { delete alignment; }
1279 catch(exception& e) {
1280 m->errorOut(e, "TrimOligos", "stripPairedPrimers");
1286 //*******************************************************************/
1287 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
1289 //look for forward barcode
1290 string rawFSequence = forwardSeq.getUnaligned();
1291 string rawRSequence = reverseSeq.getUnaligned();
1292 int success = pdiffs + 1; //guilty until proven innocent
1294 //can you find the forward barcode
1295 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1296 string foligo = it->second.forward;
1297 string roligo = it->second.reverse;
1299 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1300 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1303 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1304 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1308 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
1310 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1311 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1312 forwardQual.trimQScores(foligo.length(), -1);
1313 reverseQual.trimQScores(roligo.length(), -1);
1319 //if you found the barcode or if you don't want to allow for diffs
1320 if ((pdiffs == 0) || (success == 0)) { return success; }
1321 else { //try aligning and see if you can find it
1322 Alignment* alignment;
1323 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1324 else{ alignment = NULL; }
1326 //can you find the barcode
1329 vector< vector<int> > minFGroup;
1330 vector<int> minFPos;
1332 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1341 only want to look for forward = Sarah, John, Anna, Pat, Gail
1342 reverse = Westcott, Schloss, Brown, Moore
1343 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1345 //cout << endl << forwardSeq.getName() << endl;
1346 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1347 string oligo = it->first;
1349 if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1350 success = pdiffs + 10;
1353 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1354 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1355 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1356 oligo = alignment->getSeqAAln();
1357 string temp = alignment->getSeqBAln();
1359 int alnLength = oligo.length();
1361 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1362 oligo = oligo.substr(0,alnLength);
1363 temp = temp.substr(0,alnLength);
1364 int numDiff = countDiffs(oligo, temp);
1366 if (alnLength == 0) { numDiff = pdiffs + 100; }
1367 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1369 if(numDiff < minDiff){
1373 minFGroup.push_back(it->second);
1374 int tempminFPos = 0;
1376 for(int i=0;i<alnLength;i++){
1381 minFPos.push_back(tempminFPos);
1382 }else if(numDiff == minDiff){
1383 minFGroup.push_back(it->second);
1384 int tempminFPos = 0;
1385 for(int i=0;i<alnLength;i++){
1390 minFPos.push_back(tempminFPos);
1394 //cout << minDiff << '\t' << minCount << '\t' << endl;
1395 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1397 //check for reverse match
1398 if (alignment != NULL) { delete alignment; }
1400 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1401 else{ alignment = NULL; }
1403 //can you find the barcode
1406 vector< vector<int> > minRGroup;
1407 vector<int> minRPos;
1409 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1410 string oligo = it->first;
1411 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1412 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1413 success = pdiffs + 10;
1417 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1418 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1419 oligo = alignment->getSeqAAln();
1420 string temp = alignment->getSeqBAln();
1422 int alnLength = oligo.length();
1423 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1424 oligo = oligo.substr(0,alnLength);
1425 temp = temp.substr(0,alnLength);
1426 int numDiff = countDiffs(oligo, temp);
1427 if (alnLength == 0) { numDiff = pdiffs + 100; }
1429 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1430 if(numDiff < minDiff){
1434 minRGroup.push_back(it->second);
1435 int tempminRPos = 0;
1437 for(int i=0;i<alnLength;i++){
1442 minRPos.push_back(tempminRPos);
1443 }else if(numDiff == minDiff){
1444 int tempminRPos = 0;
1445 for(int i=0;i<alnLength;i++){
1450 minRPos.push_back(tempminRPos);
1451 minRGroup.push_back(it->second);
1456 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1458 bool foundMatch = false;
1459 vector<int> matches;
1460 for (int i = 0; i < minFGroup.size(); i++) {
1461 for (int j = 0; j < minFGroup[i].size(); j++) {
1462 for (int k = 0; k < minRGroup.size(); k++) {
1463 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1470 if (matches.size() == 1) {
1473 fStart = minFPos[0];
1474 rStart = minRPos[0];
1477 //we have an acceptable match for the forward and reverse, but do they match?
1479 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1480 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1481 forwardQual.trimQScores(fStart, -1);
1482 reverseQual.trimQScores(rStart, -1);
1484 }else { success = pdiffs + 100; }
1488 if (alignment != NULL) { delete alignment; }
1494 catch(exception& e) {
1495 m->errorOut(e, "TrimOligos", "stripIForward");
1500 //*******************************************************************/
1501 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
1503 //look for forward barcode
1504 string rawFSequence = forwardSeq.getUnaligned();
1505 string rawRSequence = reverseSeq.getUnaligned();
1506 int success = pdiffs + 1; //guilty until proven innocent
1508 //can you find the forward barcode
1509 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1510 string foligo = it->second.forward;
1511 string roligo = it->second.reverse;
1513 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1514 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1517 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1518 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1522 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1524 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1525 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1531 //if you found the barcode or if you don't want to allow for diffs
1532 if ((pdiffs == 0) || (success == 0)) { return success; }
1533 else { //try aligning and see if you can find it
1534 Alignment* alignment;
1535 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1536 else{ alignment = NULL; }
1538 //can you find the barcode
1541 vector< vector<int> > minFGroup;
1542 vector<int> minFPos;
1544 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1553 only want to look for forward = Sarah, John, Anna, Pat, Gail
1554 reverse = Westcott, Schloss, Brown, Moore
1555 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1557 //cout << endl << forwardSeq.getName() << endl;
1558 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1559 string oligo = it->first;
1561 if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1562 success = pdiffs + 10;
1565 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1566 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1567 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1568 oligo = alignment->getSeqAAln();
1569 string temp = alignment->getSeqBAln();
1571 int alnLength = oligo.length();
1573 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1574 oligo = oligo.substr(0,alnLength);
1575 temp = temp.substr(0,alnLength);
1576 int numDiff = countDiffs(oligo, temp);
1578 if (alnLength == 0) { numDiff = pdiffs + 100; }
1579 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1581 if(numDiff < minDiff){
1585 minFGroup.push_back(it->second);
1586 int tempminFPos = 0;
1588 for(int i=0;i<alnLength;i++){
1593 minFPos.push_back(tempminFPos);
1594 }else if(numDiff == minDiff){
1595 minFGroup.push_back(it->second);
1596 int tempminFPos = 0;
1597 for(int i=0;i<alnLength;i++){
1602 minFPos.push_back(tempminFPos);
1606 //cout << minDiff << '\t' << minCount << '\t' << endl;
1607 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1609 //check for reverse match
1610 if (alignment != NULL) { delete alignment; }
1612 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1613 else{ alignment = NULL; }
1615 //can you find the barcode
1618 vector< vector<int> > minRGroup;
1619 vector<int> minRPos;
1621 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1622 string oligo = it->first;
1623 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1624 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1625 success = pdiffs + 10;
1629 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1630 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1631 oligo = alignment->getSeqAAln();
1632 string temp = alignment->getSeqBAln();
1634 int alnLength = oligo.length();
1635 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1636 oligo = oligo.substr(0,alnLength);
1637 temp = temp.substr(0,alnLength);
1638 int numDiff = countDiffs(oligo, temp);
1639 if (alnLength == 0) { numDiff = pdiffs + 100; }
1641 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1642 if(numDiff < minDiff){
1646 minRGroup.push_back(it->second);
1647 int tempminRPos = 0;
1649 for(int i=0;i<alnLength;i++){
1654 minRPos.push_back(tempminRPos);
1655 }else if(numDiff == minDiff){
1656 int tempminRPos = 0;
1657 for(int i=0;i<alnLength;i++){
1662 minRPos.push_back(tempminRPos);
1663 minRGroup.push_back(it->second);
1668 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1670 bool foundMatch = false;
1671 vector<int> matches;
1672 for (int i = 0; i < minFGroup.size(); i++) {
1673 for (int j = 0; j < minFGroup[i].size(); j++) {
1674 for (int k = 0; k < minRGroup.size(); k++) {
1675 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1682 if (matches.size() == 1) {
1685 fStart = minFPos[0];
1686 rStart = minRPos[0];
1689 //we have an acceptable match for the forward and reverse, but do they match?
1691 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1692 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1694 }else { success = pdiffs + 100; }
1698 if (alignment != NULL) { delete alignment; }
1704 catch(exception& e) {
1705 m->errorOut(e, "TrimOligos", "stripIForward");
1710 //*******************************************************************/
1711 int TrimOligos::stripBarcode(Sequence& seq, int& group){
1714 string rawSequence = seq.getUnaligned();
1715 int success = bdiffs + 1; //guilty until proven innocent
1717 //can you find the barcode
1718 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1719 string oligo = it->first;
1720 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
1721 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
1725 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1727 seq.setUnaligned(rawSequence.substr(oligo.length()));
1734 //if you found the barcode or if you don't want to allow for diffs
1735 if ((bdiffs == 0) || (success == 0)) { return success; }
1737 else { //try aligning and see if you can find it
1738 Alignment* alignment;
1739 if (barcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
1740 else{ alignment = NULL; }
1742 //can you find the barcode
1748 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1749 string oligo = it->first;
1750 // int length = oligo.length();
1752 if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
1753 success = bdiffs + 10;
1757 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1758 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1759 oligo = alignment->getSeqAAln();
1760 string temp = alignment->getSeqBAln();
1762 int alnLength = oligo.length();
1764 for(int i=oligo.length()-1;i>=0;i--){
1765 if(oligo[i] != '-'){ alnLength = i+1; break; }
1767 oligo = oligo.substr(0,alnLength);
1768 temp = temp.substr(0,alnLength);
1770 int numDiff = countDiffs(oligo, temp);
1772 if(numDiff < minDiff){
1775 minGroup = it->second;
1777 for(int i=0;i<alnLength;i++){
1783 else if(numDiff == minDiff){
1789 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1790 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
1791 else{ //use the best match
1793 seq.setUnaligned(rawSequence.substr(minPos));
1797 if (alignment != NULL) { delete alignment; }
1804 catch(exception& e) {
1805 m->errorOut(e, "TrimOligos", "stripBarcode");
1811 /********************************************************************/
1812 int TrimOligos::stripForward(Sequence& seq, int& group){
1815 string rawSequence = seq.getUnaligned();
1816 int success = pdiffs + 1; //guilty until proven innocent
1818 //can you find the primer
1819 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1820 string oligo = it->first;
1821 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1822 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1826 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1828 seq.setUnaligned(rawSequence.substr(oligo.length()));
1834 //if you found the barcode or if you don't want to allow for diffs
1835 if ((pdiffs == 0) || (success == 0)) { return success; }
1837 else { //try aligning and see if you can find it
1838 Alignment* alignment;
1839 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1840 else{ alignment = NULL; }
1842 //can you find the barcode
1848 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1849 string oligo = it->first;
1850 // int length = oligo.length();
1852 if(rawSequence.length() < maxFPrimerLength){
1853 success = pdiffs + 100;
1857 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1858 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1859 oligo = alignment->getSeqAAln();
1860 string temp = alignment->getSeqBAln();
1862 int alnLength = oligo.length();
1864 for(int i=oligo.length()-1;i>=0;i--){
1865 if(oligo[i] != '-'){ alnLength = i+1; break; }
1867 oligo = oligo.substr(0,alnLength);
1868 temp = temp.substr(0,alnLength);
1870 int numDiff = countDiffs(oligo, temp);
1872 if(numDiff < minDiff){
1875 minGroup = it->second;
1877 for(int i=0;i<alnLength;i++){
1883 else if(numDiff == minDiff){
1889 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1890 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1891 else{ //use the best match
1893 seq.setUnaligned(rawSequence.substr(minPos));
1897 if (alignment != NULL) { delete alignment; }
1904 catch(exception& e) {
1905 m->errorOut(e, "TrimOligos", "stripForward");
1909 //*******************************************************************/
1910 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1913 if (paired) { int success = stripPairedPrimers(seq, qual, group, keepForward); return success; }
1915 string rawSequence = seq.getUnaligned();
1916 int success = pdiffs + 1; //guilty until proven innocent
1918 //can you find the primer
1919 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1920 string oligo = it->first;
1921 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1922 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1926 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1928 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
1929 if(qual.getName() != ""){
1930 if (!keepForward) { qual.trimQScores(oligo.length(), -1); }
1937 //if you found the barcode or if you don't want to allow for diffs
1938 if ((pdiffs == 0) || (success == 0)) { return success; }
1940 else { //try aligning and see if you can find it
1941 Alignment* alignment;
1942 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1943 else{ alignment = NULL; }
1945 //can you find the barcode
1951 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1952 string oligo = it->first;
1953 // int length = oligo.length();
1955 if(rawSequence.length() < maxFPrimerLength){
1956 success = pdiffs + 100;
1960 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1961 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1962 oligo = alignment->getSeqAAln();
1963 string temp = alignment->getSeqBAln();
1965 int alnLength = oligo.length();
1967 for(int i=oligo.length()-1;i>=0;i--){
1968 if(oligo[i] != '-'){ alnLength = i+1; break; }
1970 oligo = oligo.substr(0,alnLength);
1971 temp = temp.substr(0,alnLength);
1973 int numDiff = countDiffs(oligo, temp);
1975 if(numDiff < minDiff){
1978 minGroup = it->second;
1980 for(int i=0;i<alnLength;i++){
1986 else if(numDiff == minDiff){
1992 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1993 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1994 else{ //use the best match
1996 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
1997 if(qual.getName() != ""){
1998 if (!keepForward) { qual.trimQScores(minPos, -1); }
2003 if (alignment != NULL) { delete alignment; }
2010 catch(exception& e) {
2011 m->errorOut(e, "TrimOligos", "stripForward");
2015 //******************************************************************/
2016 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
2018 string rawSequence = seq.getUnaligned();
2019 bool success = 0; //guilty until proven innocent
2021 for(int i=0;i<revPrimer.size();i++){
2022 string oligo = revPrimer[i];
2024 if(rawSequence.length() < oligo.length()){
2029 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2030 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2031 if(qual.getName() != ""){
2032 qual.trimQScores(-1, rawSequence.length()-oligo.length());
2041 catch(exception& e) {
2042 m->errorOut(e, "TrimOligos", "stripReverse");
2046 //******************************************************************/
2047 bool TrimOligos::stripReverse(Sequence& seq){
2050 string rawSequence = seq.getUnaligned();
2051 bool success = 0; //guilty until proven innocent
2053 for(int i=0;i<revPrimer.size();i++){
2054 string oligo = revPrimer[i];
2056 if(rawSequence.length() < oligo.length()){
2061 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2062 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2071 catch(exception& e) {
2072 m->errorOut(e, "TrimOligos", "stripReverse");
2076 //******************************************************************/
2077 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
2079 string rawSequence = seq.getUnaligned();
2080 bool success = ldiffs + 1; //guilty until proven innocent
2082 for(int i=0;i<linker.size();i++){
2083 string oligo = linker[i];
2085 if(rawSequence.length() < oligo.length()){
2086 success = ldiffs + 10;
2090 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2091 seq.setUnaligned(rawSequence.substr(oligo.length()));
2092 if(qual.getName() != ""){
2093 qual.trimQScores(oligo.length(), -1);
2100 //if you found the linker or if you don't want to allow for diffs
2101 if ((ldiffs == 0) || (success == 0)) { return success; }
2103 else { //try aligning and see if you can find it
2104 Alignment* alignment;
2105 if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2106 else{ alignment = NULL; }
2108 //can you find the barcode
2113 for(int i = 0; i < linker.size(); i++){
2114 string oligo = linker[i];
2115 // int length = oligo.length();
2117 if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length
2118 success = ldiffs + 10;
2122 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2123 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2124 oligo = alignment->getSeqAAln();
2125 string temp = alignment->getSeqBAln();
2127 int alnLength = oligo.length();
2129 for(int i=oligo.length()-1;i>=0;i--){
2130 if(oligo[i] != '-'){ alnLength = i+1; break; }
2132 oligo = oligo.substr(0,alnLength);
2133 temp = temp.substr(0,alnLength);
2135 int numDiff = countDiffs(oligo, temp);
2137 if(numDiff < minDiff){
2141 for(int i=0;i<alnLength;i++){
2147 else if(numDiff == minDiff){
2153 if(minDiff > ldiffs) { success = minDiff; } //no good matches
2154 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
2155 else{ //use the best match
2156 seq.setUnaligned(rawSequence.substr(minPos));
2158 if(qual.getName() != ""){
2159 qual.trimQScores(minPos, -1);
2164 if (alignment != NULL) { delete alignment; }
2172 catch(exception& e) {
2173 m->errorOut(e, "TrimOligos", "stripLinker");
2177 //******************************************************************/
2178 bool TrimOligos::stripLinker(Sequence& seq){
2181 string rawSequence = seq.getUnaligned();
2182 bool success = ldiffs +1; //guilty until proven innocent
2184 for(int i=0;i<linker.size();i++){
2185 string oligo = linker[i];
2187 if(rawSequence.length() < oligo.length()){
2188 success = ldiffs +10;
2192 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2193 seq.setUnaligned(rawSequence.substr(oligo.length()));
2199 //if you found the linker or if you don't want to allow for diffs
2200 if ((ldiffs == 0) || (success == 0)) { return success; }
2202 else { //try aligning and see if you can find it
2203 Alignment* alignment;
2204 if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2205 else{ alignment = NULL; }
2207 //can you find the barcode
2212 for(int i = 0; i < linker.size(); i++){
2213 string oligo = linker[i];
2214 // int length = oligo.length();
2216 if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length
2217 success = ldiffs + 10;
2221 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2222 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2223 oligo = alignment->getSeqAAln();
2224 string temp = alignment->getSeqBAln();
2226 int alnLength = oligo.length();
2228 for(int i=oligo.length()-1;i>=0;i--){
2229 if(oligo[i] != '-'){ alnLength = i+1; break; }
2231 oligo = oligo.substr(0,alnLength);
2232 temp = temp.substr(0,alnLength);
2234 int numDiff = countDiffs(oligo, temp);
2236 if(numDiff < minDiff){
2240 for(int i=0;i<alnLength;i++){
2246 else if(numDiff == minDiff){
2252 if(minDiff > ldiffs) { success = minDiff; } //no good matches
2253 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
2254 else{ //use the best match
2255 seq.setUnaligned(rawSequence.substr(minPos));
2259 if (alignment != NULL) { delete alignment; }
2266 catch(exception& e) {
2267 m->errorOut(e, "TrimOligos", "stripLinker");
2272 //******************************************************************/
2273 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
2275 string rawSequence = seq.getUnaligned();
2276 bool success = sdiffs+1; //guilty until proven innocent
2278 for(int i=0;i<spacer.size();i++){
2279 string oligo = spacer[i];
2281 if(rawSequence.length() < oligo.length()){
2282 success = sdiffs+10;
2286 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2287 seq.setUnaligned(rawSequence.substr(oligo.length()));
2288 if(qual.getName() != ""){
2289 qual.trimQScores(oligo.length(), -1);
2296 //if you found the spacer or if you don't want to allow for diffs
2297 if ((sdiffs == 0) || (success == 0)) { return success; }
2299 else { //try aligning and see if you can find it
2300 Alignment* alignment;
2301 if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2302 else{ alignment = NULL; }
2304 //can you find the barcode
2309 for(int i = 0; i < spacer.size(); i++){
2310 string oligo = spacer[i];
2311 // int length = oligo.length();
2313 if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length
2314 success = sdiffs + 10;
2318 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2319 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2320 oligo = alignment->getSeqAAln();
2321 string temp = alignment->getSeqBAln();
2323 int alnLength = oligo.length();
2325 for(int i=oligo.length()-1;i>=0;i--){
2326 if(oligo[i] != '-'){ alnLength = i+1; break; }
2328 oligo = oligo.substr(0,alnLength);
2329 temp = temp.substr(0,alnLength);
2331 int numDiff = countDiffs(oligo, temp);
2333 if(numDiff < minDiff){
2337 for(int i=0;i<alnLength;i++){
2343 else if(numDiff == minDiff){
2349 if(minDiff > sdiffs) { success = minDiff; } //no good matches
2350 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
2351 else{ //use the best match
2352 seq.setUnaligned(rawSequence.substr(minPos));
2354 if(qual.getName() != ""){
2355 qual.trimQScores(minPos, -1);
2360 if (alignment != NULL) { delete alignment; }
2368 catch(exception& e) {
2369 m->errorOut(e, "TrimOligos", "stripSpacer");
2373 //******************************************************************/
2374 bool TrimOligos::stripSpacer(Sequence& seq){
2377 string rawSequence = seq.getUnaligned();
2378 bool success = sdiffs+1; //guilty until proven innocent
2380 for(int i=0;i<spacer.size();i++){
2381 string oligo = spacer[i];
2383 if(rawSequence.length() < oligo.length()){
2384 success = sdiffs+10;
2388 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2389 seq.setUnaligned(rawSequence.substr(oligo.length()));
2395 //if you found the spacer or if you don't want to allow for diffs
2396 if ((sdiffs == 0) || (success == 0)) { return success; }
2398 else { //try aligning and see if you can find it
2399 Alignment* alignment;
2400 if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2401 else{ alignment = NULL; }
2403 //can you find the barcode
2408 for(int i = 0; i < spacer.size(); i++){
2409 string oligo = spacer[i];
2410 // int length = oligo.length();
2412 if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length
2413 success = sdiffs + 10;
2417 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2418 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2419 oligo = alignment->getSeqAAln();
2420 string temp = alignment->getSeqBAln();
2422 int alnLength = oligo.length();
2424 for(int i=oligo.length()-1;i>=0;i--){
2425 if(oligo[i] != '-'){ alnLength = i+1; break; }
2427 oligo = oligo.substr(0,alnLength);
2428 temp = temp.substr(0,alnLength);
2430 int numDiff = countDiffs(oligo, temp);
2432 if(numDiff < minDiff){
2436 for(int i=0;i<alnLength;i++){
2442 else if(numDiff == minDiff){
2448 if(minDiff > sdiffs) { success = minDiff; } //no good matches
2449 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
2450 else{ //use the best match
2451 seq.setUnaligned(rawSequence.substr(minPos));
2455 if (alignment != NULL) { delete alignment; }
2462 catch(exception& e) {
2463 m->errorOut(e, "TrimOligos", "stripSpacer");
2468 //******************************************************************/
2469 bool TrimOligos::compareDNASeq(string oligo, string seq){
2472 int length = oligo.length();
2474 for(int i=0;i<length;i++){
2476 if(oligo[i] != seq[i]){
2477 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
2478 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
2479 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
2480 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
2481 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
2482 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2483 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
2484 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
2485 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2486 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2487 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
2488 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
2490 if(success == 0) { break; }
2499 catch(exception& e) {
2500 m->errorOut(e, "TrimOligos", "compareDNASeq");
2505 //********************************************************************/
2506 int TrimOligos::countDiffs(string oligo, string seq){
2509 int length = oligo.length();
2512 for(int i=0;i<length;i++){
2514 if(oligo[i] != seq[i]){
2515 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
2516 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
2517 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
2518 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
2519 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
2520 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2521 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
2522 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
2523 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2524 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2525 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
2526 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
2533 catch(exception& e) {
2534 m->errorOut(e, "TrimOligos", "countDiffs");
2538 //********************************************************************/
2539 string TrimOligos::reverseOligo(string oligo){
2541 string reverse = "";
2543 for(int i=oligo.length()-1;i>=0;i--){
2545 if(oligo[i] == 'A') { reverse += 'T'; }
2546 else if(oligo[i] == 'T'){ reverse += 'A'; }
2547 else if(oligo[i] == 'U'){ reverse += 'A'; }
2549 else if(oligo[i] == 'G'){ reverse += 'C'; }
2550 else if(oligo[i] == 'C'){ reverse += 'G'; }
2552 else if(oligo[i] == 'R'){ reverse += 'Y'; }
2553 else if(oligo[i] == 'Y'){ reverse += 'R'; }
2555 else if(oligo[i] == 'M'){ reverse += 'K'; }
2556 else if(oligo[i] == 'K'){ reverse += 'M'; }
2558 else if(oligo[i] == 'W'){ reverse += 'W'; }
2559 else if(oligo[i] == 'S'){ reverse += 'S'; }
2561 else if(oligo[i] == 'B'){ reverse += 'V'; }
2562 else if(oligo[i] == 'V'){ reverse += 'B'; }
2564 else if(oligo[i] == 'D'){ reverse += 'H'; }
2565 else if(oligo[i] == 'H'){ reverse += 'D'; }
2567 else if(oligo[i] == '-'){ reverse += '-'; }
2568 else { reverse += 'N'; }
2574 catch(exception& e) {
2575 m->errorOut(e, "TrimOligos", "reverseOligo");
2580 /********************************************************************/