5 * Created by westcott on 9/1/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
19 m = MothurOut::getInstance();
32 maxFBarcodeLength = 0;
33 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
34 if(it->first.length() > maxFBarcodeLength){
35 maxFBarcodeLength = it->first.length();
39 map<string,int>::iterator it;
40 for(it=primers.begin();it!=primers.end();it++){
41 if(it->first.length() > maxFPrimerLength){
42 maxFPrimerLength = it->first.length();
47 for(int i = 0; i < linker.size(); i++){
48 if(linker[i].length() > maxLinkerLength){
49 maxLinkerLength = linker[i].length();
54 for(int i = 0; i < spacer.size(); i++){
55 if(spacer[i].length() > maxSpacerLength){
56 maxSpacerLength = spacer[i].length();
61 m->errorOut(e, "TrimOligos", "TrimOligos");
65 /********************************************************************/
66 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
67 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
69 m = MothurOut::getInstance();
77 maxFBarcodeLength = 0;
78 for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
79 string forward = it->second.forward;
80 map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
82 if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length(); }
84 if (itForward == ifbarcodes.end()) {
85 vector<int> temp; temp.push_back(it->first);
86 ifbarcodes[forward] = temp;
87 }else { itForward->second.push_back(it->first); }
91 for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
92 string forward = it->second.forward;
93 map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
95 if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length(); }
97 if (itForward == ifprimers.end()) {
98 vector<int> temp; temp.push_back(it->first);
99 ifprimers[forward] = temp;
100 }else { itForward->second.push_back(it->first); }
103 maxRBarcodeLength = 0;
104 for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
105 string forward = it->second.reverse;
106 map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
108 if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length(); }
110 if (itForward == irbarcodes.end()) {
111 vector<int> temp; temp.push_back(it->first);
112 irbarcodes[forward] = temp;
113 }else { itForward->second.push_back(it->first); }
116 maxRPrimerLength = 0;
117 for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
118 string forward = it->second.reverse;
119 map<string, vector<int> >::iterator itForward = irprimers.find(forward);
121 if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length(); }
123 if (itForward == irprimers.end()) {
124 vector<int> temp; temp.push_back(it->first);
125 irprimers[forward] = temp;
126 }else { itForward->second.push_back(it->first); }
132 catch(exception& e) {
133 m->errorOut(e, "TrimOligos", "TrimOligos");
137 /********************************************************************/
138 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
139 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
141 m = MothurOut::getInstance();
151 maxFBarcodeLength = 0;
152 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
153 string oligo = it->first;
154 if(oligo.length() > maxFBarcodeLength){
155 maxFBarcodeLength = oligo.length();
158 maxRBarcodeLength = maxFBarcodeLength;
160 maxFPrimerLength = 0;
161 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
162 string oligo = it->first;
163 if(oligo.length() > maxFPrimerLength){
164 maxFPrimerLength = oligo.length();
167 maxRPrimerLength = maxFPrimerLength;
169 catch(exception& e) {
170 m->errorOut(e, "TrimOligos", "TrimOligos");
174 /********************************************************************/
175 TrimOligos::~TrimOligos() {}
176 //********************************************************************/
177 bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
180 string rawSequence = seq.getUnaligned();
182 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
183 string oligo = it->first;
185 if(rawSequence.length() < oligo.length()) { break; }
188 int olength = oligo.length();
189 for (int j = 0; j < rawSequence.length()-olength; j++){
190 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
191 string rawChunk = rawSequence.substr(j, olength);
192 if(compareDNASeq(oligo, rawChunk)) {
194 primerEnd = primerStart + olength;
201 primerStart = 0; primerEnd = 0;
202 //if you don't want to allow for diffs
203 if (pdiffs == 0) { return false; }
204 else { //try aligning and see if you can find it
206 //can you find the barcode
210 Alignment* alignment;
211 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
212 else{ alignment = NULL; }
214 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
216 string prim = it->first;
218 int olength = prim.length();
219 if (rawSequence.length() < olength) {} //ignore primers too long for this seq
221 for (int j = 0; j < rawSequence.length()-olength; j++){
223 string oligo = it->first;
225 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
227 string rawChunk = rawSequence.substr(j, olength+pdiffs);
229 //use needleman to align first primer.length()+numdiffs of sequence to each barcode
230 alignment->alignPrimer(oligo, rawChunk);
231 oligo = alignment->getSeqAAln();
232 string temp = alignment->getSeqBAln();
234 int alnLength = oligo.length();
236 for(int i=oligo.length()-1;i>=0;i--){
237 if(oligo[i] != '-'){ alnLength = i+1; break; }
239 oligo = oligo.substr(0,alnLength);
240 temp = temp.substr(0,alnLength);
242 int numDiff = countDiffs(oligo, temp);
244 if(numDiff < minDiff){
248 primerEnd = primerStart + alnLength;
249 }else if(numDiff == minDiff){ minCount++; }
254 if (alignment != NULL) { delete alignment; }
256 if(minDiff > pdiffs) { primerStart = 0; primerEnd = 0; return false; } //no good matches
257 else if(minCount > 1) { primerStart = 0; primerEnd = 0; return false; } //can't tell the difference between multiple primers
261 primerStart = 0; primerEnd = 0;
265 catch(exception& e) {
266 m->errorOut(e, "TrimOligos", "stripForward");
270 //******************************************************************/
271 bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
274 string rawSequence = seq.getUnaligned();
276 for(int i=0;i<revPrimer.size();i++){
277 string oligo = revPrimer[i];
278 if(rawSequence.length() < oligo.length()) { break; }
281 int olength = oligo.length();
282 for (int j = rawSequence.length()-olength; j >= 0; j--){
283 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
284 string rawChunk = rawSequence.substr(j, olength);
286 if(compareDNASeq(oligo, rawChunk)) {
288 primerEnd = primerStart + olength;
295 primerStart = 0; primerEnd = 0;
298 catch(exception& e) {
299 m->errorOut(e, "PcrSeqsCommand", "findReverse");
303 //*******************************************************************/
305 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
308 if (paired) { int success = stripPairedBarcode(seq, qual, group); return success; }
310 string rawSequence = seq.getUnaligned();
311 int success = bdiffs + 1; //guilty until proven innocent
313 //can you find the barcode
314 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
315 string oligo = it->first;
316 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
317 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
321 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
323 seq.setUnaligned(rawSequence.substr(oligo.length()));
325 if(qual.getName() != ""){
326 qual.trimQScores(oligo.length(), -1);
334 //if you found the barcode or if you don't want to allow for diffs
335 if ((bdiffs == 0) || (success == 0)) { return success; }
337 else { //try aligning and see if you can find it
338 Alignment* alignment;
339 if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
340 else{ alignment = NULL; }
342 //can you find the barcode
348 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
349 string oligo = it->first;
350 // int length = oligo.length();
352 if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
353 success = bdiffs + 10;
357 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
358 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
359 oligo = alignment->getSeqAAln();
360 string temp = alignment->getSeqBAln();
362 int alnLength = oligo.length();
364 for(int i=oligo.length()-1;i>=0;i--){
365 if(oligo[i] != '-'){ alnLength = i+1; break; }
367 oligo = oligo.substr(0,alnLength);
368 temp = temp.substr(0,alnLength);
369 int numDiff = countDiffs(oligo, temp);
371 if(numDiff < minDiff){
374 minGroup = it->second;
376 for(int i=0;i<alnLength;i++){
382 else if(numDiff == minDiff){
388 if(minDiff > bdiffs) { success = minDiff; } //no good matches
389 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
390 else{ //use the best match
392 seq.setUnaligned(rawSequence.substr(minPos));
394 if(qual.getName() != ""){
395 qual.trimQScores(minPos, -1);
400 if (alignment != NULL) { delete alignment; }
407 catch(exception& e) {
408 m->errorOut(e, "TrimOligos", "stripBarcode");
412 //*******************************************************************/
413 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
415 //look for forward barcode
416 string rawFSequence = forwardSeq.getUnaligned();
417 string rawRSequence = reverseSeq.getUnaligned();
418 int success = bdiffs + 1; //guilty until proven innocent
420 //can you find the forward barcode
421 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
422 string foligo = it->second.forward;
423 string roligo = it->second.reverse;
425 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
426 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
429 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
430 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
434 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
436 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
437 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
443 //if you found the barcode or if you don't want to allow for diffs
444 if ((bdiffs == 0) || (success == 0)) { return success; }
445 else { //try aligning and see if you can find it
446 Alignment* alignment;
447 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
448 else{ alignment = NULL; }
450 //can you find the barcode
453 vector< vector<int> > minFGroup;
456 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
465 only want to look for forward = Sarah, John, Anna, Pat, Gail
466 reverse = Westcott, Schloss, Brown, Moore
467 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
469 //cout << endl << forwardSeq.getName() << endl;
470 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
471 string oligo = it->first;
473 if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
474 success = bdiffs + 10;
477 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
478 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
479 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
480 oligo = alignment->getSeqAAln();
481 string temp = alignment->getSeqBAln();
483 int alnLength = oligo.length();
485 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
486 oligo = oligo.substr(0,alnLength);
487 temp = temp.substr(0,alnLength);
488 int numDiff = countDiffs(oligo, temp);
490 if (alnLength == 0) { numDiff = bdiffs + 100; }
491 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
493 if(numDiff < minDiff){
497 minFGroup.push_back(it->second);
500 for(int i=0;i<alnLength;i++){
505 minFPos.push_back(tempminFPos);
506 }else if(numDiff == minDiff){
507 minFGroup.push_back(it->second);
509 for(int i=0;i<alnLength;i++){
514 minFPos.push_back(tempminFPos);
518 //cout << minDiff << '\t' << minCount << '\t' << endl;
519 if(minDiff > bdiffs) { success = minDiff; } //no good matches
521 //check for reverse match
522 if (alignment != NULL) { delete alignment; }
524 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
525 else{ alignment = NULL; }
527 //can you find the barcode
530 vector< vector<int> > minRGroup;
533 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
534 string oligo = it->first;
535 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
536 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
537 success = bdiffs + 10;
541 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
542 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
543 oligo = alignment->getSeqAAln();
544 string temp = alignment->getSeqBAln();
546 int alnLength = oligo.length();
547 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
548 oligo = oligo.substr(0,alnLength);
549 temp = temp.substr(0,alnLength);
550 int numDiff = countDiffs(oligo, temp);
551 if (alnLength == 0) { numDiff = bdiffs + 100; }
553 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
554 if(numDiff < minDiff){
558 minRGroup.push_back(it->second);
561 for(int i=0;i<alnLength;i++){
566 minRPos.push_back(tempminRPos);
567 }else if(numDiff == minDiff){
569 for(int i=0;i<alnLength;i++){
574 minRPos.push_back(tempminRPos);
575 minRGroup.push_back(it->second);
580 if(minDiff > bdiffs) { success = minDiff; } //no good matches
582 bool foundMatch = false;
584 for (int i = 0; i < minFGroup.size(); i++) {
585 for (int j = 0; j < minFGroup[i].size(); j++) {
586 for (int k = 0; k < minRGroup.size(); k++) {
587 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
594 if (matches.size() == 1) {
601 //we have an acceptable match for the forward and reverse, but do they match?
603 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
604 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
606 }else { success = bdiffs + 100; }
610 if (alignment != NULL) { delete alignment; }
616 catch(exception& e) {
617 m->errorOut(e, "TrimOligos", "stripIBarcode");
622 //*******************************************************************/
623 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
625 //look for forward barcode
626 string rawFSequence = forwardSeq.getUnaligned();
627 string rawRSequence = reverseSeq.getUnaligned();
628 int success = bdiffs + 1; //guilty until proven innocent
630 //can you find the forward barcode
631 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
632 string foligo = it->second.forward;
633 string roligo = it->second.reverse;
635 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
636 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
639 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
640 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
644 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
646 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
647 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
648 forwardQual.trimQScores(foligo.length(), -1);
649 reverseQual.trimQScores(roligo.length(), -1);
655 //if you found the barcode or if you don't want to allow for diffs
656 if ((bdiffs == 0) || (success == 0)) { return success; }
657 else { //try aligning and see if you can find it
658 Alignment* alignment;
659 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
660 else{ alignment = NULL; }
662 //can you find the barcode
665 vector< vector<int> > minFGroup;
668 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
677 only want to look for forward = Sarah, John, Anna, Pat, Gail
678 reverse = Westcott, Schloss, Brown, Moore
679 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
681 //cout << endl << forwardSeq.getName() << endl;
682 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
683 string oligo = it->first;
685 if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
686 success = bdiffs + 10;
689 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
690 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
691 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
692 oligo = alignment->getSeqAAln();
693 string temp = alignment->getSeqBAln();
695 int alnLength = oligo.length();
697 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
698 oligo = oligo.substr(0,alnLength);
699 temp = temp.substr(0,alnLength);
700 int numDiff = countDiffs(oligo, temp);
702 if (alnLength == 0) { numDiff = bdiffs + 100; }
703 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
705 if(numDiff < minDiff){
709 minFGroup.push_back(it->second);
712 for(int i=0;i<alnLength;i++){
717 minFPos.push_back(tempminFPos);
718 }else if(numDiff == minDiff){
719 minFGroup.push_back(it->second);
721 for(int i=0;i<alnLength;i++){
726 minFPos.push_back(tempminFPos);
730 //cout << minDiff << '\t' << minCount << '\t' << endl;
731 if(minDiff > bdiffs) { success = minDiff; } //no good matches
733 //check for reverse match
734 if (alignment != NULL) { delete alignment; }
736 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
737 else{ alignment = NULL; }
739 //can you find the barcode
742 vector< vector<int> > minRGroup;
745 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
746 string oligo = it->first;
747 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
748 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
749 success = bdiffs + 10;
753 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
754 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
755 oligo = alignment->getSeqAAln();
756 string temp = alignment->getSeqBAln();
758 int alnLength = oligo.length();
759 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
760 oligo = oligo.substr(0,alnLength);
761 temp = temp.substr(0,alnLength);
762 int numDiff = countDiffs(oligo, temp);
763 if (alnLength == 0) { numDiff = bdiffs + 100; }
765 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
766 if(numDiff < minDiff){
770 minRGroup.push_back(it->second);
773 for(int i=0;i<alnLength;i++){
778 minRPos.push_back(tempminRPos);
779 }else if(numDiff == minDiff){
781 for(int i=0;i<alnLength;i++){
786 minRPos.push_back(tempminRPos);
787 minRGroup.push_back(it->second);
792 if(minDiff > bdiffs) { success = minDiff; } //no good matches
794 bool foundMatch = false;
796 for (int i = 0; i < minFGroup.size(); i++) {
797 for (int j = 0; j < minFGroup[i].size(); j++) {
798 for (int k = 0; k < minRGroup.size(); k++) {
799 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
806 if (matches.size() == 1) {
813 //we have an acceptable match for the forward and reverse, but do they match?
815 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
816 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
817 forwardQual.trimQScores(fStart, -1);
818 reverseQual.trimQScores(rStart, -1);
820 }else { success = bdiffs + 100; }
824 if (alignment != NULL) { delete alignment; }
830 catch(exception& e) {
831 m->errorOut(e, "TrimOligos", "stripIBarcode");
836 //*******************************************************************/
837 int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
839 //look for forward barcode
840 string rawSeq = seq.getUnaligned();
841 int success = bdiffs + 1; //guilty until proven innocent
842 //cout << seq.getName() << endl;
843 //can you find the forward barcode
844 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
845 string foligo = it->second.forward;
846 string roligo = it->second.reverse;
847 //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
848 //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
849 if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
850 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
853 if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
854 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
858 if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; }
860 if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
862 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
863 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
864 if(qual.getName() != ""){
865 qual.trimQScores(-1, rawSeq.length()-roligo.length());
866 qual.trimQScores(foligo.length(), -1);
872 //cout << "success=" << success << endl;
873 //if you found the barcode or if you don't want to allow for diffs
874 if ((bdiffs == 0) || (success == 0)) { return success; }
875 else { //try aligning and see if you can find it
876 Alignment* alignment;
877 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
878 else{ alignment = NULL; }
880 //can you find the barcode
883 vector< vector<int> > minFGroup;
886 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
895 only want to look for forward = Sarah, John, Anna, Pat, Gail
896 reverse = Westcott, Schloss, Brown, Moore
897 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
899 //cout << endl << forwardSeq.getName() << endl;
900 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
901 string oligo = it->first;
903 if(rawSeq.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
904 success = bdiffs + 10;
907 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
908 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
909 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
910 oligo = alignment->getSeqAAln();
911 string temp = alignment->getSeqBAln();
913 int alnLength = oligo.length();
915 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
916 oligo = oligo.substr(0,alnLength);
917 temp = temp.substr(0,alnLength);
918 int numDiff = countDiffs(oligo, temp);
920 if (alnLength == 0) { numDiff = bdiffs + 100; }
921 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
923 if(numDiff < minDiff){
927 minFGroup.push_back(it->second);
930 for(int i=0;i<alnLength;i++){
935 minFPos.push_back(tempminFPos);
936 }else if(numDiff == minDiff){
937 minFGroup.push_back(it->second);
939 for(int i=0;i<alnLength;i++){
944 minFPos.push_back(tempminFPos);
948 //cout << minDiff << '\t' << minCount << '\t' << endl;
949 if(minDiff > bdiffs) { success = minDiff; } //no good matches
951 //check for reverse match
952 if (alignment != NULL) { delete alignment; }
954 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
955 else{ alignment = NULL; }
957 //can you find the barcode
960 vector< vector<int> > minRGroup;
963 string rawRSequence = reverseOligo(seq.getUnaligned());
964 //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl;
965 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
966 string oligo = reverseOligo(it->first);
967 //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
968 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
969 success = bdiffs + 10;
973 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
974 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
975 oligo = alignment->getSeqAAln();
976 string temp = alignment->getSeqBAln();
978 int alnLength = oligo.length();
979 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
980 oligo = oligo.substr(0,alnLength);
981 temp = temp.substr(0,alnLength);
982 int numDiff = countDiffs(oligo, temp);
983 if (alnLength == 0) { numDiff = bdiffs + 100; }
985 //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
986 if(numDiff < minDiff){
990 minRGroup.push_back(it->second);
993 for(int i=0;i<alnLength;i++){
998 minRPos.push_back(tempminRPos);
999 }else if(numDiff == minDiff){
1000 int tempminRPos = 0;
1001 for(int i=0;i<alnLength;i++){
1006 minRPos.push_back(tempminRPos);
1007 minRGroup.push_back(it->second);
1012 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1014 bool foundMatch = false;
1015 vector<int> matches;
1016 for (int i = 0; i < minFGroup.size(); i++) {
1017 for (int j = 0; j < minFGroup[i].size(); j++) {
1018 for (int k = 0; k < minRGroup.size(); k++) {
1019 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1026 if (matches.size() == 1) {
1029 fStart = minFPos[0];
1030 rStart = rawSeq.length() - minRPos[0];
1031 if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence
1034 //we have an acceptable match for the forward and reverse, but do they match?
1036 string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1037 seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1038 if(qual.getName() != ""){
1039 qual.trimQScores(-1, rStart);
1040 qual.trimQScores(fStart, -1);
1043 //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
1044 }else { success = bdiffs + 100; }
1048 if (alignment != NULL) { delete alignment; }
1054 catch(exception& e) {
1055 m->errorOut(e, "TrimOligos", "stripPairedBarcode");
1060 //*******************************************************************/
1061 int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1064 string rawSeq = seq.getUnaligned();
1065 int success = pdiffs + 1; //guilty until proven innocent
1066 //cout << seq.getName() << endl;
1067 //can you find the forward
1068 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1069 string foligo = it->second.forward;
1070 string roligo = it->second.reverse;
1072 //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
1073 //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
1074 if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1075 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1078 if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1079 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1083 if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; }
1085 if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
1088 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
1089 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
1090 if(qual.getName() != ""){
1091 qual.trimQScores(-1, rawSeq.length()-roligo.length());
1092 qual.trimQScores(foligo.length(), -1);
1099 //cout << "success=" << success << endl;
1100 //if you found the barcode or if you don't want to allow for diffs
1101 if ((pdiffs == 0) || (success == 0)) { return success; }
1102 else { //try aligning and see if you can find it
1103 Alignment* alignment;
1104 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1105 else{ alignment = NULL; }
1107 //can you find the barcode
1110 vector< vector<int> > minFGroup;
1111 vector<int> minFPos;
1113 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1122 only want to look for forward = Sarah, John, Anna, Pat, Gail
1123 reverse = Westcott, Schloss, Brown, Moore
1124 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1126 //cout << endl << forwardSeq.getName() << endl;
1127 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1128 string oligo = it->first;
1130 if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1131 success = pdiffs + 10;
1134 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl;
1135 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1136 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
1137 oligo = alignment->getSeqAAln();
1138 string temp = alignment->getSeqBAln();
1140 int alnLength = oligo.length();
1142 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1143 oligo = oligo.substr(0,alnLength);
1144 temp = temp.substr(0,alnLength);
1145 int numDiff = countDiffs(oligo, temp);
1147 if (alnLength == 0) { numDiff = pdiffs + 100; }
1148 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1150 if(numDiff < minDiff){
1154 minFGroup.push_back(it->second);
1155 int tempminFPos = 0;
1157 for(int i=0;i<alnLength;i++){
1162 minFPos.push_back(tempminFPos);
1163 }else if(numDiff == minDiff){
1164 minFGroup.push_back(it->second);
1165 int tempminFPos = 0;
1166 for(int i=0;i<alnLength;i++){
1171 minFPos.push_back(tempminFPos);
1175 //cout << minDiff << '\t' << minCount << '\t' << endl;
1176 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1178 //check for reverse match
1179 if (alignment != NULL) { delete alignment; }
1181 if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1182 else{ alignment = NULL; }
1184 //can you find the barcode
1187 vector< vector<int> > minRGroup;
1188 vector<int> minRPos;
1190 string rawRSequence = reverseOligo(seq.getUnaligned());
1192 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1193 string oligo = reverseOligo(it->first);
1194 //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
1195 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1196 success = pdiffs + 10;
1200 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1201 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1202 oligo = alignment->getSeqAAln();
1203 string temp = alignment->getSeqBAln();
1205 int alnLength = oligo.length();
1206 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1207 oligo = oligo.substr(0,alnLength);
1208 temp = temp.substr(0,alnLength);
1209 int numDiff = countDiffs(oligo, temp);
1210 if (alnLength == 0) { numDiff = pdiffs + 100; }
1212 //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1213 if(numDiff < minDiff){
1217 minRGroup.push_back(it->second);
1218 int tempminRPos = 0;
1220 for(int i=0;i<alnLength;i++){
1225 minRPos.push_back(tempminRPos);
1226 }else if(numDiff == minDiff){
1227 int tempminRPos = 0;
1228 for(int i=0;i<alnLength;i++){
1233 minRPos.push_back(tempminRPos);
1234 minRGroup.push_back(it->second);
1239 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1241 bool foundMatch = false;
1242 vector<int> matches;
1243 for (int i = 0; i < minFGroup.size(); i++) {
1244 for (int j = 0; j < minFGroup[i].size(); j++) {
1245 for (int k = 0; k < minRGroup.size(); k++) {
1246 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1253 if (matches.size() == 1) {
1256 fStart = minFPos[0];
1257 rStart = rawSeq.length() - minRPos[0];
1258 if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence
1261 //we have an acceptable match for the forward and reverse, but do they match?
1264 string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1265 seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1266 if(qual.getName() != ""){
1267 qual.trimQScores(-1, rStart);
1268 qual.trimQScores(fStart, -1);
1272 //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
1273 }else { success = pdiffs + 100; }
1277 if (alignment != NULL) { delete alignment; }
1283 catch(exception& e) {
1284 m->errorOut(e, "TrimOligos", "stripPairedPrimers");
1290 //*******************************************************************/
1291 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
1293 //look for forward barcode
1294 string rawFSequence = forwardSeq.getUnaligned();
1295 string rawRSequence = reverseSeq.getUnaligned();
1296 int success = pdiffs + 1; //guilty until proven innocent
1298 //can you find the forward barcode
1299 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1300 string foligo = it->second.forward;
1301 string roligo = it->second.reverse;
1303 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1304 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1307 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1308 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1312 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
1314 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1315 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1316 forwardQual.trimQScores(foligo.length(), -1);
1317 reverseQual.trimQScores(roligo.length(), -1);
1323 //if you found the barcode or if you don't want to allow for diffs
1324 if ((pdiffs == 0) || (success == 0)) { return success; }
1325 else { //try aligning and see if you can find it
1326 Alignment* alignment;
1327 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1328 else{ alignment = NULL; }
1330 //can you find the barcode
1333 vector< vector<int> > minFGroup;
1334 vector<int> minFPos;
1336 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1345 only want to look for forward = Sarah, John, Anna, Pat, Gail
1346 reverse = Westcott, Schloss, Brown, Moore
1347 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1349 //cout << endl << forwardSeq.getName() << endl;
1350 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1351 string oligo = it->first;
1353 if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1354 success = pdiffs + 10;
1357 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1358 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1359 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1360 oligo = alignment->getSeqAAln();
1361 string temp = alignment->getSeqBAln();
1363 int alnLength = oligo.length();
1365 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1366 oligo = oligo.substr(0,alnLength);
1367 temp = temp.substr(0,alnLength);
1368 int numDiff = countDiffs(oligo, temp);
1370 if (alnLength == 0) { numDiff = pdiffs + 100; }
1371 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1373 if(numDiff < minDiff){
1377 minFGroup.push_back(it->second);
1378 int tempminFPos = 0;
1380 for(int i=0;i<alnLength;i++){
1385 minFPos.push_back(tempminFPos);
1386 }else if(numDiff == minDiff){
1387 minFGroup.push_back(it->second);
1388 int tempminFPos = 0;
1389 for(int i=0;i<alnLength;i++){
1394 minFPos.push_back(tempminFPos);
1398 //cout << minDiff << '\t' << minCount << '\t' << endl;
1399 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1401 //check for reverse match
1402 if (alignment != NULL) { delete alignment; }
1404 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1405 else{ alignment = NULL; }
1407 //can you find the barcode
1410 vector< vector<int> > minRGroup;
1411 vector<int> minRPos;
1413 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1414 string oligo = it->first;
1415 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1416 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1417 success = pdiffs + 10;
1421 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1422 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1423 oligo = alignment->getSeqAAln();
1424 string temp = alignment->getSeqBAln();
1426 int alnLength = oligo.length();
1427 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1428 oligo = oligo.substr(0,alnLength);
1429 temp = temp.substr(0,alnLength);
1430 int numDiff = countDiffs(oligo, temp);
1431 if (alnLength == 0) { numDiff = pdiffs + 100; }
1433 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1434 if(numDiff < minDiff){
1438 minRGroup.push_back(it->second);
1439 int tempminRPos = 0;
1441 for(int i=0;i<alnLength;i++){
1446 minRPos.push_back(tempminRPos);
1447 }else if(numDiff == minDiff){
1448 int tempminRPos = 0;
1449 for(int i=0;i<alnLength;i++){
1454 minRPos.push_back(tempminRPos);
1455 minRGroup.push_back(it->second);
1460 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1462 bool foundMatch = false;
1463 vector<int> matches;
1464 for (int i = 0; i < minFGroup.size(); i++) {
1465 for (int j = 0; j < minFGroup[i].size(); j++) {
1466 for (int k = 0; k < minRGroup.size(); k++) {
1467 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1474 if (matches.size() == 1) {
1477 fStart = minFPos[0];
1478 rStart = minRPos[0];
1481 //we have an acceptable match for the forward and reverse, but do they match?
1483 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1484 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1485 forwardQual.trimQScores(fStart, -1);
1486 reverseQual.trimQScores(rStart, -1);
1488 }else { success = pdiffs + 100; }
1492 if (alignment != NULL) { delete alignment; }
1498 catch(exception& e) {
1499 m->errorOut(e, "TrimOligos", "stripIForward");
1504 //*******************************************************************/
1505 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
1507 //look for forward barcode
1508 string rawFSequence = forwardSeq.getUnaligned();
1509 string rawRSequence = reverseSeq.getUnaligned();
1510 int success = pdiffs + 1; //guilty until proven innocent
1512 //can you find the forward barcode
1513 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1514 string foligo = it->second.forward;
1515 string roligo = it->second.reverse;
1517 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1518 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1521 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1522 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1526 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1528 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1529 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1535 //if you found the barcode or if you don't want to allow for diffs
1536 if ((pdiffs == 0) || (success == 0)) { return success; }
1537 else { //try aligning and see if you can find it
1538 Alignment* alignment;
1539 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1540 else{ alignment = NULL; }
1542 //can you find the barcode
1545 vector< vector<int> > minFGroup;
1546 vector<int> minFPos;
1548 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1557 only want to look for forward = Sarah, John, Anna, Pat, Gail
1558 reverse = Westcott, Schloss, Brown, Moore
1559 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1561 //cout << endl << forwardSeq.getName() << endl;
1562 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1563 string oligo = it->first;
1565 if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1566 success = pdiffs + 10;
1569 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1570 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1571 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1572 oligo = alignment->getSeqAAln();
1573 string temp = alignment->getSeqBAln();
1575 int alnLength = oligo.length();
1577 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1578 oligo = oligo.substr(0,alnLength);
1579 temp = temp.substr(0,alnLength);
1580 int numDiff = countDiffs(oligo, temp);
1582 if (alnLength == 0) { numDiff = pdiffs + 100; }
1583 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1585 if(numDiff < minDiff){
1589 minFGroup.push_back(it->second);
1590 int tempminFPos = 0;
1592 for(int i=0;i<alnLength;i++){
1597 minFPos.push_back(tempminFPos);
1598 }else if(numDiff == minDiff){
1599 minFGroup.push_back(it->second);
1600 int tempminFPos = 0;
1601 for(int i=0;i<alnLength;i++){
1606 minFPos.push_back(tempminFPos);
1610 //cout << minDiff << '\t' << minCount << '\t' << endl;
1611 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1613 //check for reverse match
1614 if (alignment != NULL) { delete alignment; }
1616 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1617 else{ alignment = NULL; }
1619 //can you find the barcode
1622 vector< vector<int> > minRGroup;
1623 vector<int> minRPos;
1625 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1626 string oligo = it->first;
1627 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1628 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1629 success = pdiffs + 10;
1633 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1634 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1635 oligo = alignment->getSeqAAln();
1636 string temp = alignment->getSeqBAln();
1638 int alnLength = oligo.length();
1639 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1640 oligo = oligo.substr(0,alnLength);
1641 temp = temp.substr(0,alnLength);
1642 int numDiff = countDiffs(oligo, temp);
1643 if (alnLength == 0) { numDiff = pdiffs + 100; }
1645 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1646 if(numDiff < minDiff){
1650 minRGroup.push_back(it->second);
1651 int tempminRPos = 0;
1653 for(int i=0;i<alnLength;i++){
1658 minRPos.push_back(tempminRPos);
1659 }else if(numDiff == minDiff){
1660 int tempminRPos = 0;
1661 for(int i=0;i<alnLength;i++){
1666 minRPos.push_back(tempminRPos);
1667 minRGroup.push_back(it->second);
1672 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1674 bool foundMatch = false;
1675 vector<int> matches;
1676 for (int i = 0; i < minFGroup.size(); i++) {
1677 for (int j = 0; j < minFGroup[i].size(); j++) {
1678 for (int k = 0; k < minRGroup.size(); k++) {
1679 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1686 if (matches.size() == 1) {
1689 fStart = minFPos[0];
1690 rStart = minRPos[0];
1693 //we have an acceptable match for the forward and reverse, but do they match?
1695 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1696 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1698 }else { success = pdiffs + 100; }
1702 if (alignment != NULL) { delete alignment; }
1708 catch(exception& e) {
1709 m->errorOut(e, "TrimOligos", "stripIForward");
1714 //*******************************************************************/
1715 int TrimOligos::stripBarcode(Sequence& seq, int& group){
1718 string rawSequence = seq.getUnaligned();
1719 int success = bdiffs + 1; //guilty until proven innocent
1721 //can you find the barcode
1722 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1723 string oligo = it->first;
1724 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
1725 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
1729 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1731 seq.setUnaligned(rawSequence.substr(oligo.length()));
1738 //if you found the barcode or if you don't want to allow for diffs
1739 if ((bdiffs == 0) || (success == 0)) { return success; }
1741 else { //try aligning and see if you can find it
1742 Alignment* alignment;
1743 if (barcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
1744 else{ alignment = NULL; }
1746 //can you find the barcode
1752 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1753 string oligo = it->first;
1754 // int length = oligo.length();
1756 if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
1757 success = bdiffs + 10;
1761 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1762 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1763 oligo = alignment->getSeqAAln();
1764 string temp = alignment->getSeqBAln();
1766 int alnLength = oligo.length();
1768 for(int i=oligo.length()-1;i>=0;i--){
1769 if(oligo[i] != '-'){ alnLength = i+1; break; }
1771 oligo = oligo.substr(0,alnLength);
1772 temp = temp.substr(0,alnLength);
1774 int numDiff = countDiffs(oligo, temp);
1776 if(numDiff < minDiff){
1779 minGroup = it->second;
1781 for(int i=0;i<alnLength;i++){
1787 else if(numDiff == minDiff){
1793 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1794 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
1795 else{ //use the best match
1797 seq.setUnaligned(rawSequence.substr(minPos));
1801 if (alignment != NULL) { delete alignment; }
1808 catch(exception& e) {
1809 m->errorOut(e, "TrimOligos", "stripBarcode");
1815 /********************************************************************/
1816 int TrimOligos::stripForward(Sequence& seq, int& group){
1819 string rawSequence = seq.getUnaligned();
1820 int success = pdiffs + 1; //guilty until proven innocent
1822 //can you find the primer
1823 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1824 string oligo = it->first;
1825 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1826 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1830 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1832 seq.setUnaligned(rawSequence.substr(oligo.length()));
1838 //if you found the barcode or if you don't want to allow for diffs
1839 if ((pdiffs == 0) || (success == 0)) { return success; }
1841 else { //try aligning and see if you can find it
1842 Alignment* alignment;
1843 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1844 else{ alignment = NULL; }
1846 //can you find the barcode
1852 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1853 string oligo = it->first;
1854 // int length = oligo.length();
1856 if(rawSequence.length() < maxFPrimerLength){
1857 success = pdiffs + 100;
1861 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1862 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1863 oligo = alignment->getSeqAAln();
1864 string temp = alignment->getSeqBAln();
1866 int alnLength = oligo.length();
1868 for(int i=oligo.length()-1;i>=0;i--){
1869 if(oligo[i] != '-'){ alnLength = i+1; break; }
1871 oligo = oligo.substr(0,alnLength);
1872 temp = temp.substr(0,alnLength);
1874 int numDiff = countDiffs(oligo, temp);
1876 if(numDiff < minDiff){
1879 minGroup = it->second;
1881 for(int i=0;i<alnLength;i++){
1887 else if(numDiff == minDiff){
1893 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1894 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1895 else{ //use the best match
1897 seq.setUnaligned(rawSequence.substr(minPos));
1901 if (alignment != NULL) { delete alignment; }
1908 catch(exception& e) {
1909 m->errorOut(e, "TrimOligos", "stripForward");
1913 //*******************************************************************/
1914 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1917 if (paired) { int success = stripPairedPrimers(seq, qual, group, keepForward); return success; }
1919 string rawSequence = seq.getUnaligned();
1920 int success = pdiffs + 1; //guilty until proven innocent
1922 //can you find the primer
1923 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1924 string oligo = it->first;
1925 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1926 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1930 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1932 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
1933 if(qual.getName() != ""){
1934 if (!keepForward) { qual.trimQScores(oligo.length(), -1); }
1941 //if you found the barcode or if you don't want to allow for diffs
1942 if ((pdiffs == 0) || (success == 0)) { return success; }
1944 else { //try aligning and see if you can find it
1945 Alignment* alignment;
1946 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1947 else{ alignment = NULL; }
1949 //can you find the barcode
1955 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1956 string oligo = it->first;
1957 // int length = oligo.length();
1959 if(rawSequence.length() < maxFPrimerLength){
1960 success = pdiffs + 100;
1964 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1965 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1966 oligo = alignment->getSeqAAln();
1967 string temp = alignment->getSeqBAln();
1969 int alnLength = oligo.length();
1971 for(int i=oligo.length()-1;i>=0;i--){
1972 if(oligo[i] != '-'){ alnLength = i+1; break; }
1974 oligo = oligo.substr(0,alnLength);
1975 temp = temp.substr(0,alnLength);
1977 int numDiff = countDiffs(oligo, temp);
1979 if(numDiff < minDiff){
1982 minGroup = it->second;
1984 for(int i=0;i<alnLength;i++){
1990 else if(numDiff == minDiff){
1996 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1997 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1998 else{ //use the best match
2000 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
2001 if(qual.getName() != ""){
2002 if (!keepForward) { qual.trimQScores(minPos, -1); }
2007 if (alignment != NULL) { delete alignment; }
2014 catch(exception& e) {
2015 m->errorOut(e, "TrimOligos", "stripForward");
2019 //******************************************************************/
2020 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
2022 string rawSequence = seq.getUnaligned();
2023 bool success = 0; //guilty until proven innocent
2025 for(int i=0;i<revPrimer.size();i++){
2026 string oligo = revPrimer[i];
2028 if(rawSequence.length() < oligo.length()){
2033 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2034 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2035 if(qual.getName() != ""){
2036 qual.trimQScores(-1, rawSequence.length()-oligo.length());
2045 catch(exception& e) {
2046 m->errorOut(e, "TrimOligos", "stripReverse");
2050 //******************************************************************/
2051 bool TrimOligos::stripReverse(Sequence& seq){
2054 string rawSequence = seq.getUnaligned();
2055 bool success = 0; //guilty until proven innocent
2057 for(int i=0;i<revPrimer.size();i++){
2058 string oligo = revPrimer[i];
2060 if(rawSequence.length() < oligo.length()){
2065 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2066 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2075 catch(exception& e) {
2076 m->errorOut(e, "TrimOligos", "stripReverse");
2080 //******************************************************************/
2081 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
2083 string rawSequence = seq.getUnaligned();
2084 bool success = ldiffs + 1; //guilty until proven innocent
2086 for(int i=0;i<linker.size();i++){
2087 string oligo = linker[i];
2089 if(rawSequence.length() < oligo.length()){
2090 success = ldiffs + 10;
2094 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2095 seq.setUnaligned(rawSequence.substr(oligo.length()));
2096 if(qual.getName() != ""){
2097 qual.trimQScores(oligo.length(), -1);
2104 //if you found the linker or if you don't want to allow for diffs
2105 if ((ldiffs == 0) || (success == 0)) { return success; }
2107 else { //try aligning and see if you can find it
2108 Alignment* alignment;
2109 if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2110 else{ alignment = NULL; }
2112 //can you find the barcode
2117 for(int i = 0; i < linker.size(); i++){
2118 string oligo = linker[i];
2119 // int length = oligo.length();
2121 if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length
2122 success = ldiffs + 10;
2126 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2127 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2128 oligo = alignment->getSeqAAln();
2129 string temp = alignment->getSeqBAln();
2131 int alnLength = oligo.length();
2133 for(int i=oligo.length()-1;i>=0;i--){
2134 if(oligo[i] != '-'){ alnLength = i+1; break; }
2136 oligo = oligo.substr(0,alnLength);
2137 temp = temp.substr(0,alnLength);
2139 int numDiff = countDiffs(oligo, temp);
2141 if(numDiff < minDiff){
2145 for(int i=0;i<alnLength;i++){
2151 else if(numDiff == minDiff){
2157 if(minDiff > ldiffs) { success = minDiff; } //no good matches
2158 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
2159 else{ //use the best match
2160 seq.setUnaligned(rawSequence.substr(minPos));
2162 if(qual.getName() != ""){
2163 qual.trimQScores(minPos, -1);
2168 if (alignment != NULL) { delete alignment; }
2176 catch(exception& e) {
2177 m->errorOut(e, "TrimOligos", "stripLinker");
2181 //******************************************************************/
2182 bool TrimOligos::stripLinker(Sequence& seq){
2185 string rawSequence = seq.getUnaligned();
2186 bool success = ldiffs +1; //guilty until proven innocent
2188 for(int i=0;i<linker.size();i++){
2189 string oligo = linker[i];
2191 if(rawSequence.length() < oligo.length()){
2192 success = ldiffs +10;
2196 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2197 seq.setUnaligned(rawSequence.substr(oligo.length()));
2203 //if you found the linker or if you don't want to allow for diffs
2204 if ((ldiffs == 0) || (success == 0)) { return success; }
2206 else { //try aligning and see if you can find it
2207 Alignment* alignment;
2208 if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2209 else{ alignment = NULL; }
2211 //can you find the barcode
2216 for(int i = 0; i < linker.size(); i++){
2217 string oligo = linker[i];
2218 // int length = oligo.length();
2220 if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length
2221 success = ldiffs + 10;
2225 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2226 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2227 oligo = alignment->getSeqAAln();
2228 string temp = alignment->getSeqBAln();
2230 int alnLength = oligo.length();
2232 for(int i=oligo.length()-1;i>=0;i--){
2233 if(oligo[i] != '-'){ alnLength = i+1; break; }
2235 oligo = oligo.substr(0,alnLength);
2236 temp = temp.substr(0,alnLength);
2238 int numDiff = countDiffs(oligo, temp);
2240 if(numDiff < minDiff){
2244 for(int i=0;i<alnLength;i++){
2250 else if(numDiff == minDiff){
2256 if(minDiff > ldiffs) { success = minDiff; } //no good matches
2257 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
2258 else{ //use the best match
2259 seq.setUnaligned(rawSequence.substr(minPos));
2263 if (alignment != NULL) { delete alignment; }
2270 catch(exception& e) {
2271 m->errorOut(e, "TrimOligos", "stripLinker");
2276 //******************************************************************/
2277 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
2279 string rawSequence = seq.getUnaligned();
2280 bool success = sdiffs+1; //guilty until proven innocent
2282 for(int i=0;i<spacer.size();i++){
2283 string oligo = spacer[i];
2285 if(rawSequence.length() < oligo.length()){
2286 success = sdiffs+10;
2290 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2291 seq.setUnaligned(rawSequence.substr(oligo.length()));
2292 if(qual.getName() != ""){
2293 qual.trimQScores(oligo.length(), -1);
2300 //if you found the spacer or if you don't want to allow for diffs
2301 if ((sdiffs == 0) || (success == 0)) { return success; }
2303 else { //try aligning and see if you can find it
2304 Alignment* alignment;
2305 if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2306 else{ alignment = NULL; }
2308 //can you find the barcode
2313 for(int i = 0; i < spacer.size(); i++){
2314 string oligo = spacer[i];
2315 // int length = oligo.length();
2317 if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length
2318 success = sdiffs + 10;
2322 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2323 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2324 oligo = alignment->getSeqAAln();
2325 string temp = alignment->getSeqBAln();
2327 int alnLength = oligo.length();
2329 for(int i=oligo.length()-1;i>=0;i--){
2330 if(oligo[i] != '-'){ alnLength = i+1; break; }
2332 oligo = oligo.substr(0,alnLength);
2333 temp = temp.substr(0,alnLength);
2335 int numDiff = countDiffs(oligo, temp);
2337 if(numDiff < minDiff){
2341 for(int i=0;i<alnLength;i++){
2347 else if(numDiff == minDiff){
2353 if(minDiff > sdiffs) { success = minDiff; } //no good matches
2354 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
2355 else{ //use the best match
2356 seq.setUnaligned(rawSequence.substr(minPos));
2358 if(qual.getName() != ""){
2359 qual.trimQScores(minPos, -1);
2364 if (alignment != NULL) { delete alignment; }
2372 catch(exception& e) {
2373 m->errorOut(e, "TrimOligos", "stripSpacer");
2377 //******************************************************************/
2378 bool TrimOligos::stripSpacer(Sequence& seq){
2381 string rawSequence = seq.getUnaligned();
2382 bool success = sdiffs+1; //guilty until proven innocent
2384 for(int i=0;i<spacer.size();i++){
2385 string oligo = spacer[i];
2387 if(rawSequence.length() < oligo.length()){
2388 success = sdiffs+10;
2392 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2393 seq.setUnaligned(rawSequence.substr(oligo.length()));
2399 //if you found the spacer or if you don't want to allow for diffs
2400 if ((sdiffs == 0) || (success == 0)) { return success; }
2402 else { //try aligning and see if you can find it
2403 Alignment* alignment;
2404 if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2405 else{ alignment = NULL; }
2407 //can you find the barcode
2412 for(int i = 0; i < spacer.size(); i++){
2413 string oligo = spacer[i];
2414 // int length = oligo.length();
2416 if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length
2417 success = sdiffs + 10;
2421 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2422 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2423 oligo = alignment->getSeqAAln();
2424 string temp = alignment->getSeqBAln();
2426 int alnLength = oligo.length();
2428 for(int i=oligo.length()-1;i>=0;i--){
2429 if(oligo[i] != '-'){ alnLength = i+1; break; }
2431 oligo = oligo.substr(0,alnLength);
2432 temp = temp.substr(0,alnLength);
2434 int numDiff = countDiffs(oligo, temp);
2436 if(numDiff < minDiff){
2440 for(int i=0;i<alnLength;i++){
2446 else if(numDiff == minDiff){
2452 if(minDiff > sdiffs) { success = minDiff; } //no good matches
2453 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
2454 else{ //use the best match
2455 seq.setUnaligned(rawSequence.substr(minPos));
2459 if (alignment != NULL) { delete alignment; }
2466 catch(exception& e) {
2467 m->errorOut(e, "TrimOligos", "stripSpacer");
2472 //******************************************************************/
2473 bool TrimOligos::compareDNASeq(string oligo, string seq){
2476 int length = oligo.length();
2478 for(int i=0;i<length;i++){
2480 if(oligo[i] != seq[i]){
2481 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
2482 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
2483 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
2484 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
2485 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
2486 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2487 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
2488 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
2489 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2490 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2491 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
2492 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
2494 if(success == 0) { break; }
2503 catch(exception& e) {
2504 m->errorOut(e, "TrimOligos", "compareDNASeq");
2509 //********************************************************************/
2510 int TrimOligos::countDiffs(string oligo, string seq){
2513 int length = oligo.length();
2516 for(int i=0;i<length;i++){
2518 if(oligo[i] != seq[i]){
2519 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
2520 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
2521 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
2522 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
2523 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
2524 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2525 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
2526 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
2527 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2528 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2529 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
2530 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
2537 catch(exception& e) {
2538 m->errorOut(e, "TrimOligos", "countDiffs");
2542 //********************************************************************/
2543 string TrimOligos::reverseOligo(string oligo){
2545 string reverse = "";
2547 for(int i=oligo.length()-1;i>=0;i--){
2549 if(oligo[i] == 'A') { reverse += 'T'; }
2550 else if(oligo[i] == 'T'){ reverse += 'A'; }
2551 else if(oligo[i] == 'U'){ reverse += 'A'; }
2553 else if(oligo[i] == 'G'){ reverse += 'C'; }
2554 else if(oligo[i] == 'C'){ reverse += 'G'; }
2556 else if(oligo[i] == 'R'){ reverse += 'Y'; }
2557 else if(oligo[i] == 'Y'){ reverse += 'R'; }
2559 else if(oligo[i] == 'M'){ reverse += 'K'; }
2560 else if(oligo[i] == 'K'){ reverse += 'M'; }
2562 else if(oligo[i] == 'W'){ reverse += 'W'; }
2563 else if(oligo[i] == 'S'){ reverse += 'S'; }
2565 else if(oligo[i] == 'B'){ reverse += 'V'; }
2566 else if(oligo[i] == 'V'){ reverse += 'B'; }
2568 else if(oligo[i] == 'D'){ reverse += 'H'; }
2569 else if(oligo[i] == 'H'){ reverse += 'D'; }
2571 else if(oligo[i] == '-'){ reverse += '-'; }
2572 else { reverse += 'N'; }
2578 catch(exception& e) {
2579 m->errorOut(e, "TrimOligos", "reverseOligo");
2584 /********************************************************************/