5 * Created by westcott on 9/1/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
19 m = MothurOut::getInstance();
32 maxFBarcodeLength = 0;
33 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
34 if(it->first.length() > maxFBarcodeLength){
35 maxFBarcodeLength = it->first.length();
39 map<string,int>::iterator it;
40 for(it=primers.begin();it!=primers.end();it++){
41 if(it->first.length() > maxFPrimerLength){
42 maxFPrimerLength = it->first.length();
47 for(int i = 0; i < linker.size(); i++){
48 if(linker[i].length() > maxLinkerLength){
49 maxLinkerLength = linker[i].length();
54 for(int i = 0; i < spacer.size(); i++){
55 if(spacer[i].length() > maxSpacerLength){
56 maxSpacerLength = spacer[i].length();
61 m->errorOut(e, "TrimOligos", "TrimOligos");
65 /********************************************************************/
66 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
67 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
69 m = MothurOut::getInstance();
77 maxFBarcodeLength = 0;
78 for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
79 string forward = it->second.forward;
80 map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
82 if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length(); }
84 if (itForward == ifbarcodes.end()) {
85 vector<int> temp; temp.push_back(it->first);
86 ifbarcodes[forward] = temp;
87 }else { itForward->second.push_back(it->first); }
91 for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
92 string forward = it->second.forward;
93 map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
95 if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length(); }
97 if (itForward == ifprimers.end()) {
98 vector<int> temp; temp.push_back(it->first);
99 ifprimers[forward] = temp;
100 }else { itForward->second.push_back(it->first); }
103 maxRBarcodeLength = 0;
104 for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
105 string forward = it->second.reverse;
106 map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
108 if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length(); }
110 if (itForward == irbarcodes.end()) {
111 vector<int> temp; temp.push_back(it->first);
112 irbarcodes[forward] = temp;
113 }else { itForward->second.push_back(it->first); }
116 maxRPrimerLength = 0;
117 for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
118 string forward = it->second.reverse;
119 map<string, vector<int> >::iterator itForward = irprimers.find(forward);
121 if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length(); }
123 if (itForward == irprimers.end()) {
124 vector<int> temp; temp.push_back(it->first);
125 irprimers[forward] = temp;
126 }else { itForward->second.push_back(it->first); }
132 catch(exception& e) {
133 m->errorOut(e, "TrimOligos", "TrimOligos");
137 /********************************************************************/
138 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
139 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
141 m = MothurOut::getInstance();
151 maxFBarcodeLength = 0;
152 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
153 string oligo = it->first;
154 if(oligo.length() > maxFBarcodeLength){
155 maxFBarcodeLength = oligo.length();
158 maxRBarcodeLength = maxFBarcodeLength;
160 maxFPrimerLength = 0;
161 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
162 string oligo = it->first;
163 if(oligo.length() > maxFPrimerLength){
164 maxFPrimerLength = oligo.length();
167 maxRPrimerLength = maxFPrimerLength;
169 catch(exception& e) {
170 m->errorOut(e, "TrimOligos", "TrimOligos");
174 /********************************************************************/
175 TrimOligos::~TrimOligos() {}
176 //********************************************************************/
177 bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
180 string rawSequence = seq.getUnaligned();
182 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
183 string oligo = it->first;
185 if(rawSequence.length() < oligo.length()) { break; }
188 int olength = oligo.length();
189 for (int j = 0; j < rawSequence.length()-olength; j++){
190 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
191 string rawChunk = rawSequence.substr(j, olength);
192 if(compareDNASeq(oligo, rawChunk)) {
194 primerEnd = primerStart + olength;
201 primerStart = 0; primerEnd = 0;
202 //if you don't want to allow for diffs
203 if (pdiffs == 0) { return false; }
204 else { //try aligning and see if you can find it
206 //can you find the barcode
210 Alignment* alignment;
211 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
212 else{ alignment = NULL; }
214 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
216 string prim = it->first;
218 int olength = prim.length();
219 if (rawSequence.length() < olength) {} //ignore primers too long for this seq
221 for (int j = 0; j < rawSequence.length()-olength; j++){
223 string oligo = it->first;
225 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
227 string rawChunk = rawSequence.substr(j, olength+pdiffs);
229 //use needleman to align first primer.length()+numdiffs of sequence to each barcode
230 alignment->alignPrimer(oligo, rawChunk);
231 oligo = alignment->getSeqAAln();
232 string temp = alignment->getSeqBAln();
234 int alnLength = oligo.length();
236 for(int i=oligo.length()-1;i>=0;i--){
237 if(oligo[i] != '-'){ alnLength = i+1; break; }
239 oligo = oligo.substr(0,alnLength);
240 temp = temp.substr(0,alnLength);
242 int numDiff = countDiffs(oligo, temp);
244 if(numDiff < minDiff){
248 primerEnd = primerStart + alnLength;
249 }else if(numDiff == minDiff){ minCount++; }
254 if (alignment != NULL) { delete alignment; }
256 if(minDiff > pdiffs) { primerStart = 0; primerEnd = 0; return false; } //no good matches
257 else if(minCount > 1) { primerStart = 0; primerEnd = 0; return false; } //can't tell the difference between multiple primers
261 primerStart = 0; primerEnd = 0;
265 catch(exception& e) {
266 m->errorOut(e, "TrimOligos", "stripForward");
270 //******************************************************************/
271 bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
274 string rawSequence = seq.getUnaligned();
276 for(int i=0;i<revPrimer.size();i++){
277 string oligo = revPrimer[i];
278 if(rawSequence.length() < oligo.length()) { break; }
281 int olength = oligo.length();
282 for (int j = rawSequence.length()-olength; j >= 0; j--){
283 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
284 string rawChunk = rawSequence.substr(j, olength);
286 if(compareDNASeq(oligo, rawChunk)) {
288 primerEnd = primerStart + olength;
295 primerStart = 0; primerEnd = 0;
298 catch(exception& e) {
299 m->errorOut(e, "PcrSeqsCommand", "findReverse");
303 //*******************************************************************/
305 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
308 if (paired) { int success = stripPairedBarcode(seq, qual, group); return success; }
310 string rawSequence = seq.getUnaligned();
311 int success = bdiffs + 1; //guilty until proven innocent
313 //can you find the barcode
314 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
315 string oligo = it->first;
316 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
317 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
321 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
323 seq.setUnaligned(rawSequence.substr(oligo.length()));
325 if(qual.getName() != ""){
326 qual.trimQScores(oligo.length(), -1);
334 //if you found the barcode or if you don't want to allow for diffs
335 if ((bdiffs == 0) || (success == 0)) { return success; }
337 else { //try aligning and see if you can find it
338 Alignment* alignment;
339 if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
340 else{ alignment = NULL; }
342 //can you find the barcode
348 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
349 string oligo = it->first;
350 // int length = oligo.length();
352 if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
353 success = bdiffs + 10;
357 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
358 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
359 oligo = alignment->getSeqAAln();
360 string temp = alignment->getSeqBAln();
362 int alnLength = oligo.length();
364 for(int i=oligo.length()-1;i>=0;i--){
365 if(oligo[i] != '-'){ alnLength = i+1; break; }
367 oligo = oligo.substr(0,alnLength);
368 temp = temp.substr(0,alnLength);
369 int numDiff = countDiffs(oligo, temp);
371 if(numDiff < minDiff){
374 minGroup = it->second;
376 for(int i=0;i<alnLength;i++){
382 else if(numDiff == minDiff){
388 if(minDiff > bdiffs) { success = minDiff; } //no good matches
389 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
390 else{ //use the best match
392 seq.setUnaligned(rawSequence.substr(minPos));
394 if(qual.getName() != ""){
395 qual.trimQScores(minPos, -1);
400 if (alignment != NULL) { delete alignment; }
407 catch(exception& e) {
408 m->errorOut(e, "TrimOligos", "stripBarcode");
412 //*******************************************************************/
413 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
415 //look for forward barcode
416 string rawFSequence = forwardSeq.getUnaligned();
417 string rawRSequence = reverseSeq.getUnaligned();
418 int success = bdiffs + 1; //guilty until proven innocent
420 //can you find the forward barcode
421 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
422 string foligo = it->second.forward;
423 string roligo = it->second.reverse;
425 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
426 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
429 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
430 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
434 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
436 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
437 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
443 //if you found the barcode or if you don't want to allow for diffs
444 if ((bdiffs == 0) || (success == 0)) { return success; }
445 else { //try aligning and see if you can find it
446 Alignment* alignment;
447 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
448 else{ alignment = NULL; }
450 //can you find the barcode
453 vector< vector<int> > minFGroup;
456 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
466 only want to look for forward = Sarah, John, Anna, Pat, Gail
467 reverse = Westcott, Schloss, Brown, Moore
469 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
471 //cout << endl << forwardSeq.getName() << endl;
472 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
473 string oligo = it->first;
475 if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
476 success = bdiffs + 10;
479 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
480 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
481 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
482 oligo = alignment->getSeqAAln();
483 string temp = alignment->getSeqBAln();
485 int alnLength = oligo.length();
487 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
488 oligo = oligo.substr(0,alnLength);
489 temp = temp.substr(0,alnLength);
490 int numDiff = countDiffs(oligo, temp);
492 if (alnLength == 0) { numDiff = bdiffs + 100; }
493 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
495 if(numDiff < minDiff){
499 minFGroup.push_back(it->second);
502 for(int i=0;i<alnLength;i++){
507 minFPos.push_back(tempminFPos);
508 }else if(numDiff == minDiff){
509 minFGroup.push_back(it->second);
511 for(int i=0;i<alnLength;i++){
516 minFPos.push_back(tempminFPos);
520 //cout << minDiff << '\t' << minCount << '\t' << endl;
521 if(minDiff > bdiffs) { success = minDiff; } //no good matches
523 //check for reverse match
524 if (alignment != NULL) { delete alignment; }
526 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
527 else{ alignment = NULL; }
529 //can you find the barcode
532 vector< vector<int> > minRGroup;
535 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
536 string oligo = it->first;
537 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
538 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
539 success = bdiffs + 10;
543 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
544 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
545 oligo = alignment->getSeqAAln();
546 string temp = alignment->getSeqBAln();
548 int alnLength = oligo.length();
549 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
550 oligo = oligo.substr(0,alnLength);
551 temp = temp.substr(0,alnLength);
552 int numDiff = countDiffs(oligo, temp);
553 if (alnLength == 0) { numDiff = bdiffs + 100; }
555 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
556 if(numDiff < minDiff){
560 minRGroup.push_back(it->second);
563 for(int i=0;i<alnLength;i++){
568 minRPos.push_back(tempminRPos);
569 }else if(numDiff == minDiff){
571 for(int i=0;i<alnLength;i++){
576 minRPos.push_back(tempminRPos);
577 minRGroup.push_back(it->second);
582 /*cout << minDiff << '\t' << minCount << '\t' << endl;
583 for (int i = 0; i < minFGroup.size(); i++) {
585 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
589 for (int i = 0; i < minRGroup.size(); i++) {
591 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
595 if(minDiff > bdiffs) { success = minDiff; } //no good matches
597 bool foundMatch = false;
599 for (int i = 0; i < minFGroup.size(); i++) {
600 for (int j = 0; j < minFGroup[i].size(); j++) {
601 for (int k = 0; k < minRGroup.size(); k++) {
602 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
609 if (matches.size() == 1) {
616 //we have an acceptable match for the forward and reverse, but do they match?
618 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
619 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
621 }else { success = bdiffs + 100; }
625 if (alignment != NULL) { delete alignment; }
631 catch(exception& e) {
632 m->errorOut(e, "TrimOligos", "stripIBarcode");
637 //*******************************************************************/
638 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
640 //look for forward barcode
641 string rawFSequence = forwardSeq.getUnaligned();
642 string rawRSequence = reverseSeq.getUnaligned();
643 int success = bdiffs + 1; //guilty until proven innocent
645 //can you find the forward barcode
646 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
647 string foligo = it->second.forward;
648 string roligo = it->second.reverse;
650 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
651 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
654 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
655 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
659 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
661 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
662 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
663 forwardQual.trimQScores(foligo.length(), -1);
664 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
670 //if you found the barcode or if you don't want to allow for diffs
671 if ((bdiffs == 0) || (success == 0)) { return success; }
672 else { //try aligning and see if you can find it
673 Alignment* alignment;
674 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
675 else{ alignment = NULL; }
677 //can you find the barcode
680 vector< vector<int> > minFGroup;
683 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
693 only want to look for forward = Sarah, John, Anna, Pat, Gail
694 reverse = Westcott, Schloss, Brown, Moore
696 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
698 //cout << endl << forwardSeq.getName() << endl;
699 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
700 string oligo = it->first;
702 if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
703 success = bdiffs + 10;
706 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
707 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
708 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
709 oligo = alignment->getSeqAAln();
710 string temp = alignment->getSeqBAln();
712 int alnLength = oligo.length();
714 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
715 oligo = oligo.substr(0,alnLength);
716 temp = temp.substr(0,alnLength);
717 int numDiff = countDiffs(oligo, temp);
719 if (alnLength == 0) { numDiff = bdiffs + 100; }
720 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
722 if(numDiff < minDiff){
726 minFGroup.push_back(it->second);
729 for(int i=0;i<alnLength;i++){
734 minFPos.push_back(tempminFPos);
735 }else if(numDiff == minDiff){
736 minFGroup.push_back(it->second);
738 for(int i=0;i<alnLength;i++){
743 minFPos.push_back(tempminFPos);
747 //cout << minDiff << '\t' << minCount << '\t' << endl;
748 if(minDiff > bdiffs) { success = minDiff; } //no good matches
750 //check for reverse match
751 if (alignment != NULL) { delete alignment; }
753 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
754 else{ alignment = NULL; }
756 //can you find the barcode
759 vector< vector<int> > minRGroup;
762 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
763 string oligo = it->first;
764 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
765 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
766 success = bdiffs + 10;
770 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
771 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
772 oligo = alignment->getSeqAAln();
773 string temp = alignment->getSeqBAln();
775 int alnLength = oligo.length();
776 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
777 oligo = oligo.substr(0,alnLength);
778 temp = temp.substr(0,alnLength);
779 int numDiff = countDiffs(oligo, temp);
780 if (alnLength == 0) { numDiff = bdiffs + 100; }
782 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
783 if(numDiff < minDiff){
787 minRGroup.push_back(it->second);
790 for(int i=0;i<alnLength;i++){
795 minRPos.push_back(tempminRPos);
796 }else if(numDiff == minDiff){
798 for(int i=0;i<alnLength;i++){
803 minRPos.push_back(tempminRPos);
804 minRGroup.push_back(it->second);
809 /*cout << minDiff << '\t' << minCount << '\t' << endl;
810 for (int i = 0; i < minFGroup.size(); i++) {
812 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
816 for (int i = 0; i < minRGroup.size(); i++) {
818 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
822 if(minDiff > bdiffs) { success = minDiff; } //no good matches
824 bool foundMatch = false;
826 for (int i = 0; i < minFGroup.size(); i++) {
827 for (int j = 0; j < minFGroup[i].size(); j++) {
828 for (int k = 0; k < minRGroup.size(); k++) {
829 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
836 if (matches.size() == 1) {
843 //we have an acceptable match for the forward and reverse, but do they match?
845 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
846 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
847 forwardQual.trimQScores(fStart, -1);
848 reverseQual.trimQScores(rStart, -1);
850 }else { success = bdiffs + 100; }
854 if (alignment != NULL) { delete alignment; }
860 catch(exception& e) {
861 m->errorOut(e, "TrimOligos", "stripIBarcode");
866 //*******************************************************************/
867 int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
869 //look for forward barcode
870 string rawSeq = seq.getUnaligned();
871 int success = bdiffs + 1; //guilty until proven innocent
872 //cout << seq.getName() << endl;
873 //can you find the forward barcode
874 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
875 string foligo = it->second.forward;
876 string roligo = it->second.reverse;
877 //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
878 //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
879 if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
880 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
883 if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
884 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
888 if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
890 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
891 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
892 if(qual.getName() != ""){
893 qual.trimQScores(-1, rawSeq.length()-roligo.length());
894 qual.trimQScores(foligo.length(), -1);
900 //cout << "success=" << success << endl;
901 //if you found the barcode or if you don't want to allow for diffs
902 if ((bdiffs == 0) || (success == 0)) { return success; }
903 else { //try aligning and see if you can find it
904 Alignment* alignment;
905 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
906 else{ alignment = NULL; }
908 //can you find the barcode
911 vector< vector<int> > minFGroup;
914 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
924 only want to look for forward = Sarah, John, Anna, Pat, Gail
925 reverse = Westcott, Schloss, Brown, Moore
927 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
929 //cout << endl << forwardSeq.getName() << endl;
930 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
931 string oligo = it->first;
933 if(rawSeq.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
934 success = bdiffs + 10;
937 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
938 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
939 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
940 oligo = alignment->getSeqAAln();
941 string temp = alignment->getSeqBAln();
943 int alnLength = oligo.length();
945 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
946 oligo = oligo.substr(0,alnLength);
947 temp = temp.substr(0,alnLength);
948 int numDiff = countDiffs(oligo, temp);
950 if (alnLength == 0) { numDiff = bdiffs + 100; }
951 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
953 if(numDiff < minDiff){
957 minFGroup.push_back(it->second);
960 for(int i=0;i<alnLength;i++){
965 minFPos.push_back(tempminFPos);
966 }else if(numDiff == minDiff){
967 minFGroup.push_back(it->second);
969 for(int i=0;i<alnLength;i++){
974 minFPos.push_back(tempminFPos);
978 //cout << minDiff << '\t' << minCount << '\t' << endl;
979 if(minDiff > bdiffs) { success = minDiff; } //no good matches
981 //check for reverse match
982 if (alignment != NULL) { delete alignment; }
984 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
985 else{ alignment = NULL; }
987 //can you find the barcode
990 vector< vector<int> > minRGroup;
993 string rawRSequence = reverseOligo(seq.getUnaligned());
994 //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl;
995 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
996 string oligo = reverseOligo(it->first);
997 //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
998 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
999 success = bdiffs + 10;
1003 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1004 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
1005 oligo = alignment->getSeqAAln();
1006 string temp = alignment->getSeqBAln();
1008 int alnLength = oligo.length();
1009 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1010 oligo = oligo.substr(0,alnLength);
1011 temp = temp.substr(0,alnLength);
1012 int numDiff = countDiffs(oligo, temp);
1013 if (alnLength == 0) { numDiff = bdiffs + 100; }
1015 //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1016 if(numDiff < minDiff){
1020 minRGroup.push_back(it->second);
1021 int tempminRPos = 0;
1023 for(int i=0;i<alnLength;i++){
1028 minRPos.push_back(tempminRPos);
1029 }else if(numDiff == minDiff){
1030 int tempminRPos = 0;
1031 for(int i=0;i<alnLength;i++){
1036 minRPos.push_back(tempminRPos);
1037 minRGroup.push_back(it->second);
1042 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1043 for (int i = 0; i < minFGroup.size(); i++) {
1045 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
1049 for (int i = 0; i < minRGroup.size(); i++) {
1051 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
1055 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1057 bool foundMatch = false;
1058 vector<int> matches;
1059 for (int i = 0; i < minFGroup.size(); i++) {
1060 for (int j = 0; j < minFGroup[i].size(); j++) {
1061 for (int k = 0; k < minRGroup.size(); k++) {
1062 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1069 if (matches.size() == 1) {
1072 fStart = minFPos[0];
1073 rStart = rawSeq.length() - minRPos[0];
1076 //we have an acceptable match for the forward and reverse, but do they match?
1078 string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1079 seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1080 if(qual.getName() != ""){
1081 qual.trimQScores(-1, rStart);
1082 qual.trimQScores(fStart, -1);
1085 //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
1086 }else { success = bdiffs + 100; }
1090 if (alignment != NULL) { delete alignment; }
1096 catch(exception& e) {
1097 m->errorOut(e, "TrimOligos", "stripPairedBarcode");
1102 //*******************************************************************/
1103 int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1106 string rawSeq = seq.getUnaligned();
1107 int success = pdiffs + 1; //guilty until proven innocent
1108 //cout << seq.getName() << endl;
1109 //can you find the forward
1110 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1111 string foligo = it->second.forward;
1112 string roligo = it->second.reverse;
1114 //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
1115 //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
1116 if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1117 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1120 if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1121 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1125 if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
1128 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
1129 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
1130 if(qual.getName() != ""){
1131 qual.trimQScores(-1, rawSeq.length()-roligo.length());
1132 qual.trimQScores(foligo.length(), -1);
1139 //cout << "success=" << success << endl;
1140 //if you found the barcode or if you don't want to allow for diffs
1141 if ((pdiffs == 0) || (success == 0)) { return success; }
1142 else { //try aligning and see if you can find it
1143 Alignment* alignment;
1144 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1145 else{ alignment = NULL; }
1147 //can you find the barcode
1150 vector< vector<int> > minFGroup;
1151 vector<int> minFPos;
1153 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1163 only want to look for forward = Sarah, John, Anna, Pat, Gail
1164 reverse = Westcott, Schloss, Brown, Moore
1166 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1168 //cout << endl << forwardSeq.getName() << endl;
1169 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1170 string oligo = it->first;
1172 if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1173 success = pdiffs + 10;
1176 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
1177 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1178 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
1179 oligo = alignment->getSeqAAln();
1180 string temp = alignment->getSeqBAln();
1182 int alnLength = oligo.length();
1184 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1185 oligo = oligo.substr(0,alnLength);
1186 temp = temp.substr(0,alnLength);
1187 int numDiff = countDiffs(oligo, temp);
1189 if (alnLength == 0) { numDiff = pdiffs + 100; }
1190 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1192 if(numDiff < minDiff){
1196 minFGroup.push_back(it->second);
1197 int tempminFPos = 0;
1199 for(int i=0;i<alnLength;i++){
1204 minFPos.push_back(tempminFPos);
1205 }else if(numDiff == minDiff){
1206 minFGroup.push_back(it->second);
1207 int tempminFPos = 0;
1208 for(int i=0;i<alnLength;i++){
1213 minFPos.push_back(tempminFPos);
1217 //cout << minDiff << '\t' << minCount << '\t' << endl;
1218 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1220 //check for reverse match
1221 if (alignment != NULL) { delete alignment; }
1223 if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1224 else{ alignment = NULL; }
1226 //can you find the barcode
1229 vector< vector<int> > minRGroup;
1230 vector<int> minRPos;
1232 string rawRSequence = reverseOligo(seq.getUnaligned());
1234 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1235 string oligo = reverseOligo(it->first);
1236 //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
1237 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1238 success = pdiffs + 10;
1242 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1243 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1244 oligo = alignment->getSeqAAln();
1245 string temp = alignment->getSeqBAln();
1247 int alnLength = oligo.length();
1248 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1249 oligo = oligo.substr(0,alnLength);
1250 temp = temp.substr(0,alnLength);
1251 int numDiff = countDiffs(oligo, temp);
1252 if (alnLength == 0) { numDiff = pdiffs + 100; }
1254 //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1255 if(numDiff < minDiff){
1259 minRGroup.push_back(it->second);
1260 int tempminRPos = 0;
1262 for(int i=0;i<alnLength;i++){
1267 minRPos.push_back(tempminRPos);
1268 }else if(numDiff == minDiff){
1269 int tempminRPos = 0;
1270 for(int i=0;i<alnLength;i++){
1275 minRPos.push_back(tempminRPos);
1276 minRGroup.push_back(it->second);
1281 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1282 for (int i = 0; i < minFGroup.size(); i++) {
1284 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
1288 for (int i = 0; i < minRGroup.size(); i++) {
1290 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
1294 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1296 bool foundMatch = false;
1297 vector<int> matches;
1298 for (int i = 0; i < minFGroup.size(); i++) {
1299 for (int j = 0; j < minFGroup[i].size(); j++) {
1300 for (int k = 0; k < minRGroup.size(); k++) {
1301 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1308 if (matches.size() == 1) {
1311 fStart = minFPos[0];
1312 rStart = rawSeq.length() - minRPos[0];
1315 //we have an acceptable match for the forward and reverse, but do they match?
1318 string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1319 seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1320 if(qual.getName() != ""){
1321 qual.trimQScores(-1, rStart);
1322 qual.trimQScores(fStart, -1);
1326 //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
1327 }else { success = pdiffs + 100; }
1331 if (alignment != NULL) { delete alignment; }
1337 catch(exception& e) {
1338 m->errorOut(e, "TrimOligos", "stripPairedPrimers");
1344 //*******************************************************************/
1345 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
1347 //look for forward barcode
1348 string rawFSequence = forwardSeq.getUnaligned();
1349 string rawRSequence = reverseSeq.getUnaligned();
1350 int success = pdiffs + 1; //guilty until proven innocent
1352 //can you find the forward barcode
1353 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1354 string foligo = it->second.forward;
1355 string roligo = it->second.reverse;
1357 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1358 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1361 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1362 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1366 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1368 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1369 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
1370 forwardQual.trimQScores(foligo.length(), -1);
1371 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
1377 //if you found the barcode or if you don't want to allow for diffs
1378 if ((pdiffs == 0) || (success == 0)) { return success; }
1379 else { //try aligning and see if you can find it
1380 Alignment* alignment;
1381 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1382 else{ alignment = NULL; }
1384 //can you find the barcode
1387 vector< vector<int> > minFGroup;
1388 vector<int> minFPos;
1390 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1400 only want to look for forward = Sarah, John, Anna, Pat, Gail
1401 reverse = Westcott, Schloss, Brown, Moore
1403 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1405 //cout << endl << forwardSeq.getName() << endl;
1406 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1407 string oligo = it->first;
1409 if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1410 success = pdiffs + 10;
1413 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1414 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1415 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1416 oligo = alignment->getSeqAAln();
1417 string temp = alignment->getSeqBAln();
1419 int alnLength = oligo.length();
1421 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1422 oligo = oligo.substr(0,alnLength);
1423 temp = temp.substr(0,alnLength);
1424 int numDiff = countDiffs(oligo, temp);
1426 if (alnLength == 0) { numDiff = pdiffs + 100; }
1427 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1429 if(numDiff < minDiff){
1433 minFGroup.push_back(it->second);
1434 int tempminFPos = 0;
1436 for(int i=0;i<alnLength;i++){
1441 minFPos.push_back(tempminFPos);
1442 }else if(numDiff == minDiff){
1443 minFGroup.push_back(it->second);
1444 int tempminFPos = 0;
1445 for(int i=0;i<alnLength;i++){
1450 minFPos.push_back(tempminFPos);
1454 //cout << minDiff << '\t' << minCount << '\t' << endl;
1455 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1457 //check for reverse match
1458 if (alignment != NULL) { delete alignment; }
1460 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1461 else{ alignment = NULL; }
1463 //can you find the barcode
1466 vector< vector<int> > minRGroup;
1467 vector<int> minRPos;
1469 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1470 string oligo = it->first;
1471 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1472 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1473 success = pdiffs + 10;
1477 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1478 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1479 oligo = alignment->getSeqAAln();
1480 string temp = alignment->getSeqBAln();
1482 int alnLength = oligo.length();
1483 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1484 oligo = oligo.substr(0,alnLength);
1485 temp = temp.substr(0,alnLength);
1486 int numDiff = countDiffs(oligo, temp);
1487 if (alnLength == 0) { numDiff = pdiffs + 100; }
1489 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1490 if(numDiff < minDiff){
1494 minRGroup.push_back(it->second);
1495 int tempminRPos = 0;
1497 for(int i=0;i<alnLength;i++){
1502 minRPos.push_back(tempminRPos);
1503 }else if(numDiff == minDiff){
1504 int tempminRPos = 0;
1505 for(int i=0;i<alnLength;i++){
1510 minRPos.push_back(tempminRPos);
1511 minRGroup.push_back(it->second);
1516 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1517 for (int i = 0; i < minFGroup.size(); i++) {
1519 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
1523 for (int i = 0; i < minRGroup.size(); i++) {
1525 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
1529 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1531 bool foundMatch = false;
1532 vector<int> matches;
1533 for (int i = 0; i < minFGroup.size(); i++) {
1534 for (int j = 0; j < minFGroup[i].size(); j++) {
1535 for (int k = 0; k < minRGroup.size(); k++) {
1536 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1543 if (matches.size() == 1) {
1546 fStart = minFPos[0];
1547 rStart = minRPos[0];
1550 //we have an acceptable match for the forward and reverse, but do they match?
1552 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1553 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1554 forwardQual.trimQScores(fStart, -1);
1555 reverseQual.trimQScores(rStart, -1);
1557 }else { success = pdiffs + 100; }
1561 if (alignment != NULL) { delete alignment; }
1567 catch(exception& e) {
1568 m->errorOut(e, "TrimOligos", "stripIForward");
1573 //*******************************************************************/
1574 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
1576 //look for forward barcode
1577 string rawFSequence = forwardSeq.getUnaligned();
1578 string rawRSequence = reverseSeq.getUnaligned();
1579 int success = pdiffs + 1; //guilty until proven innocent
1581 //can you find the forward barcode
1582 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1583 string foligo = it->second.forward;
1584 string roligo = it->second.reverse;
1586 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1587 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1590 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1591 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1595 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1597 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1598 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
1604 //if you found the barcode or if you don't want to allow for diffs
1605 if ((pdiffs == 0) || (success == 0)) { return success; }
1606 else { //try aligning and see if you can find it
1607 Alignment* alignment;
1608 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1609 else{ alignment = NULL; }
1611 //can you find the barcode
1614 vector< vector<int> > minFGroup;
1615 vector<int> minFPos;
1617 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1627 only want to look for forward = Sarah, John, Anna, Pat, Gail
1628 reverse = Westcott, Schloss, Brown, Moore
1630 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1632 //cout << endl << forwardSeq.getName() << endl;
1633 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1634 string oligo = it->first;
1636 if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1637 success = pdiffs + 10;
1640 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1641 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1642 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1643 oligo = alignment->getSeqAAln();
1644 string temp = alignment->getSeqBAln();
1646 int alnLength = oligo.length();
1648 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1649 oligo = oligo.substr(0,alnLength);
1650 temp = temp.substr(0,alnLength);
1651 int numDiff = countDiffs(oligo, temp);
1653 if (alnLength == 0) { numDiff = pdiffs + 100; }
1654 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1656 if(numDiff < minDiff){
1660 minFGroup.push_back(it->second);
1661 int tempminFPos = 0;
1663 for(int i=0;i<alnLength;i++){
1668 minFPos.push_back(tempminFPos);
1669 }else if(numDiff == minDiff){
1670 minFGroup.push_back(it->second);
1671 int tempminFPos = 0;
1672 for(int i=0;i<alnLength;i++){
1677 minFPos.push_back(tempminFPos);
1681 //cout << minDiff << '\t' << minCount << '\t' << endl;
1682 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1684 //check for reverse match
1685 if (alignment != NULL) { delete alignment; }
1687 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1688 else{ alignment = NULL; }
1690 //can you find the barcode
1693 vector< vector<int> > minRGroup;
1694 vector<int> minRPos;
1696 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1697 string oligo = it->first;
1698 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1699 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1700 success = pdiffs + 10;
1704 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1705 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1706 oligo = alignment->getSeqAAln();
1707 string temp = alignment->getSeqBAln();
1709 int alnLength = oligo.length();
1710 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1711 oligo = oligo.substr(0,alnLength);
1712 temp = temp.substr(0,alnLength);
1713 int numDiff = countDiffs(oligo, temp);
1714 if (alnLength == 0) { numDiff = pdiffs + 100; }
1716 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1717 if(numDiff < minDiff){
1721 minRGroup.push_back(it->second);
1722 int tempminRPos = 0;
1724 for(int i=0;i<alnLength;i++){
1729 minRPos.push_back(tempminRPos);
1730 }else if(numDiff == minDiff){
1731 int tempminRPos = 0;
1732 for(int i=0;i<alnLength;i++){
1737 minRPos.push_back(tempminRPos);
1738 minRGroup.push_back(it->second);
1743 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1744 for (int i = 0; i < minFGroup.size(); i++) {
1746 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
1750 for (int i = 0; i < minRGroup.size(); i++) {
1752 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
1756 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1758 bool foundMatch = false;
1759 vector<int> matches;
1760 for (int i = 0; i < minFGroup.size(); i++) {
1761 for (int j = 0; j < minFGroup[i].size(); j++) {
1762 for (int k = 0; k < minRGroup.size(); k++) {
1763 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1770 if (matches.size() == 1) {
1773 fStart = minFPos[0];
1774 rStart = minRPos[0];
1777 //we have an acceptable match for the forward and reverse, but do they match?
1779 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1780 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1782 }else { success = pdiffs + 100; }
1786 if (alignment != NULL) { delete alignment; }
1792 catch(exception& e) {
1793 m->errorOut(e, "TrimOligos", "stripIForward");
1798 //*******************************************************************/
1799 int TrimOligos::stripBarcode(Sequence& seq, int& group){
1802 string rawSequence = seq.getUnaligned();
1803 int success = bdiffs + 1; //guilty until proven innocent
1805 //can you find the barcode
1806 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1807 string oligo = it->first;
1808 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
1809 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
1813 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1815 seq.setUnaligned(rawSequence.substr(oligo.length()));
1822 //if you found the barcode or if you don't want to allow for diffs
1823 if ((bdiffs == 0) || (success == 0)) { return success; }
1825 else { //try aligning and see if you can find it
1826 Alignment* alignment;
1827 if (barcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
1828 else{ alignment = NULL; }
1830 //can you find the barcode
1836 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1837 string oligo = it->first;
1838 // int length = oligo.length();
1840 if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
1841 success = bdiffs + 10;
1845 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1846 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1847 oligo = alignment->getSeqAAln();
1848 string temp = alignment->getSeqBAln();
1850 int alnLength = oligo.length();
1852 for(int i=oligo.length()-1;i>=0;i--){
1853 if(oligo[i] != '-'){ alnLength = i+1; break; }
1855 oligo = oligo.substr(0,alnLength);
1856 temp = temp.substr(0,alnLength);
1858 int numDiff = countDiffs(oligo, temp);
1860 if(numDiff < minDiff){
1863 minGroup = it->second;
1865 for(int i=0;i<alnLength;i++){
1871 else if(numDiff == minDiff){
1877 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1878 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
1879 else{ //use the best match
1881 seq.setUnaligned(rawSequence.substr(minPos));
1885 if (alignment != NULL) { delete alignment; }
1892 catch(exception& e) {
1893 m->errorOut(e, "TrimOligos", "stripBarcode");
1899 /********************************************************************/
1900 int TrimOligos::stripForward(Sequence& seq, int& group){
1903 string rawSequence = seq.getUnaligned();
1904 int success = pdiffs + 1; //guilty until proven innocent
1906 //can you find the primer
1907 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1908 string oligo = it->first;
1909 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1910 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1914 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1916 seq.setUnaligned(rawSequence.substr(oligo.length()));
1922 //if you found the barcode or if you don't want to allow for diffs
1923 if ((pdiffs == 0) || (success == 0)) { return success; }
1925 else { //try aligning and see if you can find it
1926 Alignment* alignment;
1927 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1928 else{ alignment = NULL; }
1930 //can you find the barcode
1936 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1937 string oligo = it->first;
1938 // int length = oligo.length();
1940 if(rawSequence.length() < maxFPrimerLength){
1941 success = pdiffs + 100;
1945 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1946 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1947 oligo = alignment->getSeqAAln();
1948 string temp = alignment->getSeqBAln();
1950 int alnLength = oligo.length();
1952 for(int i=oligo.length()-1;i>=0;i--){
1953 if(oligo[i] != '-'){ alnLength = i+1; break; }
1955 oligo = oligo.substr(0,alnLength);
1956 temp = temp.substr(0,alnLength);
1958 int numDiff = countDiffs(oligo, temp);
1960 if(numDiff < minDiff){
1963 minGroup = it->second;
1965 for(int i=0;i<alnLength;i++){
1971 else if(numDiff == minDiff){
1977 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1978 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1979 else{ //use the best match
1981 seq.setUnaligned(rawSequence.substr(minPos));
1985 if (alignment != NULL) { delete alignment; }
1992 catch(exception& e) {
1993 m->errorOut(e, "TrimOligos", "stripForward");
1997 //*******************************************************************/
1998 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
2001 if (paired) { int success = stripPairedPrimers(seq, qual, group, keepForward); return success; }
2003 string rawSequence = seq.getUnaligned();
2004 int success = pdiffs + 1; //guilty until proven innocent
2006 //can you find the primer
2007 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
2008 string oligo = it->first;
2009 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
2010 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
2014 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2016 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
2017 if(qual.getName() != ""){
2018 if (!keepForward) { qual.trimQScores(oligo.length(), -1); }
2025 //if you found the barcode or if you don't want to allow for diffs
2026 if ((pdiffs == 0) || (success == 0)) { return success; }
2028 else { //try aligning and see if you can find it
2029 Alignment* alignment;
2030 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
2031 else{ alignment = NULL; }
2033 //can you find the barcode
2039 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
2040 string oligo = it->first;
2041 // int length = oligo.length();
2043 if(rawSequence.length() < maxFPrimerLength){
2044 success = pdiffs + 100;
2048 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2049 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
2050 oligo = alignment->getSeqAAln();
2051 string temp = alignment->getSeqBAln();
2053 int alnLength = oligo.length();
2055 for(int i=oligo.length()-1;i>=0;i--){
2056 if(oligo[i] != '-'){ alnLength = i+1; break; }
2058 oligo = oligo.substr(0,alnLength);
2059 temp = temp.substr(0,alnLength);
2061 int numDiff = countDiffs(oligo, temp);
2063 if(numDiff < minDiff){
2066 minGroup = it->second;
2068 for(int i=0;i<alnLength;i++){
2074 else if(numDiff == minDiff){
2080 if(minDiff > pdiffs) { success = minDiff; } //no good matches
2081 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
2082 else{ //use the best match
2084 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
2085 if(qual.getName() != ""){
2086 if (!keepForward) { qual.trimQScores(minPos, -1); }
2091 if (alignment != NULL) { delete alignment; }
2098 catch(exception& e) {
2099 m->errorOut(e, "TrimOligos", "stripForward");
2103 //******************************************************************/
2104 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
2106 string rawSequence = seq.getUnaligned();
2107 bool success = 0; //guilty until proven innocent
2109 for(int i=0;i<revPrimer.size();i++){
2110 string oligo = revPrimer[i];
2112 if(rawSequence.length() < oligo.length()){
2117 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2118 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2119 if(qual.getName() != ""){
2120 qual.trimQScores(-1, rawSequence.length()-oligo.length());
2129 catch(exception& e) {
2130 m->errorOut(e, "TrimOligos", "stripReverse");
2134 //******************************************************************/
2135 bool TrimOligos::stripReverse(Sequence& seq){
2138 string rawSequence = seq.getUnaligned();
2139 bool success = 0; //guilty until proven innocent
2141 for(int i=0;i<revPrimer.size();i++){
2142 string oligo = revPrimer[i];
2144 if(rawSequence.length() < oligo.length()){
2149 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2150 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2159 catch(exception& e) {
2160 m->errorOut(e, "TrimOligos", "stripReverse");
2164 //******************************************************************/
2165 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
2167 string rawSequence = seq.getUnaligned();
2168 bool success = ldiffs + 1; //guilty until proven innocent
2170 for(int i=0;i<linker.size();i++){
2171 string oligo = linker[i];
2173 if(rawSequence.length() < oligo.length()){
2174 success = ldiffs + 10;
2178 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2179 seq.setUnaligned(rawSequence.substr(oligo.length()));
2180 if(qual.getName() != ""){
2181 qual.trimQScores(oligo.length(), -1);
2188 //if you found the linker or if you don't want to allow for diffs
2189 if ((ldiffs == 0) || (success == 0)) { return success; }
2191 else { //try aligning and see if you can find it
2192 Alignment* alignment;
2193 if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2194 else{ alignment = NULL; }
2196 //can you find the barcode
2201 for(int i = 0; i < linker.size(); i++){
2202 string oligo = linker[i];
2203 // int length = oligo.length();
2205 if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length
2206 success = ldiffs + 10;
2210 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2211 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2212 oligo = alignment->getSeqAAln();
2213 string temp = alignment->getSeqBAln();
2215 int alnLength = oligo.length();
2217 for(int i=oligo.length()-1;i>=0;i--){
2218 if(oligo[i] != '-'){ alnLength = i+1; break; }
2220 oligo = oligo.substr(0,alnLength);
2221 temp = temp.substr(0,alnLength);
2223 int numDiff = countDiffs(oligo, temp);
2225 if(numDiff < minDiff){
2229 for(int i=0;i<alnLength;i++){
2235 else if(numDiff == minDiff){
2241 if(minDiff > ldiffs) { success = minDiff; } //no good matches
2242 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
2243 else{ //use the best match
2244 seq.setUnaligned(rawSequence.substr(minPos));
2246 if(qual.getName() != ""){
2247 qual.trimQScores(minPos, -1);
2252 if (alignment != NULL) { delete alignment; }
2260 catch(exception& e) {
2261 m->errorOut(e, "TrimOligos", "stripLinker");
2265 //******************************************************************/
2266 bool TrimOligos::stripLinker(Sequence& seq){
2269 string rawSequence = seq.getUnaligned();
2270 bool success = ldiffs +1; //guilty until proven innocent
2272 for(int i=0;i<linker.size();i++){
2273 string oligo = linker[i];
2275 if(rawSequence.length() < oligo.length()){
2276 success = ldiffs +10;
2280 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2281 seq.setUnaligned(rawSequence.substr(oligo.length()));
2287 //if you found the linker or if you don't want to allow for diffs
2288 if ((ldiffs == 0) || (success == 0)) { return success; }
2290 else { //try aligning and see if you can find it
2291 Alignment* alignment;
2292 if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2293 else{ alignment = NULL; }
2295 //can you find the barcode
2300 for(int i = 0; i < linker.size(); i++){
2301 string oligo = linker[i];
2302 // int length = oligo.length();
2304 if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length
2305 success = ldiffs + 10;
2309 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2310 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2311 oligo = alignment->getSeqAAln();
2312 string temp = alignment->getSeqBAln();
2314 int alnLength = oligo.length();
2316 for(int i=oligo.length()-1;i>=0;i--){
2317 if(oligo[i] != '-'){ alnLength = i+1; break; }
2319 oligo = oligo.substr(0,alnLength);
2320 temp = temp.substr(0,alnLength);
2322 int numDiff = countDiffs(oligo, temp);
2324 if(numDiff < minDiff){
2328 for(int i=0;i<alnLength;i++){
2334 else if(numDiff == minDiff){
2340 if(minDiff > ldiffs) { success = minDiff; } //no good matches
2341 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
2342 else{ //use the best match
2343 seq.setUnaligned(rawSequence.substr(minPos));
2347 if (alignment != NULL) { delete alignment; }
2354 catch(exception& e) {
2355 m->errorOut(e, "TrimOligos", "stripLinker");
2360 //******************************************************************/
2361 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
2363 string rawSequence = seq.getUnaligned();
2364 bool success = sdiffs+1; //guilty until proven innocent
2366 for(int i=0;i<spacer.size();i++){
2367 string oligo = spacer[i];
2369 if(rawSequence.length() < oligo.length()){
2370 success = sdiffs+10;
2374 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2375 seq.setUnaligned(rawSequence.substr(oligo.length()));
2376 if(qual.getName() != ""){
2377 qual.trimQScores(oligo.length(), -1);
2384 //if you found the spacer or if you don't want to allow for diffs
2385 if ((sdiffs == 0) || (success == 0)) { return success; }
2387 else { //try aligning and see if you can find it
2388 Alignment* alignment;
2389 if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2390 else{ alignment = NULL; }
2392 //can you find the barcode
2397 for(int i = 0; i < spacer.size(); i++){
2398 string oligo = spacer[i];
2399 // int length = oligo.length();
2401 if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length
2402 success = sdiffs + 10;
2406 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2407 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2408 oligo = alignment->getSeqAAln();
2409 string temp = alignment->getSeqBAln();
2411 int alnLength = oligo.length();
2413 for(int i=oligo.length()-1;i>=0;i--){
2414 if(oligo[i] != '-'){ alnLength = i+1; break; }
2416 oligo = oligo.substr(0,alnLength);
2417 temp = temp.substr(0,alnLength);
2419 int numDiff = countDiffs(oligo, temp);
2421 if(numDiff < minDiff){
2425 for(int i=0;i<alnLength;i++){
2431 else if(numDiff == minDiff){
2437 if(minDiff > sdiffs) { success = minDiff; } //no good matches
2438 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
2439 else{ //use the best match
2440 seq.setUnaligned(rawSequence.substr(minPos));
2442 if(qual.getName() != ""){
2443 qual.trimQScores(minPos, -1);
2448 if (alignment != NULL) { delete alignment; }
2456 catch(exception& e) {
2457 m->errorOut(e, "TrimOligos", "stripSpacer");
2461 //******************************************************************/
2462 bool TrimOligos::stripSpacer(Sequence& seq){
2465 string rawSequence = seq.getUnaligned();
2466 bool success = sdiffs+1; //guilty until proven innocent
2468 for(int i=0;i<spacer.size();i++){
2469 string oligo = spacer[i];
2471 if(rawSequence.length() < oligo.length()){
2472 success = sdiffs+10;
2476 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2477 seq.setUnaligned(rawSequence.substr(oligo.length()));
2483 //if you found the spacer or if you don't want to allow for diffs
2484 if ((sdiffs == 0) || (success == 0)) { return success; }
2486 else { //try aligning and see if you can find it
2487 Alignment* alignment;
2488 if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2489 else{ alignment = NULL; }
2491 //can you find the barcode
2496 for(int i = 0; i < spacer.size(); i++){
2497 string oligo = spacer[i];
2498 // int length = oligo.length();
2500 if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length
2501 success = sdiffs + 10;
2505 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2506 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2507 oligo = alignment->getSeqAAln();
2508 string temp = alignment->getSeqBAln();
2510 int alnLength = oligo.length();
2512 for(int i=oligo.length()-1;i>=0;i--){
2513 if(oligo[i] != '-'){ alnLength = i+1; break; }
2515 oligo = oligo.substr(0,alnLength);
2516 temp = temp.substr(0,alnLength);
2518 int numDiff = countDiffs(oligo, temp);
2520 if(numDiff < minDiff){
2524 for(int i=0;i<alnLength;i++){
2530 else if(numDiff == minDiff){
2536 if(minDiff > sdiffs) { success = minDiff; } //no good matches
2537 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
2538 else{ //use the best match
2539 seq.setUnaligned(rawSequence.substr(minPos));
2543 if (alignment != NULL) { delete alignment; }
2550 catch(exception& e) {
2551 m->errorOut(e, "TrimOligos", "stripSpacer");
2556 //******************************************************************/
2557 bool TrimOligos::compareDNASeq(string oligo, string seq){
2560 int length = oligo.length();
2562 for(int i=0;i<length;i++){
2564 if(oligo[i] != seq[i]){
2565 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
2566 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
2567 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
2568 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
2569 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
2570 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2571 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
2572 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
2573 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2574 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2575 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
2576 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
2578 if(success == 0) { break; }
2587 catch(exception& e) {
2588 m->errorOut(e, "TrimOligos", "compareDNASeq");
2593 //********************************************************************/
2594 int TrimOligos::countDiffs(string oligo, string seq){
2597 int length = oligo.length();
2600 for(int i=0;i<length;i++){
2602 if(oligo[i] != seq[i]){
2603 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
2604 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
2605 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
2606 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
2607 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
2608 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2609 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
2610 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
2611 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2612 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2613 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
2614 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
2621 catch(exception& e) {
2622 m->errorOut(e, "TrimOligos", "countDiffs");
2626 //********************************************************************/
2627 string TrimOligos::reverseOligo(string oligo){
2629 string reverse = "";
2631 for(int i=oligo.length()-1;i>=0;i--){
2633 if(oligo[i] == 'A') { reverse += 'T'; }
2634 else if(oligo[i] == 'T'){ reverse += 'A'; }
2635 else if(oligo[i] == 'U'){ reverse += 'A'; }
2637 else if(oligo[i] == 'G'){ reverse += 'C'; }
2638 else if(oligo[i] == 'C'){ reverse += 'G'; }
2640 else if(oligo[i] == 'R'){ reverse += 'Y'; }
2641 else if(oligo[i] == 'Y'){ reverse += 'R'; }
2643 else if(oligo[i] == 'M'){ reverse += 'K'; }
2644 else if(oligo[i] == 'K'){ reverse += 'M'; }
2646 else if(oligo[i] == 'W'){ reverse += 'W'; }
2647 else if(oligo[i] == 'S'){ reverse += 'S'; }
2649 else if(oligo[i] == 'B'){ reverse += 'V'; }
2650 else if(oligo[i] == 'V'){ reverse += 'B'; }
2652 else if(oligo[i] == 'D'){ reverse += 'H'; }
2653 else if(oligo[i] == 'H'){ reverse += 'D'; }
2655 else if(oligo[i] == '-'){ reverse += '-'; }
2656 else { reverse += 'N'; }
2662 catch(exception& e) {
2663 m->errorOut(e, "TrimOligos", "reverseOligo");
2668 /********************************************************************/