5 * Created by westcott on 9/1/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
19 m = MothurOut::getInstance();
32 maxFBarcodeLength = 0;
33 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
34 if(it->first.length() > maxFBarcodeLength){
35 maxFBarcodeLength = it->first.length();
39 map<string,int>::iterator it;
40 for(it=primers.begin();it!=primers.end();it++){
41 if(it->first.length() > maxFPrimerLength){
42 maxFPrimerLength = it->first.length();
47 for(int i = 0; i < linker.size(); i++){
48 if(linker[i].length() > maxLinkerLength){
49 maxLinkerLength = linker[i].length();
54 for(int i = 0; i < spacer.size(); i++){
55 if(spacer[i].length() > maxSpacerLength){
56 maxSpacerLength = spacer[i].length();
61 m->errorOut(e, "TrimOligos", "TrimOligos");
65 /********************************************************************/
66 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
67 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
69 m = MothurOut::getInstance();
77 maxFBarcodeLength = 0;
78 for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
79 string forward = it->second.forward;
80 map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
82 if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length(); }
84 if (itForward == ifbarcodes.end()) {
85 vector<int> temp; temp.push_back(it->first);
86 ifbarcodes[forward] = temp;
87 }else { itForward->second.push_back(it->first); }
91 for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
92 string forward = it->second.forward;
93 map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
95 if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length(); }
97 if (itForward == ifprimers.end()) {
98 vector<int> temp; temp.push_back(it->first);
99 ifprimers[forward] = temp;
100 }else { itForward->second.push_back(it->first); }
103 maxRBarcodeLength = 0;
104 for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
105 string forward = it->second.reverse;
106 map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
108 if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length(); }
110 if (itForward == irbarcodes.end()) {
111 vector<int> temp; temp.push_back(it->first);
112 irbarcodes[forward] = temp;
113 }else { itForward->second.push_back(it->first); }
116 maxRPrimerLength = 0;
117 for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
118 string forward = it->second.reverse;
119 map<string, vector<int> >::iterator itForward = irprimers.find(forward);
121 if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length(); }
123 if (itForward == irprimers.end()) {
124 vector<int> temp; temp.push_back(it->first);
125 irprimers[forward] = temp;
126 }else { itForward->second.push_back(it->first); }
132 catch(exception& e) {
133 m->errorOut(e, "TrimOligos", "TrimOligos");
137 /********************************************************************/
138 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
139 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
141 m = MothurOut::getInstance();
151 maxFBarcodeLength = 0;
152 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
153 string oligo = it->first;
154 if(oligo.length() > maxFBarcodeLength){
155 maxFBarcodeLength = oligo.length();
158 maxRBarcodeLength = maxFBarcodeLength;
160 maxFPrimerLength = 0;
161 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
162 string oligo = it->first;
163 if(oligo.length() > maxFPrimerLength){
164 maxFPrimerLength = oligo.length();
167 maxRPrimerLength = maxFPrimerLength;
169 catch(exception& e) {
170 m->errorOut(e, "TrimOligos", "TrimOligos");
174 /********************************************************************/
175 TrimOligos::~TrimOligos() {}
176 //********************************************************************/
177 bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
180 string rawSequence = seq.getUnaligned();
182 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
183 string oligo = it->first;
185 if(rawSequence.length() < oligo.length()) { break; }
188 int olength = oligo.length();
189 for (int j = 0; j < rawSequence.length()-olength; j++){
190 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
191 string rawChunk = rawSequence.substr(j, olength);
192 if(compareDNASeq(oligo, rawChunk)) {
194 primerEnd = primerStart + olength;
201 primerStart = 0; primerEnd = 0;
202 //if you don't want to allow for diffs
203 if (pdiffs == 0) { return false; }
204 else { //try aligning and see if you can find it
206 //can you find the barcode
210 Alignment* alignment;
211 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
212 else{ alignment = NULL; }
214 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
216 string prim = it->first;
218 int olength = prim.length();
219 if (rawSequence.length() < olength) {} //ignore primers too long for this seq
221 for (int j = 0; j < rawSequence.length()-olength; j++){
223 string oligo = it->first;
225 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
227 string rawChunk = rawSequence.substr(j, olength+pdiffs);
229 //use needleman to align first primer.length()+numdiffs of sequence to each barcode
230 alignment->alignPrimer(oligo, rawChunk);
231 oligo = alignment->getSeqAAln();
232 string temp = alignment->getSeqBAln();
234 int alnLength = oligo.length();
236 for(int i=oligo.length()-1;i>=0;i--){
237 if(oligo[i] != '-'){ alnLength = i+1; break; }
239 oligo = oligo.substr(0,alnLength);
240 temp = temp.substr(0,alnLength);
242 int numDiff = countDiffs(oligo, temp);
244 if(numDiff < minDiff){
248 primerEnd = primerStart + alnLength;
249 }else if(numDiff == minDiff){ minCount++; }
254 if (alignment != NULL) { delete alignment; }
256 if(minDiff > pdiffs) { primerStart = 0; primerEnd = 0; return false; } //no good matches
257 else if(minCount > 1) { primerStart = 0; primerEnd = 0; return false; } //can't tell the difference between multiple primers
261 primerStart = 0; primerEnd = 0;
265 catch(exception& e) {
266 m->errorOut(e, "TrimOligos", "stripForward");
270 //******************************************************************/
271 bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
274 string rawSequence = seq.getUnaligned();
276 for(int i=0;i<revPrimer.size();i++){
277 string oligo = revPrimer[i];
278 if(rawSequence.length() < oligo.length()) { break; }
281 int olength = oligo.length();
282 for (int j = rawSequence.length()-olength; j >= 0; j--){
283 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
284 string rawChunk = rawSequence.substr(j, olength);
286 if(compareDNASeq(oligo, rawChunk)) {
288 primerEnd = primerStart + olength;
295 primerStart = 0; primerEnd = 0;
298 catch(exception& e) {
299 m->errorOut(e, "PcrSeqsCommand", "findReverse");
303 //*******************************************************************/
305 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
308 if (paired) { int success = stripPairedBarcode(seq, qual, group); return success; }
310 string rawSequence = seq.getUnaligned();
311 int success = bdiffs + 1; //guilty until proven innocent
313 //can you find the barcode
314 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
315 string oligo = it->first;
316 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
317 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
321 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
323 seq.setUnaligned(rawSequence.substr(oligo.length()));
325 if(qual.getName() != ""){
326 qual.trimQScores(oligo.length(), -1);
334 //if you found the barcode or if you don't want to allow for diffs
335 if ((bdiffs == 0) || (success == 0)) { return success; }
337 else { //try aligning and see if you can find it
338 Alignment* alignment;
339 if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
340 else{ alignment = NULL; }
342 //can you find the barcode
348 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
349 string oligo = it->first;
350 // int length = oligo.length();
352 if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
353 success = bdiffs + 10;
357 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
358 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
359 oligo = alignment->getSeqAAln();
360 string temp = alignment->getSeqBAln();
362 int alnLength = oligo.length();
364 for(int i=oligo.length()-1;i>=0;i--){
365 if(oligo[i] != '-'){ alnLength = i+1; break; }
367 oligo = oligo.substr(0,alnLength);
368 temp = temp.substr(0,alnLength);
369 int numDiff = countDiffs(oligo, temp);
371 if(numDiff < minDiff){
374 minGroup = it->second;
376 for(int i=0;i<alnLength;i++){
382 else if(numDiff == minDiff){
388 if(minDiff > bdiffs) { success = minDiff; } //no good matches
389 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
390 else{ //use the best match
392 seq.setUnaligned(rawSequence.substr(minPos));
394 if(qual.getName() != ""){
395 qual.trimQScores(minPos, -1);
400 if (alignment != NULL) { delete alignment; }
407 catch(exception& e) {
408 m->errorOut(e, "TrimOligos", "stripBarcode");
412 //*******************************************************************/
413 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
415 //look for forward barcode
416 string rawFSequence = forwardSeq.getUnaligned();
417 string rawRSequence = reverseSeq.getUnaligned();
418 int success = bdiffs + 1; //guilty until proven innocent
420 //can you find the forward barcode
421 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
422 string foligo = it->second.forward;
423 string roligo = it->second.reverse;
425 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
426 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
429 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
430 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
434 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
436 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
437 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
443 //if you found the barcode or if you don't want to allow for diffs
444 if ((bdiffs == 0) || (success == 0)) { return success; }
445 else { //try aligning and see if you can find it
446 Alignment* alignment;
447 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
448 else{ alignment = NULL; }
450 //can you find the barcode
453 vector< vector<int> > minFGroup;
456 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
465 only want to look for forward = Sarah, John, Anna, Pat, Gail
466 reverse = Westcott, Schloss, Brown, Moore
467 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
469 //cout << endl << forwardSeq.getName() << endl;
470 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
471 string oligo = it->first;
473 if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
474 success = bdiffs + 10;
477 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
478 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
479 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
480 oligo = alignment->getSeqAAln();
481 string temp = alignment->getSeqBAln();
483 int alnLength = oligo.length();
485 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
486 oligo = oligo.substr(0,alnLength);
487 temp = temp.substr(0,alnLength);
488 int numDiff = countDiffs(oligo, temp);
490 if (alnLength == 0) { numDiff = bdiffs + 100; }
491 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
493 if(numDiff < minDiff){
497 minFGroup.push_back(it->second);
500 for(int i=0;i<alnLength;i++){
505 minFPos.push_back(tempminFPos);
506 }else if(numDiff == minDiff){
507 minFGroup.push_back(it->second);
509 for(int i=0;i<alnLength;i++){
514 minFPos.push_back(tempminFPos);
518 //cout << minDiff << '\t' << minCount << '\t' << endl;
519 if(minDiff > bdiffs) { success = minDiff; } //no good matches
521 //check for reverse match
522 if (alignment != NULL) { delete alignment; }
524 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
525 else{ alignment = NULL; }
527 //can you find the barcode
530 vector< vector<int> > minRGroup;
533 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
534 string oligo = it->first;
535 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
536 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
537 success = bdiffs + 10;
541 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
542 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
543 oligo = alignment->getSeqAAln();
544 string temp = alignment->getSeqBAln();
546 int alnLength = oligo.length();
547 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
548 oligo = oligo.substr(0,alnLength);
549 temp = temp.substr(0,alnLength);
550 int numDiff = countDiffs(oligo, temp);
551 if (alnLength == 0) { numDiff = bdiffs + 100; }
553 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
554 if(numDiff < minDiff){
558 minRGroup.push_back(it->second);
561 for(int i=0;i<alnLength;i++){
566 minRPos.push_back(tempminRPos);
567 }else if(numDiff == minDiff){
569 for(int i=0;i<alnLength;i++){
574 minRPos.push_back(tempminRPos);
575 minRGroup.push_back(it->second);
580 /*cout << minDiff << '\t' << minCount << '\t' << endl;
581 for (int i = 0; i < minFGroup.size(); i++) {
583 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
587 for (int i = 0; i < minRGroup.size(); i++) {
589 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
593 if(minDiff > bdiffs) { success = minDiff; } //no good matches
595 bool foundMatch = false;
597 for (int i = 0; i < minFGroup.size(); i++) {
598 for (int j = 0; j < minFGroup[i].size(); j++) {
599 for (int k = 0; k < minRGroup.size(); k++) {
600 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
607 if (matches.size() == 1) {
614 //we have an acceptable match for the forward and reverse, but do they match?
616 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
617 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
619 }else { success = bdiffs + 100; }
623 if (alignment != NULL) { delete alignment; }
629 catch(exception& e) {
630 m->errorOut(e, "TrimOligos", "stripIBarcode");
635 //*******************************************************************/
636 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
638 //look for forward barcode
639 string rawFSequence = forwardSeq.getUnaligned();
640 string rawRSequence = reverseSeq.getUnaligned();
641 int success = bdiffs + 1; //guilty until proven innocent
643 //can you find the forward barcode
644 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
645 string foligo = it->second.forward;
646 string roligo = it->second.reverse;
648 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
649 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
652 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
653 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
657 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
659 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
660 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
661 forwardQual.trimQScores(foligo.length(), -1);
662 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
668 //if you found the barcode or if you don't want to allow for diffs
669 if ((bdiffs == 0) || (success == 0)) { return success; }
670 else { //try aligning and see if you can find it
671 Alignment* alignment;
672 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
673 else{ alignment = NULL; }
675 //can you find the barcode
678 vector< vector<int> > minFGroup;
681 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
690 only want to look for forward = Sarah, John, Anna, Pat, Gail
691 reverse = Westcott, Schloss, Brown, Moore
692 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
694 //cout << endl << forwardSeq.getName() << endl;
695 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
696 string oligo = it->first;
698 if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
699 success = bdiffs + 10;
702 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
703 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
704 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
705 oligo = alignment->getSeqAAln();
706 string temp = alignment->getSeqBAln();
708 int alnLength = oligo.length();
710 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
711 oligo = oligo.substr(0,alnLength);
712 temp = temp.substr(0,alnLength);
713 int numDiff = countDiffs(oligo, temp);
715 if (alnLength == 0) { numDiff = bdiffs + 100; }
716 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
718 if(numDiff < minDiff){
722 minFGroup.push_back(it->second);
725 for(int i=0;i<alnLength;i++){
730 minFPos.push_back(tempminFPos);
731 }else if(numDiff == minDiff){
732 minFGroup.push_back(it->second);
734 for(int i=0;i<alnLength;i++){
739 minFPos.push_back(tempminFPos);
743 //cout << minDiff << '\t' << minCount << '\t' << endl;
744 if(minDiff > bdiffs) { success = minDiff; } //no good matches
746 //check for reverse match
747 if (alignment != NULL) { delete alignment; }
749 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
750 else{ alignment = NULL; }
752 //can you find the barcode
755 vector< vector<int> > minRGroup;
758 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
759 string oligo = it->first;
760 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
761 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
762 success = bdiffs + 10;
766 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
767 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
768 oligo = alignment->getSeqAAln();
769 string temp = alignment->getSeqBAln();
771 int alnLength = oligo.length();
772 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
773 oligo = oligo.substr(0,alnLength);
774 temp = temp.substr(0,alnLength);
775 int numDiff = countDiffs(oligo, temp);
776 if (alnLength == 0) { numDiff = bdiffs + 100; }
778 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
779 if(numDiff < minDiff){
783 minRGroup.push_back(it->second);
786 for(int i=0;i<alnLength;i++){
791 minRPos.push_back(tempminRPos);
792 }else if(numDiff == minDiff){
794 for(int i=0;i<alnLength;i++){
799 minRPos.push_back(tempminRPos);
800 minRGroup.push_back(it->second);
805 /*cout << minDiff << '\t' << minCount << '\t' << endl;
806 for (int i = 0; i < minFGroup.size(); i++) {
808 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
812 for (int i = 0; i < minRGroup.size(); i++) {
814 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
818 if(minDiff > bdiffs) { success = minDiff; } //no good matches
820 bool foundMatch = false;
822 for (int i = 0; i < minFGroup.size(); i++) {
823 for (int j = 0; j < minFGroup[i].size(); j++) {
824 for (int k = 0; k < minRGroup.size(); k++) {
825 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
832 if (matches.size() == 1) {
839 //we have an acceptable match for the forward and reverse, but do they match?
841 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
842 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
843 forwardQual.trimQScores(fStart, -1);
844 reverseQual.trimQScores(rStart, -1);
846 }else { success = bdiffs + 100; }
850 if (alignment != NULL) { delete alignment; }
856 catch(exception& e) {
857 m->errorOut(e, "TrimOligos", "stripIBarcode");
862 //*******************************************************************/
863 int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
865 //look for forward barcode
866 string rawSeq = seq.getUnaligned();
867 int success = bdiffs + 1; //guilty until proven innocent
868 //cout << seq.getName() << endl;
869 //can you find the forward barcode
870 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
871 string foligo = it->second.forward;
872 string roligo = it->second.reverse;
873 //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
874 //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
875 if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
876 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
879 if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
880 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
884 if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
886 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
887 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
888 if(qual.getName() != ""){
889 qual.trimQScores(-1, rawSeq.length()-roligo.length());
890 qual.trimQScores(foligo.length(), -1);
896 //cout << "success=" << success << endl;
897 //if you found the barcode or if you don't want to allow for diffs
898 if ((bdiffs == 0) || (success == 0)) { return success; }
899 else { //try aligning and see if you can find it
900 Alignment* alignment;
901 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
902 else{ alignment = NULL; }
904 //can you find the barcode
907 vector< vector<int> > minFGroup;
910 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
919 only want to look for forward = Sarah, John, Anna, Pat, Gail
920 reverse = Westcott, Schloss, Brown, Moore
921 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
923 //cout << endl << forwardSeq.getName() << endl;
924 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
925 string oligo = it->first;
927 if(rawSeq.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
928 success = bdiffs + 10;
931 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
932 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
933 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
934 oligo = alignment->getSeqAAln();
935 string temp = alignment->getSeqBAln();
937 int alnLength = oligo.length();
939 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
940 oligo = oligo.substr(0,alnLength);
941 temp = temp.substr(0,alnLength);
942 int numDiff = countDiffs(oligo, temp);
944 if (alnLength == 0) { numDiff = bdiffs + 100; }
945 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
947 if(numDiff < minDiff){
951 minFGroup.push_back(it->second);
954 for(int i=0;i<alnLength;i++){
959 minFPos.push_back(tempminFPos);
960 }else if(numDiff == minDiff){
961 minFGroup.push_back(it->second);
963 for(int i=0;i<alnLength;i++){
968 minFPos.push_back(tempminFPos);
972 //cout << minDiff << '\t' << minCount << '\t' << endl;
973 if(minDiff > bdiffs) { success = minDiff; } //no good matches
975 //check for reverse match
976 if (alignment != NULL) { delete alignment; }
978 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
979 else{ alignment = NULL; }
981 //can you find the barcode
984 vector< vector<int> > minRGroup;
987 string rawRSequence = reverseOligo(seq.getUnaligned());
988 //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl;
989 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
990 string oligo = reverseOligo(it->first);
991 //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
992 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
993 success = bdiffs + 10;
997 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
998 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
999 oligo = alignment->getSeqAAln();
1000 string temp = alignment->getSeqBAln();
1002 int alnLength = oligo.length();
1003 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1004 oligo = oligo.substr(0,alnLength);
1005 temp = temp.substr(0,alnLength);
1006 int numDiff = countDiffs(oligo, temp);
1007 if (alnLength == 0) { numDiff = bdiffs + 100; }
1009 //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1010 if(numDiff < minDiff){
1014 minRGroup.push_back(it->second);
1015 int tempminRPos = 0;
1017 for(int i=0;i<alnLength;i++){
1022 minRPos.push_back(tempminRPos);
1023 }else if(numDiff == minDiff){
1024 int tempminRPos = 0;
1025 for(int i=0;i<alnLength;i++){
1030 minRPos.push_back(tempminRPos);
1031 minRGroup.push_back(it->second);
1036 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1037 for (int i = 0; i < minFGroup.size(); i++) {
1039 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
1043 for (int i = 0; i < minRGroup.size(); i++) {
1045 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
1049 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1051 bool foundMatch = false;
1052 vector<int> matches;
1053 for (int i = 0; i < minFGroup.size(); i++) {
1054 for (int j = 0; j < minFGroup[i].size(); j++) {
1055 for (int k = 0; k < minRGroup.size(); k++) {
1056 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1063 if (matches.size() == 1) {
1066 fStart = minFPos[0];
1067 rStart = rawSeq.length() - minRPos[0];
1070 //we have an acceptable match for the forward and reverse, but do they match?
1072 string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1073 seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1074 if(qual.getName() != ""){
1075 qual.trimQScores(-1, rStart);
1076 qual.trimQScores(fStart, -1);
1079 //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
1080 }else { success = bdiffs + 100; }
1084 if (alignment != NULL) { delete alignment; }
1090 catch(exception& e) {
1091 m->errorOut(e, "TrimOligos", "stripPairedBarcode");
1096 //*******************************************************************/
1097 int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1100 string rawSeq = seq.getUnaligned();
1101 int success = pdiffs + 1; //guilty until proven innocent
1102 //cout << seq.getName() << endl;
1103 //can you find the forward
1104 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1105 string foligo = it->second.forward;
1106 string roligo = it->second.reverse;
1108 //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
1109 //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
1110 if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1111 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1114 if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1115 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1119 if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
1122 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
1123 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
1124 if(qual.getName() != ""){
1125 qual.trimQScores(-1, rawSeq.length()-roligo.length());
1126 qual.trimQScores(foligo.length(), -1);
1133 //cout << "success=" << success << endl;
1134 //if you found the barcode or if you don't want to allow for diffs
1135 if ((pdiffs == 0) || (success == 0)) { return success; }
1136 else { //try aligning and see if you can find it
1137 Alignment* alignment;
1138 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1139 else{ alignment = NULL; }
1141 //can you find the barcode
1144 vector< vector<int> > minFGroup;
1145 vector<int> minFPos;
1147 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1156 only want to look for forward = Sarah, John, Anna, Pat, Gail
1157 reverse = Westcott, Schloss, Brown, Moore
1158 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1160 //cout << endl << forwardSeq.getName() << endl;
1161 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1162 string oligo = it->first;
1164 if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1165 success = pdiffs + 10;
1168 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
1169 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1170 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
1171 oligo = alignment->getSeqAAln();
1172 string temp = alignment->getSeqBAln();
1174 int alnLength = oligo.length();
1176 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1177 oligo = oligo.substr(0,alnLength);
1178 temp = temp.substr(0,alnLength);
1179 int numDiff = countDiffs(oligo, temp);
1181 if (alnLength == 0) { numDiff = pdiffs + 100; }
1182 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1184 if(numDiff < minDiff){
1188 minFGroup.push_back(it->second);
1189 int tempminFPos = 0;
1191 for(int i=0;i<alnLength;i++){
1196 minFPos.push_back(tempminFPos);
1197 }else if(numDiff == minDiff){
1198 minFGroup.push_back(it->second);
1199 int tempminFPos = 0;
1200 for(int i=0;i<alnLength;i++){
1205 minFPos.push_back(tempminFPos);
1209 //cout << minDiff << '\t' << minCount << '\t' << endl;
1210 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1212 //check for reverse match
1213 if (alignment != NULL) { delete alignment; }
1215 if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1216 else{ alignment = NULL; }
1218 //can you find the barcode
1221 vector< vector<int> > minRGroup;
1222 vector<int> minRPos;
1224 string rawRSequence = reverseOligo(seq.getUnaligned());
1226 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1227 string oligo = reverseOligo(it->first);
1228 //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
1229 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1230 success = pdiffs + 10;
1234 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1235 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1236 oligo = alignment->getSeqAAln();
1237 string temp = alignment->getSeqBAln();
1239 int alnLength = oligo.length();
1240 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1241 oligo = oligo.substr(0,alnLength);
1242 temp = temp.substr(0,alnLength);
1243 int numDiff = countDiffs(oligo, temp);
1244 if (alnLength == 0) { numDiff = pdiffs + 100; }
1246 //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1247 if(numDiff < minDiff){
1251 minRGroup.push_back(it->second);
1252 int tempminRPos = 0;
1254 for(int i=0;i<alnLength;i++){
1259 minRPos.push_back(tempminRPos);
1260 }else if(numDiff == minDiff){
1261 int tempminRPos = 0;
1262 for(int i=0;i<alnLength;i++){
1267 minRPos.push_back(tempminRPos);
1268 minRGroup.push_back(it->second);
1273 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1274 for (int i = 0; i < minFGroup.size(); i++) {
1276 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
1280 for (int i = 0; i < minRGroup.size(); i++) {
1282 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
1286 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1288 bool foundMatch = false;
1289 vector<int> matches;
1290 for (int i = 0; i < minFGroup.size(); i++) {
1291 for (int j = 0; j < minFGroup[i].size(); j++) {
1292 for (int k = 0; k < minRGroup.size(); k++) {
1293 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1300 if (matches.size() == 1) {
1303 fStart = minFPos[0];
1304 rStart = rawSeq.length() - minRPos[0];
1307 //we have an acceptable match for the forward and reverse, but do they match?
1310 string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1311 seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1312 if(qual.getName() != ""){
1313 qual.trimQScores(-1, rStart);
1314 qual.trimQScores(fStart, -1);
1318 //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
1319 }else { success = pdiffs + 100; }
1323 if (alignment != NULL) { delete alignment; }
1329 catch(exception& e) {
1330 m->errorOut(e, "TrimOligos", "stripPairedPrimers");
1336 //*******************************************************************/
1337 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
1339 //look for forward barcode
1340 string rawFSequence = forwardSeq.getUnaligned();
1341 string rawRSequence = reverseSeq.getUnaligned();
1342 int success = pdiffs + 1; //guilty until proven innocent
1344 //can you find the forward barcode
1345 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1346 string foligo = it->second.forward;
1347 string roligo = it->second.reverse;
1349 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1350 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1353 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1354 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1358 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1360 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1361 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
1362 forwardQual.trimQScores(foligo.length(), -1);
1363 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
1369 //if you found the barcode or if you don't want to allow for diffs
1370 if ((pdiffs == 0) || (success == 0)) { return success; }
1371 else { //try aligning and see if you can find it
1372 Alignment* alignment;
1373 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1374 else{ alignment = NULL; }
1376 //can you find the barcode
1379 vector< vector<int> > minFGroup;
1380 vector<int> minFPos;
1382 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1391 only want to look for forward = Sarah, John, Anna, Pat, Gail
1392 reverse = Westcott, Schloss, Brown, Moore
1393 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1395 //cout << endl << forwardSeq.getName() << endl;
1396 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1397 string oligo = it->first;
1399 if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1400 success = pdiffs + 10;
1403 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1404 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1405 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1406 oligo = alignment->getSeqAAln();
1407 string temp = alignment->getSeqBAln();
1409 int alnLength = oligo.length();
1411 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1412 oligo = oligo.substr(0,alnLength);
1413 temp = temp.substr(0,alnLength);
1414 int numDiff = countDiffs(oligo, temp);
1416 if (alnLength == 0) { numDiff = pdiffs + 100; }
1417 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1419 if(numDiff < minDiff){
1423 minFGroup.push_back(it->second);
1424 int tempminFPos = 0;
1426 for(int i=0;i<alnLength;i++){
1431 minFPos.push_back(tempminFPos);
1432 }else if(numDiff == minDiff){
1433 minFGroup.push_back(it->second);
1434 int tempminFPos = 0;
1435 for(int i=0;i<alnLength;i++){
1440 minFPos.push_back(tempminFPos);
1444 //cout << minDiff << '\t' << minCount << '\t' << endl;
1445 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1447 //check for reverse match
1448 if (alignment != NULL) { delete alignment; }
1450 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1451 else{ alignment = NULL; }
1453 //can you find the barcode
1456 vector< vector<int> > minRGroup;
1457 vector<int> minRPos;
1459 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1460 string oligo = it->first;
1461 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1462 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1463 success = pdiffs + 10;
1467 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1468 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1469 oligo = alignment->getSeqAAln();
1470 string temp = alignment->getSeqBAln();
1472 int alnLength = oligo.length();
1473 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1474 oligo = oligo.substr(0,alnLength);
1475 temp = temp.substr(0,alnLength);
1476 int numDiff = countDiffs(oligo, temp);
1477 if (alnLength == 0) { numDiff = pdiffs + 100; }
1479 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1480 if(numDiff < minDiff){
1484 minRGroup.push_back(it->second);
1485 int tempminRPos = 0;
1487 for(int i=0;i<alnLength;i++){
1492 minRPos.push_back(tempminRPos);
1493 }else if(numDiff == minDiff){
1494 int tempminRPos = 0;
1495 for(int i=0;i<alnLength;i++){
1500 minRPos.push_back(tempminRPos);
1501 minRGroup.push_back(it->second);
1506 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1507 for (int i = 0; i < minFGroup.size(); i++) {
1509 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
1513 for (int i = 0; i < minRGroup.size(); i++) {
1515 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
1519 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1521 bool foundMatch = false;
1522 vector<int> matches;
1523 for (int i = 0; i < minFGroup.size(); i++) {
1524 for (int j = 0; j < minFGroup[i].size(); j++) {
1525 for (int k = 0; k < minRGroup.size(); k++) {
1526 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1533 if (matches.size() == 1) {
1536 fStart = minFPos[0];
1537 rStart = minRPos[0];
1540 //we have an acceptable match for the forward and reverse, but do they match?
1542 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1543 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1544 forwardQual.trimQScores(fStart, -1);
1545 reverseQual.trimQScores(rStart, -1);
1547 }else { success = pdiffs + 100; }
1551 if (alignment != NULL) { delete alignment; }
1557 catch(exception& e) {
1558 m->errorOut(e, "TrimOligos", "stripIForward");
1563 //*******************************************************************/
1564 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
1566 //look for forward barcode
1567 string rawFSequence = forwardSeq.getUnaligned();
1568 string rawRSequence = reverseSeq.getUnaligned();
1569 int success = pdiffs + 1; //guilty until proven innocent
1571 //can you find the forward barcode
1572 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1573 string foligo = it->second.forward;
1574 string roligo = it->second.reverse;
1576 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1577 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1580 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1581 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1585 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1587 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1588 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
1594 //if you found the barcode or if you don't want to allow for diffs
1595 if ((pdiffs == 0) || (success == 0)) { return success; }
1596 else { //try aligning and see if you can find it
1597 Alignment* alignment;
1598 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1599 else{ alignment = NULL; }
1601 //can you find the barcode
1604 vector< vector<int> > minFGroup;
1605 vector<int> minFPos;
1607 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1616 only want to look for forward = Sarah, John, Anna, Pat, Gail
1617 reverse = Westcott, Schloss, Brown, Moore
1618 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1620 //cout << endl << forwardSeq.getName() << endl;
1621 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1622 string oligo = it->first;
1624 if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1625 success = pdiffs + 10;
1628 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1629 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1630 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1631 oligo = alignment->getSeqAAln();
1632 string temp = alignment->getSeqBAln();
1634 int alnLength = oligo.length();
1636 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1637 oligo = oligo.substr(0,alnLength);
1638 temp = temp.substr(0,alnLength);
1639 int numDiff = countDiffs(oligo, temp);
1641 if (alnLength == 0) { numDiff = pdiffs + 100; }
1642 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1644 if(numDiff < minDiff){
1648 minFGroup.push_back(it->second);
1649 int tempminFPos = 0;
1651 for(int i=0;i<alnLength;i++){
1656 minFPos.push_back(tempminFPos);
1657 }else if(numDiff == minDiff){
1658 minFGroup.push_back(it->second);
1659 int tempminFPos = 0;
1660 for(int i=0;i<alnLength;i++){
1665 minFPos.push_back(tempminFPos);
1669 //cout << minDiff << '\t' << minCount << '\t' << endl;
1670 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1672 //check for reverse match
1673 if (alignment != NULL) { delete alignment; }
1675 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1676 else{ alignment = NULL; }
1678 //can you find the barcode
1681 vector< vector<int> > minRGroup;
1682 vector<int> minRPos;
1684 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1685 string oligo = it->first;
1686 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1687 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1688 success = pdiffs + 10;
1692 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1693 alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1694 oligo = alignment->getSeqAAln();
1695 string temp = alignment->getSeqBAln();
1697 int alnLength = oligo.length();
1698 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1699 oligo = oligo.substr(0,alnLength);
1700 temp = temp.substr(0,alnLength);
1701 int numDiff = countDiffs(oligo, temp);
1702 if (alnLength == 0) { numDiff = pdiffs + 100; }
1704 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1705 if(numDiff < minDiff){
1709 minRGroup.push_back(it->second);
1710 int tempminRPos = 0;
1712 for(int i=0;i<alnLength;i++){
1717 minRPos.push_back(tempminRPos);
1718 }else if(numDiff == minDiff){
1719 int tempminRPos = 0;
1720 for(int i=0;i<alnLength;i++){
1725 minRPos.push_back(tempminRPos);
1726 minRGroup.push_back(it->second);
1731 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1732 for (int i = 0; i < minFGroup.size(); i++) {
1734 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
1738 for (int i = 0; i < minRGroup.size(); i++) {
1740 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
1744 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1746 bool foundMatch = false;
1747 vector<int> matches;
1748 for (int i = 0; i < minFGroup.size(); i++) {
1749 for (int j = 0; j < minFGroup[i].size(); j++) {
1750 for (int k = 0; k < minRGroup.size(); k++) {
1751 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1758 if (matches.size() == 1) {
1761 fStart = minFPos[0];
1762 rStart = minRPos[0];
1765 //we have an acceptable match for the forward and reverse, but do they match?
1767 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1768 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1770 }else { success = pdiffs + 100; }
1774 if (alignment != NULL) { delete alignment; }
1780 catch(exception& e) {
1781 m->errorOut(e, "TrimOligos", "stripIForward");
1786 //*******************************************************************/
1787 int TrimOligos::stripBarcode(Sequence& seq, int& group){
1790 string rawSequence = seq.getUnaligned();
1791 int success = bdiffs + 1; //guilty until proven innocent
1793 //can you find the barcode
1794 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1795 string oligo = it->first;
1796 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
1797 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
1801 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1803 seq.setUnaligned(rawSequence.substr(oligo.length()));
1810 //if you found the barcode or if you don't want to allow for diffs
1811 if ((bdiffs == 0) || (success == 0)) { return success; }
1813 else { //try aligning and see if you can find it
1814 Alignment* alignment;
1815 if (barcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
1816 else{ alignment = NULL; }
1818 //can you find the barcode
1824 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1825 string oligo = it->first;
1826 // int length = oligo.length();
1828 if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
1829 success = bdiffs + 10;
1833 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1834 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1835 oligo = alignment->getSeqAAln();
1836 string temp = alignment->getSeqBAln();
1838 int alnLength = oligo.length();
1840 for(int i=oligo.length()-1;i>=0;i--){
1841 if(oligo[i] != '-'){ alnLength = i+1; break; }
1843 oligo = oligo.substr(0,alnLength);
1844 temp = temp.substr(0,alnLength);
1846 int numDiff = countDiffs(oligo, temp);
1848 if(numDiff < minDiff){
1851 minGroup = it->second;
1853 for(int i=0;i<alnLength;i++){
1859 else if(numDiff == minDiff){
1865 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1866 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
1867 else{ //use the best match
1869 seq.setUnaligned(rawSequence.substr(minPos));
1873 if (alignment != NULL) { delete alignment; }
1880 catch(exception& e) {
1881 m->errorOut(e, "TrimOligos", "stripBarcode");
1887 /********************************************************************/
1888 int TrimOligos::stripForward(Sequence& seq, int& group){
1891 string rawSequence = seq.getUnaligned();
1892 int success = pdiffs + 1; //guilty until proven innocent
1894 //can you find the primer
1895 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1896 string oligo = it->first;
1897 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1898 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1902 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1904 seq.setUnaligned(rawSequence.substr(oligo.length()));
1910 //if you found the barcode or if you don't want to allow for diffs
1911 if ((pdiffs == 0) || (success == 0)) { return success; }
1913 else { //try aligning and see if you can find it
1914 Alignment* alignment;
1915 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1916 else{ alignment = NULL; }
1918 //can you find the barcode
1924 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1925 string oligo = it->first;
1926 // int length = oligo.length();
1928 if(rawSequence.length() < maxFPrimerLength){
1929 success = pdiffs + 100;
1933 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1934 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1935 oligo = alignment->getSeqAAln();
1936 string temp = alignment->getSeqBAln();
1938 int alnLength = oligo.length();
1940 for(int i=oligo.length()-1;i>=0;i--){
1941 if(oligo[i] != '-'){ alnLength = i+1; break; }
1943 oligo = oligo.substr(0,alnLength);
1944 temp = temp.substr(0,alnLength);
1946 int numDiff = countDiffs(oligo, temp);
1948 if(numDiff < minDiff){
1951 minGroup = it->second;
1953 for(int i=0;i<alnLength;i++){
1959 else if(numDiff == minDiff){
1965 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1966 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1967 else{ //use the best match
1969 seq.setUnaligned(rawSequence.substr(minPos));
1973 if (alignment != NULL) { delete alignment; }
1980 catch(exception& e) {
1981 m->errorOut(e, "TrimOligos", "stripForward");
1985 //*******************************************************************/
1986 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1989 if (paired) { int success = stripPairedPrimers(seq, qual, group, keepForward); return success; }
1991 string rawSequence = seq.getUnaligned();
1992 int success = pdiffs + 1; //guilty until proven innocent
1994 //can you find the primer
1995 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1996 string oligo = it->first;
1997 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1998 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
2002 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2004 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
2005 if(qual.getName() != ""){
2006 if (!keepForward) { qual.trimQScores(oligo.length(), -1); }
2013 //if you found the barcode or if you don't want to allow for diffs
2014 if ((pdiffs == 0) || (success == 0)) { return success; }
2016 else { //try aligning and see if you can find it
2017 Alignment* alignment;
2018 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
2019 else{ alignment = NULL; }
2021 //can you find the barcode
2027 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
2028 string oligo = it->first;
2029 // int length = oligo.length();
2031 if(rawSequence.length() < maxFPrimerLength){
2032 success = pdiffs + 100;
2036 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2037 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
2038 oligo = alignment->getSeqAAln();
2039 string temp = alignment->getSeqBAln();
2041 int alnLength = oligo.length();
2043 for(int i=oligo.length()-1;i>=0;i--){
2044 if(oligo[i] != '-'){ alnLength = i+1; break; }
2046 oligo = oligo.substr(0,alnLength);
2047 temp = temp.substr(0,alnLength);
2049 int numDiff = countDiffs(oligo, temp);
2051 if(numDiff < minDiff){
2054 minGroup = it->second;
2056 for(int i=0;i<alnLength;i++){
2062 else if(numDiff == minDiff){
2068 if(minDiff > pdiffs) { success = minDiff; } //no good matches
2069 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
2070 else{ //use the best match
2072 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
2073 if(qual.getName() != ""){
2074 if (!keepForward) { qual.trimQScores(minPos, -1); }
2079 if (alignment != NULL) { delete alignment; }
2086 catch(exception& e) {
2087 m->errorOut(e, "TrimOligos", "stripForward");
2091 //******************************************************************/
2092 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
2094 string rawSequence = seq.getUnaligned();
2095 bool success = 0; //guilty until proven innocent
2097 for(int i=0;i<revPrimer.size();i++){
2098 string oligo = revPrimer[i];
2100 if(rawSequence.length() < oligo.length()){
2105 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2106 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2107 if(qual.getName() != ""){
2108 qual.trimQScores(-1, rawSequence.length()-oligo.length());
2117 catch(exception& e) {
2118 m->errorOut(e, "TrimOligos", "stripReverse");
2122 //******************************************************************/
2123 bool TrimOligos::stripReverse(Sequence& seq){
2126 string rawSequence = seq.getUnaligned();
2127 bool success = 0; //guilty until proven innocent
2129 for(int i=0;i<revPrimer.size();i++){
2130 string oligo = revPrimer[i];
2132 if(rawSequence.length() < oligo.length()){
2137 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2138 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2147 catch(exception& e) {
2148 m->errorOut(e, "TrimOligos", "stripReverse");
2152 //******************************************************************/
2153 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
2155 string rawSequence = seq.getUnaligned();
2156 bool success = ldiffs + 1; //guilty until proven innocent
2158 for(int i=0;i<linker.size();i++){
2159 string oligo = linker[i];
2161 if(rawSequence.length() < oligo.length()){
2162 success = ldiffs + 10;
2166 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2167 seq.setUnaligned(rawSequence.substr(oligo.length()));
2168 if(qual.getName() != ""){
2169 qual.trimQScores(oligo.length(), -1);
2176 //if you found the linker or if you don't want to allow for diffs
2177 if ((ldiffs == 0) || (success == 0)) { return success; }
2179 else { //try aligning and see if you can find it
2180 Alignment* alignment;
2181 if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2182 else{ alignment = NULL; }
2184 //can you find the barcode
2189 for(int i = 0; i < linker.size(); i++){
2190 string oligo = linker[i];
2191 // int length = oligo.length();
2193 if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length
2194 success = ldiffs + 10;
2198 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2199 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2200 oligo = alignment->getSeqAAln();
2201 string temp = alignment->getSeqBAln();
2203 int alnLength = oligo.length();
2205 for(int i=oligo.length()-1;i>=0;i--){
2206 if(oligo[i] != '-'){ alnLength = i+1; break; }
2208 oligo = oligo.substr(0,alnLength);
2209 temp = temp.substr(0,alnLength);
2211 int numDiff = countDiffs(oligo, temp);
2213 if(numDiff < minDiff){
2217 for(int i=0;i<alnLength;i++){
2223 else if(numDiff == minDiff){
2229 if(minDiff > ldiffs) { success = minDiff; } //no good matches
2230 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
2231 else{ //use the best match
2232 seq.setUnaligned(rawSequence.substr(minPos));
2234 if(qual.getName() != ""){
2235 qual.trimQScores(minPos, -1);
2240 if (alignment != NULL) { delete alignment; }
2248 catch(exception& e) {
2249 m->errorOut(e, "TrimOligos", "stripLinker");
2253 //******************************************************************/
2254 bool TrimOligos::stripLinker(Sequence& seq){
2257 string rawSequence = seq.getUnaligned();
2258 bool success = ldiffs +1; //guilty until proven innocent
2260 for(int i=0;i<linker.size();i++){
2261 string oligo = linker[i];
2263 if(rawSequence.length() < oligo.length()){
2264 success = ldiffs +10;
2268 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2269 seq.setUnaligned(rawSequence.substr(oligo.length()));
2275 //if you found the linker or if you don't want to allow for diffs
2276 if ((ldiffs == 0) || (success == 0)) { return success; }
2278 else { //try aligning and see if you can find it
2279 Alignment* alignment;
2280 if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2281 else{ alignment = NULL; }
2283 //can you find the barcode
2288 for(int i = 0; i < linker.size(); i++){
2289 string oligo = linker[i];
2290 // int length = oligo.length();
2292 if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length
2293 success = ldiffs + 10;
2297 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2298 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2299 oligo = alignment->getSeqAAln();
2300 string temp = alignment->getSeqBAln();
2302 int alnLength = oligo.length();
2304 for(int i=oligo.length()-1;i>=0;i--){
2305 if(oligo[i] != '-'){ alnLength = i+1; break; }
2307 oligo = oligo.substr(0,alnLength);
2308 temp = temp.substr(0,alnLength);
2310 int numDiff = countDiffs(oligo, temp);
2312 if(numDiff < minDiff){
2316 for(int i=0;i<alnLength;i++){
2322 else if(numDiff == minDiff){
2328 if(minDiff > ldiffs) { success = minDiff; } //no good matches
2329 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
2330 else{ //use the best match
2331 seq.setUnaligned(rawSequence.substr(minPos));
2335 if (alignment != NULL) { delete alignment; }
2342 catch(exception& e) {
2343 m->errorOut(e, "TrimOligos", "stripLinker");
2348 //******************************************************************/
2349 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
2351 string rawSequence = seq.getUnaligned();
2352 bool success = sdiffs+1; //guilty until proven innocent
2354 for(int i=0;i<spacer.size();i++){
2355 string oligo = spacer[i];
2357 if(rawSequence.length() < oligo.length()){
2358 success = sdiffs+10;
2362 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2363 seq.setUnaligned(rawSequence.substr(oligo.length()));
2364 if(qual.getName() != ""){
2365 qual.trimQScores(oligo.length(), -1);
2372 //if you found the spacer or if you don't want to allow for diffs
2373 if ((sdiffs == 0) || (success == 0)) { return success; }
2375 else { //try aligning and see if you can find it
2376 Alignment* alignment;
2377 if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2378 else{ alignment = NULL; }
2380 //can you find the barcode
2385 for(int i = 0; i < spacer.size(); i++){
2386 string oligo = spacer[i];
2387 // int length = oligo.length();
2389 if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length
2390 success = sdiffs + 10;
2394 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2395 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2396 oligo = alignment->getSeqAAln();
2397 string temp = alignment->getSeqBAln();
2399 int alnLength = oligo.length();
2401 for(int i=oligo.length()-1;i>=0;i--){
2402 if(oligo[i] != '-'){ alnLength = i+1; break; }
2404 oligo = oligo.substr(0,alnLength);
2405 temp = temp.substr(0,alnLength);
2407 int numDiff = countDiffs(oligo, temp);
2409 if(numDiff < minDiff){
2413 for(int i=0;i<alnLength;i++){
2419 else if(numDiff == minDiff){
2425 if(minDiff > sdiffs) { success = minDiff; } //no good matches
2426 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
2427 else{ //use the best match
2428 seq.setUnaligned(rawSequence.substr(minPos));
2430 if(qual.getName() != ""){
2431 qual.trimQScores(minPos, -1);
2436 if (alignment != NULL) { delete alignment; }
2444 catch(exception& e) {
2445 m->errorOut(e, "TrimOligos", "stripSpacer");
2449 //******************************************************************/
2450 bool TrimOligos::stripSpacer(Sequence& seq){
2453 string rawSequence = seq.getUnaligned();
2454 bool success = sdiffs+1; //guilty until proven innocent
2456 for(int i=0;i<spacer.size();i++){
2457 string oligo = spacer[i];
2459 if(rawSequence.length() < oligo.length()){
2460 success = sdiffs+10;
2464 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2465 seq.setUnaligned(rawSequence.substr(oligo.length()));
2471 //if you found the spacer or if you don't want to allow for diffs
2472 if ((sdiffs == 0) || (success == 0)) { return success; }
2474 else { //try aligning and see if you can find it
2475 Alignment* alignment;
2476 if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2477 else{ alignment = NULL; }
2479 //can you find the barcode
2484 for(int i = 0; i < spacer.size(); i++){
2485 string oligo = spacer[i];
2486 // int length = oligo.length();
2488 if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length
2489 success = sdiffs + 10;
2493 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2494 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2495 oligo = alignment->getSeqAAln();
2496 string temp = alignment->getSeqBAln();
2498 int alnLength = oligo.length();
2500 for(int i=oligo.length()-1;i>=0;i--){
2501 if(oligo[i] != '-'){ alnLength = i+1; break; }
2503 oligo = oligo.substr(0,alnLength);
2504 temp = temp.substr(0,alnLength);
2506 int numDiff = countDiffs(oligo, temp);
2508 if(numDiff < minDiff){
2512 for(int i=0;i<alnLength;i++){
2518 else if(numDiff == minDiff){
2524 if(minDiff > sdiffs) { success = minDiff; } //no good matches
2525 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
2526 else{ //use the best match
2527 seq.setUnaligned(rawSequence.substr(minPos));
2531 if (alignment != NULL) { delete alignment; }
2538 catch(exception& e) {
2539 m->errorOut(e, "TrimOligos", "stripSpacer");
2544 //******************************************************************/
2545 bool TrimOligos::compareDNASeq(string oligo, string seq){
2548 int length = oligo.length();
2550 for(int i=0;i<length;i++){
2552 if(oligo[i] != seq[i]){
2553 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
2554 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
2555 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
2556 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
2557 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
2558 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2559 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
2560 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
2561 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2562 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2563 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
2564 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
2566 if(success == 0) { break; }
2575 catch(exception& e) {
2576 m->errorOut(e, "TrimOligos", "compareDNASeq");
2581 //********************************************************************/
2582 int TrimOligos::countDiffs(string oligo, string seq){
2585 int length = oligo.length();
2588 for(int i=0;i<length;i++){
2590 if(oligo[i] != seq[i]){
2591 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
2592 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
2593 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
2594 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
2595 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
2596 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2597 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
2598 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
2599 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2600 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2601 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
2602 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
2609 catch(exception& e) {
2610 m->errorOut(e, "TrimOligos", "countDiffs");
2614 //********************************************************************/
2615 string TrimOligos::reverseOligo(string oligo){
2617 string reverse = "";
2619 for(int i=oligo.length()-1;i>=0;i--){
2621 if(oligo[i] == 'A') { reverse += 'T'; }
2622 else if(oligo[i] == 'T'){ reverse += 'A'; }
2623 else if(oligo[i] == 'U'){ reverse += 'A'; }
2625 else if(oligo[i] == 'G'){ reverse += 'C'; }
2626 else if(oligo[i] == 'C'){ reverse += 'G'; }
2628 else if(oligo[i] == 'R'){ reverse += 'Y'; }
2629 else if(oligo[i] == 'Y'){ reverse += 'R'; }
2631 else if(oligo[i] == 'M'){ reverse += 'K'; }
2632 else if(oligo[i] == 'K'){ reverse += 'M'; }
2634 else if(oligo[i] == 'W'){ reverse += 'W'; }
2635 else if(oligo[i] == 'S'){ reverse += 'S'; }
2637 else if(oligo[i] == 'B'){ reverse += 'V'; }
2638 else if(oligo[i] == 'V'){ reverse += 'B'; }
2640 else if(oligo[i] == 'D'){ reverse += 'H'; }
2641 else if(oligo[i] == 'H'){ reverse += 'D'; }
2643 else if(oligo[i] == '-'){ reverse += '-'; }
2644 else { reverse += 'N'; }
2650 catch(exception& e) {
2651 m->errorOut(e, "TrimOligos", "reverseOligo");
2656 /********************************************************************/