5 * Created by westcott on 9/1/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
19 m = MothurOut::getInstance();
31 maxFBarcodeLength = 0;
32 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
33 if(it->first.length() > maxFBarcodeLength){
34 maxFBarcodeLength = it->first.length();
38 map<string,int>::iterator it;
39 for(it=primers.begin();it!=primers.end();it++){
40 if(it->first.length() > maxFPrimerLength){
41 maxFPrimerLength = it->first.length();
46 for(int i = 0; i < linker.size(); i++){
47 if(linker[i].length() > maxLinkerLength){
48 maxLinkerLength = linker[i].length();
53 for(int i = 0; i < spacer.size(); i++){
54 if(spacer[i].length() > maxSpacerLength){
55 maxSpacerLength = spacer[i].length();
60 m->errorOut(e, "TrimOligos", "TrimOligos");
64 /********************************************************************/
65 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
66 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
68 m = MothurOut::getInstance();
75 maxFBarcodeLength = 0;
76 for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
77 string forward = it->second.forward;
78 map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
80 if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length(); }
82 if (itForward == ifbarcodes.end()) {
83 vector<int> temp; temp.push_back(it->first);
84 ifbarcodes[forward] = temp;
85 }else { itForward->second.push_back(it->first); }
89 for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
90 string forward = it->second.forward;
91 map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
93 if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length(); }
95 if (itForward == ifprimers.end()) {
96 vector<int> temp; temp.push_back(it->first);
97 ifprimers[forward] = temp;
98 }else { itForward->second.push_back(it->first); }
101 maxRBarcodeLength = 0;
102 for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
103 string forward = it->second.reverse;
104 map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
106 if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length(); }
108 if (itForward == irbarcodes.end()) {
109 vector<int> temp; temp.push_back(it->first);
110 irbarcodes[forward] = temp;
111 }else { itForward->second.push_back(it->first); }
114 maxRPrimerLength = 0;
115 for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
116 string forward = it->second.reverse;
117 map<string, vector<int> >::iterator itForward = irprimers.find(forward);
119 if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length(); }
121 if (itForward == irprimers.end()) {
122 vector<int> temp; temp.push_back(it->first);
123 irprimers[forward] = temp;
124 }else { itForward->second.push_back(it->first); }
130 catch(exception& e) {
131 m->errorOut(e, "TrimOligos", "TrimOligos");
135 /********************************************************************/
136 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
137 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
139 m = MothurOut::getInstance();
148 catch(exception& e) {
149 m->errorOut(e, "TrimOligos", "TrimOligos");
153 /********************************************************************/
154 TrimOligos::~TrimOligos() {}
155 //*******************************************************************/
156 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
159 string rawSequence = seq.getUnaligned();
160 int success = bdiffs + 1; //guilty until proven innocent
162 //can you find the barcode
163 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
164 string oligo = it->first;
165 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
166 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
170 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
172 seq.setUnaligned(rawSequence.substr(oligo.length()));
174 if(qual.getName() != ""){
175 qual.trimQScores(oligo.length(), -1);
183 //if you found the barcode or if you don't want to allow for diffs
184 if ((bdiffs == 0) || (success == 0)) { return success; }
186 else { //try aligning and see if you can find it
187 Alignment* alignment;
188 if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
189 else{ alignment = NULL; }
191 //can you find the barcode
197 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
198 string oligo = it->first;
199 // int length = oligo.length();
201 if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
202 success = bdiffs + 10;
206 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
207 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
208 oligo = alignment->getSeqAAln();
209 string temp = alignment->getSeqBAln();
211 int alnLength = oligo.length();
213 for(int i=oligo.length()-1;i>=0;i--){
214 if(oligo[i] != '-'){ alnLength = i+1; break; }
216 oligo = oligo.substr(0,alnLength);
217 temp = temp.substr(0,alnLength);
218 int numDiff = countDiffs(oligo, temp);
220 if(numDiff < minDiff){
223 minGroup = it->second;
225 for(int i=0;i<alnLength;i++){
231 else if(numDiff == minDiff){
237 if(minDiff > bdiffs) { success = minDiff; } //no good matches
238 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
239 else{ //use the best match
241 seq.setUnaligned(rawSequence.substr(minPos));
243 if(qual.getName() != ""){
244 qual.trimQScores(minPos, -1);
249 if (alignment != NULL) { delete alignment; }
256 catch(exception& e) {
257 m->errorOut(e, "TrimOligos", "stripBarcode");
261 //*******************************************************************/
262 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
264 //look for forward barcode
265 string rawFSequence = forwardSeq.getUnaligned();
266 string rawRSequence = reverseSeq.getUnaligned();
267 int success = bdiffs + 1; //guilty until proven innocent
269 //can you find the forward barcode
270 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
271 string foligo = it->second.forward;
272 string roligo = it->second.reverse;
274 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
275 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
278 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
279 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
283 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
285 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
286 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
292 //if you found the barcode or if you don't want to allow for diffs
293 if ((bdiffs == 0) || (success == 0)) { return success; }
294 else { //try aligning and see if you can find it
295 Alignment* alignment;
296 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
297 else{ alignment = NULL; }
299 //can you find the barcode
302 vector< vector<int> > minFGroup;
305 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
315 only want to look for forward = Sarah, John, Anna, Pat, Gail
316 reverse = Westcott, Schloss, Brown, Moore
318 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
320 //cout << endl << forwardSeq.getName() << endl;
321 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
322 string oligo = it->first;
324 if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
325 success = bdiffs + 10;
328 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
329 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
330 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
331 oligo = alignment->getSeqAAln();
332 string temp = alignment->getSeqBAln();
334 int alnLength = oligo.length();
336 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
337 oligo = oligo.substr(0,alnLength);
338 temp = temp.substr(0,alnLength);
339 int numDiff = countDiffs(oligo, temp);
341 if (alnLength == 0) { numDiff = bdiffs + 100; }
342 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
344 if(numDiff < minDiff){
348 minFGroup.push_back(it->second);
351 for(int i=0;i<alnLength;i++){
356 minFPos.push_back(tempminFPos);
357 }else if(numDiff == minDiff){
358 minFGroup.push_back(it->second);
360 for(int i=0;i<alnLength;i++){
365 minFPos.push_back(tempminFPos);
369 //cout << minDiff << '\t' << minCount << '\t' << endl;
370 if(minDiff > bdiffs) { success = minDiff; } //no good matches
372 //check for reverse match
373 if (alignment != NULL) { delete alignment; }
375 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
376 else{ alignment = NULL; }
378 //can you find the barcode
381 vector< vector<int> > minRGroup;
384 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
385 string oligo = it->first;
386 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
387 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
388 success = bdiffs + 10;
392 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
393 alignment->align(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
394 oligo = alignment->getSeqAAln();
395 string temp = alignment->getSeqBAln();
397 int alnLength = oligo.length();
398 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
399 oligo = oligo.substr(0,alnLength);
400 temp = temp.substr(0,alnLength);
401 int numDiff = countDiffs(oligo, temp);
402 if (alnLength == 0) { numDiff = bdiffs + 100; }
404 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
405 if(numDiff < minDiff){
409 minRGroup.push_back(it->second);
412 for(int i=0;i<alnLength;i++){
417 minRPos.push_back(tempminRPos);
418 }else if(numDiff == minDiff){
420 for(int i=0;i<alnLength;i++){
425 minRPos.push_back(tempminRPos);
426 minRGroup.push_back(it->second);
431 /*cout << minDiff << '\t' << minCount << '\t' << endl;
432 for (int i = 0; i < minFGroup.size(); i++) {
434 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
438 for (int i = 0; i < minRGroup.size(); i++) {
440 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
444 if(minDiff > bdiffs) { success = minDiff; } //no good matches
446 bool foundMatch = false;
448 for (int i = 0; i < minFGroup.size(); i++) {
449 for (int j = 0; j < minFGroup[i].size(); j++) {
450 for (int k = 0; k < minRGroup.size(); k++) {
451 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
458 if (matches.size() == 1) {
465 //we have an acceptable match for the forward and reverse, but do they match?
467 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
468 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
470 }else { success = bdiffs + 100; }
474 if (alignment != NULL) { delete alignment; }
480 catch(exception& e) {
481 m->errorOut(e, "TrimOligos", "stripIBarcode");
486 //*******************************************************************/
487 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
489 //look for forward barcode
490 string rawFSequence = forwardSeq.getUnaligned();
491 string rawRSequence = reverseSeq.getUnaligned();
492 int success = bdiffs + 1; //guilty until proven innocent
494 //can you find the forward barcode
495 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
496 string foligo = it->second.forward;
497 string roligo = it->second.reverse;
499 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
500 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
503 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
504 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
508 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
510 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
511 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
512 forwardQual.trimQScores(foligo.length(), -1);
513 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
519 //if you found the barcode or if you don't want to allow for diffs
520 if ((bdiffs == 0) || (success == 0)) { return success; }
521 else { //try aligning and see if you can find it
522 Alignment* alignment;
523 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
524 else{ alignment = NULL; }
526 //can you find the barcode
529 vector< vector<int> > minFGroup;
532 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
542 only want to look for forward = Sarah, John, Anna, Pat, Gail
543 reverse = Westcott, Schloss, Brown, Moore
545 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
547 //cout << endl << forwardSeq.getName() << endl;
548 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
549 string oligo = it->first;
551 if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
552 success = bdiffs + 10;
555 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
556 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
557 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
558 oligo = alignment->getSeqAAln();
559 string temp = alignment->getSeqBAln();
561 int alnLength = oligo.length();
563 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
564 oligo = oligo.substr(0,alnLength);
565 temp = temp.substr(0,alnLength);
566 int numDiff = countDiffs(oligo, temp);
568 if (alnLength == 0) { numDiff = bdiffs + 100; }
569 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
571 if(numDiff < minDiff){
575 minFGroup.push_back(it->second);
578 for(int i=0;i<alnLength;i++){
583 minFPos.push_back(tempminFPos);
584 }else if(numDiff == minDiff){
585 minFGroup.push_back(it->second);
587 for(int i=0;i<alnLength;i++){
592 minFPos.push_back(tempminFPos);
596 //cout << minDiff << '\t' << minCount << '\t' << endl;
597 if(minDiff > bdiffs) { success = minDiff; } //no good matches
599 //check for reverse match
600 if (alignment != NULL) { delete alignment; }
602 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
603 else{ alignment = NULL; }
605 //can you find the barcode
608 vector< vector<int> > minRGroup;
611 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
612 string oligo = it->first;
613 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
614 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
615 success = bdiffs + 10;
619 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
620 alignment->align(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
621 oligo = alignment->getSeqAAln();
622 string temp = alignment->getSeqBAln();
624 int alnLength = oligo.length();
625 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
626 oligo = oligo.substr(0,alnLength);
627 temp = temp.substr(0,alnLength);
628 int numDiff = countDiffs(oligo, temp);
629 if (alnLength == 0) { numDiff = bdiffs + 100; }
631 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
632 if(numDiff < minDiff){
636 minRGroup.push_back(it->second);
639 for(int i=0;i<alnLength;i++){
644 minRPos.push_back(tempminRPos);
645 }else if(numDiff == minDiff){
647 for(int i=0;i<alnLength;i++){
652 minRPos.push_back(tempminRPos);
653 minRGroup.push_back(it->second);
658 /*cout << minDiff << '\t' << minCount << '\t' << endl;
659 for (int i = 0; i < minFGroup.size(); i++) {
661 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
665 for (int i = 0; i < minRGroup.size(); i++) {
667 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
671 if(minDiff > bdiffs) { success = minDiff; } //no good matches
673 bool foundMatch = false;
675 for (int i = 0; i < minFGroup.size(); i++) {
676 for (int j = 0; j < minFGroup[i].size(); j++) {
677 for (int k = 0; k < minRGroup.size(); k++) {
678 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
685 if (matches.size() == 1) {
692 //we have an acceptable match for the forward and reverse, but do they match?
694 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
695 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
696 forwardQual.trimQScores(fStart, -1);
697 reverseQual.trimQScores(rStart, -1);
699 }else { success = bdiffs + 100; }
703 if (alignment != NULL) { delete alignment; }
709 catch(exception& e) {
710 m->errorOut(e, "TrimOligos", "stripIBarcode");
715 //*******************************************************************/
716 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
718 //look for forward barcode
719 string rawFSequence = forwardSeq.getUnaligned();
720 string rawRSequence = reverseSeq.getUnaligned();
721 int success = pdiffs + 1; //guilty until proven innocent
723 //can you find the forward barcode
724 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
725 string foligo = it->second.forward;
726 string roligo = it->second.reverse;
728 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
729 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
732 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
733 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
737 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
739 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
740 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
741 forwardQual.trimQScores(foligo.length(), -1);
742 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
748 //if you found the barcode or if you don't want to allow for diffs
749 if ((pdiffs == 0) || (success == 0)) { return success; }
750 else { //try aligning and see if you can find it
751 Alignment* alignment;
752 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
753 else{ alignment = NULL; }
755 //can you find the barcode
758 vector< vector<int> > minFGroup;
761 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
771 only want to look for forward = Sarah, John, Anna, Pat, Gail
772 reverse = Westcott, Schloss, Brown, Moore
774 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
776 //cout << endl << forwardSeq.getName() << endl;
777 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
778 string oligo = it->first;
780 if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
781 success = pdiffs + 10;
784 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
785 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
786 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
787 oligo = alignment->getSeqAAln();
788 string temp = alignment->getSeqBAln();
790 int alnLength = oligo.length();
792 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
793 oligo = oligo.substr(0,alnLength);
794 temp = temp.substr(0,alnLength);
795 int numDiff = countDiffs(oligo, temp);
797 if (alnLength == 0) { numDiff = pdiffs + 100; }
798 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
800 if(numDiff < minDiff){
804 minFGroup.push_back(it->second);
807 for(int i=0;i<alnLength;i++){
812 minFPos.push_back(tempminFPos);
813 }else if(numDiff == minDiff){
814 minFGroup.push_back(it->second);
816 for(int i=0;i<alnLength;i++){
821 minFPos.push_back(tempminFPos);
825 //cout << minDiff << '\t' << minCount << '\t' << endl;
826 if(minDiff > pdiffs) { success = minDiff; } //no good matches
828 //check for reverse match
829 if (alignment != NULL) { delete alignment; }
831 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
832 else{ alignment = NULL; }
834 //can you find the barcode
837 vector< vector<int> > minRGroup;
840 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
841 string oligo = it->first;
842 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
843 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
844 success = pdiffs + 10;
848 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
849 alignment->align(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
850 oligo = alignment->getSeqAAln();
851 string temp = alignment->getSeqBAln();
853 int alnLength = oligo.length();
854 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
855 oligo = oligo.substr(0,alnLength);
856 temp = temp.substr(0,alnLength);
857 int numDiff = countDiffs(oligo, temp);
858 if (alnLength == 0) { numDiff = pdiffs + 100; }
860 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
861 if(numDiff < minDiff){
865 minRGroup.push_back(it->second);
868 for(int i=0;i<alnLength;i++){
873 minRPos.push_back(tempminRPos);
874 }else if(numDiff == minDiff){
876 for(int i=0;i<alnLength;i++){
881 minRPos.push_back(tempminRPos);
882 minRGroup.push_back(it->second);
887 /*cout << minDiff << '\t' << minCount << '\t' << endl;
888 for (int i = 0; i < minFGroup.size(); i++) {
890 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
894 for (int i = 0; i < minRGroup.size(); i++) {
896 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
900 if(minDiff > pdiffs) { success = minDiff; } //no good matches
902 bool foundMatch = false;
904 for (int i = 0; i < minFGroup.size(); i++) {
905 for (int j = 0; j < minFGroup[i].size(); j++) {
906 for (int k = 0; k < minRGroup.size(); k++) {
907 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
914 if (matches.size() == 1) {
921 //we have an acceptable match for the forward and reverse, but do they match?
923 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
924 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
925 forwardQual.trimQScores(fStart, -1);
926 reverseQual.trimQScores(rStart, -1);
928 }else { success = pdiffs + 100; }
932 if (alignment != NULL) { delete alignment; }
938 catch(exception& e) {
939 m->errorOut(e, "TrimOligos", "stripIForward");
944 //*******************************************************************/
945 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
947 //look for forward barcode
948 string rawFSequence = forwardSeq.getUnaligned();
949 string rawRSequence = reverseSeq.getUnaligned();
950 int success = pdiffs + 1; //guilty until proven innocent
952 //can you find the forward barcode
953 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
954 string foligo = it->second.forward;
955 string roligo = it->second.reverse;
957 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
958 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
961 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
962 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
966 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
968 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
969 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
975 //if you found the barcode or if you don't want to allow for diffs
976 if ((pdiffs == 0) || (success == 0)) { return success; }
977 else { //try aligning and see if you can find it
978 Alignment* alignment;
979 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
980 else{ alignment = NULL; }
982 //can you find the barcode
985 vector< vector<int> > minFGroup;
988 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
998 only want to look for forward = Sarah, John, Anna, Pat, Gail
999 reverse = Westcott, Schloss, Brown, Moore
1001 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1003 //cout << endl << forwardSeq.getName() << endl;
1004 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1005 string oligo = it->first;
1007 if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1008 success = pdiffs + 10;
1011 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1012 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1013 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1014 oligo = alignment->getSeqAAln();
1015 string temp = alignment->getSeqBAln();
1017 int alnLength = oligo.length();
1019 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1020 oligo = oligo.substr(0,alnLength);
1021 temp = temp.substr(0,alnLength);
1022 int numDiff = countDiffs(oligo, temp);
1024 if (alnLength == 0) { numDiff = pdiffs + 100; }
1025 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1027 if(numDiff < minDiff){
1031 minFGroup.push_back(it->second);
1032 int tempminFPos = 0;
1034 for(int i=0;i<alnLength;i++){
1039 minFPos.push_back(tempminFPos);
1040 }else if(numDiff == minDiff){
1041 minFGroup.push_back(it->second);
1042 int tempminFPos = 0;
1043 for(int i=0;i<alnLength;i++){
1048 minFPos.push_back(tempminFPos);
1052 //cout << minDiff << '\t' << minCount << '\t' << endl;
1053 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1055 //check for reverse match
1056 if (alignment != NULL) { delete alignment; }
1058 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1059 else{ alignment = NULL; }
1061 //can you find the barcode
1064 vector< vector<int> > minRGroup;
1065 vector<int> minRPos;
1067 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1068 string oligo = it->first;
1069 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1070 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1071 success = pdiffs + 10;
1075 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1076 alignment->align(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1077 oligo = alignment->getSeqAAln();
1078 string temp = alignment->getSeqBAln();
1080 int alnLength = oligo.length();
1081 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1082 oligo = oligo.substr(0,alnLength);
1083 temp = temp.substr(0,alnLength);
1084 int numDiff = countDiffs(oligo, temp);
1085 if (alnLength == 0) { numDiff = pdiffs + 100; }
1087 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1088 if(numDiff < minDiff){
1092 minRGroup.push_back(it->second);
1093 int tempminRPos = 0;
1095 for(int i=0;i<alnLength;i++){
1100 minRPos.push_back(tempminRPos);
1101 }else if(numDiff == minDiff){
1102 int tempminRPos = 0;
1103 for(int i=0;i<alnLength;i++){
1108 minRPos.push_back(tempminRPos);
1109 minRGroup.push_back(it->second);
1114 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1115 for (int i = 0; i < minFGroup.size(); i++) {
1117 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
1121 for (int i = 0; i < minRGroup.size(); i++) {
1123 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
1127 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1129 bool foundMatch = false;
1130 vector<int> matches;
1131 for (int i = 0; i < minFGroup.size(); i++) {
1132 for (int j = 0; j < minFGroup[i].size(); j++) {
1133 for (int k = 0; k < minRGroup.size(); k++) {
1134 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1141 if (matches.size() == 1) {
1144 fStart = minFPos[0];
1145 rStart = minRPos[0];
1148 //we have an acceptable match for the forward and reverse, but do they match?
1150 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1151 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1153 }else { success = pdiffs + 100; }
1157 if (alignment != NULL) { delete alignment; }
1163 catch(exception& e) {
1164 m->errorOut(e, "TrimOligos", "stripIForward");
1169 //*******************************************************************/
1170 int TrimOligos::stripBarcode(Sequence& seq, int& group){
1173 string rawSequence = seq.getUnaligned();
1174 int success = bdiffs + 1; //guilty until proven innocent
1176 //can you find the barcode
1177 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1178 string oligo = it->first;
1179 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
1180 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
1184 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1186 seq.setUnaligned(rawSequence.substr(oligo.length()));
1193 //if you found the barcode or if you don't want to allow for diffs
1194 if ((bdiffs == 0) || (success == 0)) { return success; }
1196 else { //try aligning and see if you can find it
1197 Alignment* alignment;
1198 if (barcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
1199 else{ alignment = NULL; }
1201 //can you find the barcode
1207 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1208 string oligo = it->first;
1209 // int length = oligo.length();
1211 if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
1212 success = bdiffs + 10;
1216 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1217 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1218 oligo = alignment->getSeqAAln();
1219 string temp = alignment->getSeqBAln();
1221 int alnLength = oligo.length();
1223 for(int i=oligo.length()-1;i>=0;i--){
1224 if(oligo[i] != '-'){ alnLength = i+1; break; }
1226 oligo = oligo.substr(0,alnLength);
1227 temp = temp.substr(0,alnLength);
1229 int numDiff = countDiffs(oligo, temp);
1231 if(numDiff < minDiff){
1234 minGroup = it->second;
1236 for(int i=0;i<alnLength;i++){
1242 else if(numDiff == minDiff){
1248 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1249 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
1250 else{ //use the best match
1252 seq.setUnaligned(rawSequence.substr(minPos));
1256 if (alignment != NULL) { delete alignment; }
1263 catch(exception& e) {
1264 m->errorOut(e, "TrimOligos", "stripBarcode");
1270 /********************************************************************/
1271 int TrimOligos::stripForward(Sequence& seq, int& group){
1274 string rawSequence = seq.getUnaligned();
1275 int success = pdiffs + 1; //guilty until proven innocent
1277 //can you find the primer
1278 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1279 string oligo = it->first;
1280 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1281 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1285 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1287 seq.setUnaligned(rawSequence.substr(oligo.length()));
1293 //if you found the barcode or if you don't want to allow for diffs
1294 if ((pdiffs == 0) || (success == 0)) { return success; }
1296 else { //try aligning and see if you can find it
1297 Alignment* alignment;
1298 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1299 else{ alignment = NULL; }
1301 //can you find the barcode
1307 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1308 string oligo = it->first;
1309 // int length = oligo.length();
1311 if(rawSequence.length() < maxFPrimerLength){
1312 success = pdiffs + 100;
1316 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1317 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1318 oligo = alignment->getSeqAAln();
1319 string temp = alignment->getSeqBAln();
1321 int alnLength = oligo.length();
1323 for(int i=oligo.length()-1;i>=0;i--){
1324 if(oligo[i] != '-'){ alnLength = i+1; break; }
1326 oligo = oligo.substr(0,alnLength);
1327 temp = temp.substr(0,alnLength);
1329 int numDiff = countDiffs(oligo, temp);
1331 if(numDiff < minDiff){
1334 minGroup = it->second;
1336 for(int i=0;i<alnLength;i++){
1342 else if(numDiff == minDiff){
1348 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1349 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1350 else{ //use the best match
1352 seq.setUnaligned(rawSequence.substr(minPos));
1356 if (alignment != NULL) { delete alignment; }
1363 catch(exception& e) {
1364 m->errorOut(e, "TrimOligos", "stripForward");
1368 //*******************************************************************/
1369 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1371 string rawSequence = seq.getUnaligned();
1372 int success = pdiffs + 1; //guilty until proven innocent
1374 //can you find the primer
1375 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1376 string oligo = it->first;
1377 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1378 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1382 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1384 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
1385 if(qual.getName() != ""){
1386 if (!keepForward) { qual.trimQScores(oligo.length(), -1); }
1393 //if you found the barcode or if you don't want to allow for diffs
1394 if ((pdiffs == 0) || (success == 0)) { return success; }
1396 else { //try aligning and see if you can find it
1397 Alignment* alignment;
1398 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1399 else{ alignment = NULL; }
1401 //can you find the barcode
1407 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1408 string oligo = it->first;
1409 // int length = oligo.length();
1411 if(rawSequence.length() < maxFPrimerLength){
1412 success = pdiffs + 100;
1416 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1417 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1418 oligo = alignment->getSeqAAln();
1419 string temp = alignment->getSeqBAln();
1421 int alnLength = oligo.length();
1423 for(int i=oligo.length()-1;i>=0;i--){
1424 if(oligo[i] != '-'){ alnLength = i+1; break; }
1426 oligo = oligo.substr(0,alnLength);
1427 temp = temp.substr(0,alnLength);
1429 int numDiff = countDiffs(oligo, temp);
1431 if(numDiff < minDiff){
1434 minGroup = it->second;
1436 for(int i=0;i<alnLength;i++){
1442 else if(numDiff == minDiff){
1448 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1449 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1450 else{ //use the best match
1452 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
1453 if(qual.getName() != ""){
1454 if (!keepForward) { qual.trimQScores(minPos, -1); }
1459 if (alignment != NULL) { delete alignment; }
1466 catch(exception& e) {
1467 m->errorOut(e, "TrimOligos", "stripForward");
1471 //******************************************************************/
1472 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
1474 string rawSequence = seq.getUnaligned();
1475 bool success = 0; //guilty until proven innocent
1477 for(int i=0;i<revPrimer.size();i++){
1478 string oligo = revPrimer[i];
1480 if(rawSequence.length() < oligo.length()){
1485 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1486 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1487 if(qual.getName() != ""){
1488 qual.trimQScores(-1, rawSequence.length()-oligo.length());
1497 catch(exception& e) {
1498 m->errorOut(e, "TrimOligos", "stripReverse");
1502 //******************************************************************/
1503 bool TrimOligos::stripReverse(Sequence& seq){
1506 string rawSequence = seq.getUnaligned();
1507 bool success = 0; //guilty until proven innocent
1509 for(int i=0;i<revPrimer.size();i++){
1510 string oligo = revPrimer[i];
1512 if(rawSequence.length() < oligo.length()){
1517 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1518 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1527 catch(exception& e) {
1528 m->errorOut(e, "TrimOligos", "stripReverse");
1532 //******************************************************************/
1533 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
1535 string rawSequence = seq.getUnaligned();
1536 bool success = ldiffs + 1; //guilty until proven innocent
1538 for(int i=0;i<linker.size();i++){
1539 string oligo = linker[i];
1541 if(rawSequence.length() < oligo.length()){
1542 success = ldiffs + 10;
1546 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1547 seq.setUnaligned(rawSequence.substr(oligo.length()));
1548 if(qual.getName() != ""){
1549 qual.trimQScores(oligo.length(), -1);
1556 //if you found the linker or if you don't want to allow for diffs
1557 if ((ldiffs == 0) || (success == 0)) { return success; }
1559 else { //try aligning and see if you can find it
1560 Alignment* alignment;
1561 if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
1562 else{ alignment = NULL; }
1564 //can you find the barcode
1569 for(int i = 0; i < linker.size(); i++){
1570 string oligo = linker[i];
1571 // int length = oligo.length();
1573 if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length
1574 success = ldiffs + 10;
1578 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1579 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
1580 oligo = alignment->getSeqAAln();
1581 string temp = alignment->getSeqBAln();
1583 int alnLength = oligo.length();
1585 for(int i=oligo.length()-1;i>=0;i--){
1586 if(oligo[i] != '-'){ alnLength = i+1; break; }
1588 oligo = oligo.substr(0,alnLength);
1589 temp = temp.substr(0,alnLength);
1591 int numDiff = countDiffs(oligo, temp);
1593 if(numDiff < minDiff){
1597 for(int i=0;i<alnLength;i++){
1603 else if(numDiff == minDiff){
1609 if(minDiff > ldiffs) { success = minDiff; } //no good matches
1610 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
1611 else{ //use the best match
1612 seq.setUnaligned(rawSequence.substr(minPos));
1614 if(qual.getName() != ""){
1615 qual.trimQScores(minPos, -1);
1620 if (alignment != NULL) { delete alignment; }
1628 catch(exception& e) {
1629 m->errorOut(e, "TrimOligos", "stripLinker");
1633 //******************************************************************/
1634 bool TrimOligos::stripLinker(Sequence& seq){
1637 string rawSequence = seq.getUnaligned();
1638 bool success = ldiffs +1; //guilty until proven innocent
1640 for(int i=0;i<linker.size();i++){
1641 string oligo = linker[i];
1643 if(rawSequence.length() < oligo.length()){
1644 success = ldiffs +10;
1648 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1649 seq.setUnaligned(rawSequence.substr(oligo.length()));
1655 //if you found the linker or if you don't want to allow for diffs
1656 if ((ldiffs == 0) || (success == 0)) { return success; }
1658 else { //try aligning and see if you can find it
1659 Alignment* alignment;
1660 if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
1661 else{ alignment = NULL; }
1663 //can you find the barcode
1668 for(int i = 0; i < linker.size(); i++){
1669 string oligo = linker[i];
1670 // int length = oligo.length();
1672 if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length
1673 success = ldiffs + 10;
1677 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1678 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
1679 oligo = alignment->getSeqAAln();
1680 string temp = alignment->getSeqBAln();
1682 int alnLength = oligo.length();
1684 for(int i=oligo.length()-1;i>=0;i--){
1685 if(oligo[i] != '-'){ alnLength = i+1; break; }
1687 oligo = oligo.substr(0,alnLength);
1688 temp = temp.substr(0,alnLength);
1690 int numDiff = countDiffs(oligo, temp);
1692 if(numDiff < minDiff){
1696 for(int i=0;i<alnLength;i++){
1702 else if(numDiff == minDiff){
1708 if(minDiff > ldiffs) { success = minDiff; } //no good matches
1709 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
1710 else{ //use the best match
1711 seq.setUnaligned(rawSequence.substr(minPos));
1715 if (alignment != NULL) { delete alignment; }
1722 catch(exception& e) {
1723 m->errorOut(e, "TrimOligos", "stripLinker");
1728 //******************************************************************/
1729 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
1731 string rawSequence = seq.getUnaligned();
1732 bool success = sdiffs+1; //guilty until proven innocent
1734 for(int i=0;i<spacer.size();i++){
1735 string oligo = spacer[i];
1737 if(rawSequence.length() < oligo.length()){
1738 success = sdiffs+10;
1742 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1743 seq.setUnaligned(rawSequence.substr(oligo.length()));
1744 if(qual.getName() != ""){
1745 qual.trimQScores(oligo.length(), -1);
1752 //if you found the spacer or if you don't want to allow for diffs
1753 if ((sdiffs == 0) || (success == 0)) { return success; }
1755 else { //try aligning and see if you can find it
1756 Alignment* alignment;
1757 if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
1758 else{ alignment = NULL; }
1760 //can you find the barcode
1765 for(int i = 0; i < spacer.size(); i++){
1766 string oligo = spacer[i];
1767 // int length = oligo.length();
1769 if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length
1770 success = sdiffs + 10;
1774 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1775 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
1776 oligo = alignment->getSeqAAln();
1777 string temp = alignment->getSeqBAln();
1779 int alnLength = oligo.length();
1781 for(int i=oligo.length()-1;i>=0;i--){
1782 if(oligo[i] != '-'){ alnLength = i+1; break; }
1784 oligo = oligo.substr(0,alnLength);
1785 temp = temp.substr(0,alnLength);
1787 int numDiff = countDiffs(oligo, temp);
1789 if(numDiff < minDiff){
1793 for(int i=0;i<alnLength;i++){
1799 else if(numDiff == minDiff){
1805 if(minDiff > sdiffs) { success = minDiff; } //no good matches
1806 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
1807 else{ //use the best match
1808 seq.setUnaligned(rawSequence.substr(minPos));
1810 if(qual.getName() != ""){
1811 qual.trimQScores(minPos, -1);
1816 if (alignment != NULL) { delete alignment; }
1824 catch(exception& e) {
1825 m->errorOut(e, "TrimOligos", "stripSpacer");
1829 //******************************************************************/
1830 bool TrimOligos::stripSpacer(Sequence& seq){
1833 string rawSequence = seq.getUnaligned();
1834 bool success = sdiffs+1; //guilty until proven innocent
1836 for(int i=0;i<spacer.size();i++){
1837 string oligo = spacer[i];
1839 if(rawSequence.length() < oligo.length()){
1840 success = sdiffs+10;
1844 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1845 seq.setUnaligned(rawSequence.substr(oligo.length()));
1851 //if you found the spacer or if you don't want to allow for diffs
1852 if ((sdiffs == 0) || (success == 0)) { return success; }
1854 else { //try aligning and see if you can find it
1855 Alignment* alignment;
1856 if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
1857 else{ alignment = NULL; }
1859 //can you find the barcode
1864 for(int i = 0; i < spacer.size(); i++){
1865 string oligo = spacer[i];
1866 // int length = oligo.length();
1868 if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length
1869 success = sdiffs + 10;
1873 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1874 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
1875 oligo = alignment->getSeqAAln();
1876 string temp = alignment->getSeqBAln();
1878 int alnLength = oligo.length();
1880 for(int i=oligo.length()-1;i>=0;i--){
1881 if(oligo[i] != '-'){ alnLength = i+1; break; }
1883 oligo = oligo.substr(0,alnLength);
1884 temp = temp.substr(0,alnLength);
1886 int numDiff = countDiffs(oligo, temp);
1888 if(numDiff < minDiff){
1892 for(int i=0;i<alnLength;i++){
1898 else if(numDiff == minDiff){
1904 if(minDiff > sdiffs) { success = minDiff; } //no good matches
1905 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
1906 else{ //use the best match
1907 seq.setUnaligned(rawSequence.substr(minPos));
1911 if (alignment != NULL) { delete alignment; }
1918 catch(exception& e) {
1919 m->errorOut(e, "TrimOligos", "stripSpacer");
1924 //******************************************************************/
1925 bool TrimOligos::compareDNASeq(string oligo, string seq){
1928 int length = oligo.length();
1930 for(int i=0;i<length;i++){
1932 if(oligo[i] != seq[i]){
1933 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
1934 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
1935 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
1936 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
1937 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
1938 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1939 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
1940 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1941 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1942 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1943 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
1944 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1946 if(success == 0) { break; }
1955 catch(exception& e) {
1956 m->errorOut(e, "TrimOligos", "compareDNASeq");
1961 //********************************************************************/
1962 int TrimOligos::countDiffs(string oligo, string seq){
1965 int length = oligo.length();
1968 for(int i=0;i<length;i++){
1970 if(oligo[i] != seq[i]){
1971 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
1972 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
1973 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
1974 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
1975 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
1976 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1977 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
1978 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1979 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1980 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1981 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
1982 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1989 catch(exception& e) {
1990 m->errorOut(e, "TrimOligos", "countDiffs");
1994 /********************************************************************/