5 * Created by westcott on 9/1/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
19 m = MothurOut::getInstance();
33 m->errorOut(e, "TrimOligos", "TrimOligos");
37 /********************************************************************/
38 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
39 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br, vector<string> lk, vector<string> sp){
41 m = MothurOut::getInstance();
54 m->errorOut(e, "TrimOligos", "TrimOligos");
58 /********************************************************************/
59 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
60 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
62 m = MothurOut::getInstance();
72 m->errorOut(e, "TrimOligos", "TrimOligos");
76 /********************************************************************/
77 TrimOligos::~TrimOligos() {}
78 //*******************************************************************/
79 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
82 string rawSequence = seq.getUnaligned();
83 int success = bdiffs + 1; //guilty until proven innocent
85 //can you find the barcode
86 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
87 string oligo = it->first;
88 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
89 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
93 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
95 seq.setUnaligned(rawSequence.substr(oligo.length()));
97 if(qual.getName() != ""){
98 qual.trimQScores(oligo.length(), -1);
106 //if you found the barcode or if you don't want to allow for diffs
107 if ((bdiffs == 0) || (success == 0)) { return success; }
109 else { //try aligning and see if you can find it
113 Alignment* alignment;
114 if (barcodes.size() > 0) {
115 map<string,int>::iterator it;
117 for(it=barcodes.begin();it!=barcodes.end();it++){
118 if(it->first.length() > maxLength){
119 maxLength = it->first.length();
122 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
124 }else{ alignment = NULL; }
126 //can you find the barcode
132 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
133 string oligo = it->first;
134 // int length = oligo.length();
136 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
137 success = bdiffs + 10;
141 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
142 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
143 oligo = alignment->getSeqAAln();
144 string temp = alignment->getSeqBAln();
146 int alnLength = oligo.length();
148 for(int i=oligo.length()-1;i>=0;i--){
149 if(oligo[i] != '-'){ alnLength = i+1; break; }
151 oligo = oligo.substr(0,alnLength);
152 temp = temp.substr(0,alnLength);
153 int numDiff = countDiffs(oligo, temp);
155 if(numDiff < minDiff){
158 minGroup = it->second;
160 for(int i=0;i<alnLength;i++){
166 else if(numDiff == minDiff){
172 if(minDiff > bdiffs) { success = minDiff; } //no good matches
173 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
174 else{ //use the best match
176 seq.setUnaligned(rawSequence.substr(minPos));
178 if(qual.getName() != ""){
179 qual.trimQScores(minPos, -1);
184 if (alignment != NULL) { delete alignment; }
191 catch(exception& e) {
192 m->errorOut(e, "TrimOligos", "stripBarcode");
196 //*******************************************************************/
197 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
199 //look for forward barcode
200 string rawFSequence = forwardSeq.getUnaligned();
201 string rawRSequence = reverseSeq.getUnaligned();
202 int success = bdiffs + 1; //guilty until proven innocent
204 //can you find the forward barcode
205 for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
206 string foligo = it->second.forward;
207 string roligo = it->second.reverse;
209 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
210 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
213 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
214 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
218 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
220 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
221 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
222 forwardQual.trimQScores(foligo.length(), -1);
223 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
229 //if you found the barcode or if you don't want to allow for diffs
230 if ((bdiffs == 0) || (success == 0)) { return success; }
231 else { //try aligning and see if you can find it
236 Alignment* alignment;
237 if (ibarcodes.size() > 0) {
238 for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
239 if(it->second.forward.length() > maxLength){ maxLength = it->second.forward.length(); }
241 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
242 }else{ alignment = NULL; }
244 //can you find the barcode
250 for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
251 string oligo = it->second.forward;
253 if(rawFSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
254 success = bdiffs + 10;
258 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
259 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
260 oligo = alignment->getSeqAAln();
261 string temp = alignment->getSeqBAln();
263 int alnLength = oligo.length();
265 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
266 oligo = oligo.substr(0,alnLength);
267 temp = temp.substr(0,alnLength);
268 int numDiff = countDiffs(oligo, temp);
270 if(numDiff < minDiff){
273 minFGroup = it->first;
275 for(int i=0;i<alnLength;i++){
280 }else if(numDiff == minDiff){
286 if(minDiff > bdiffs) { success = minDiff; } //no good matches
287 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
289 //check for reverse match
290 if (alignment != NULL) { delete alignment; }
293 if (ibarcodes.size() > 0) {
294 for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
295 if(it->second.reverse.length() > maxLength){ maxLength = it->second.reverse.length(); }
297 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
298 }else{ alignment = NULL; }
300 //can you find the barcode
306 for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
307 string oligo = it->second.reverse;
309 if(rawRSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
310 success = bdiffs + 10;
314 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
315 alignment->align(oligo, rawRSequence.substr((rawRSequence.length()-(oligo.length()+bdiffs)),oligo.length()+bdiffs));
316 oligo = alignment->getSeqAAln();
317 string temp = alignment->getSeqBAln();
319 int alnLength = oligo.length();
320 for(int i=0;i<alnLength;i++){ if(oligo[i] != '-'){ alnLength = i; break; } }
321 oligo = oligo.substr(0,alnLength);
322 temp = temp.substr(0,alnLength);
323 int numDiff = countDiffs(oligo, temp);
325 if(numDiff < minDiff){
328 minRGroup = it->first;
330 for(int i=0;i<alnLength;i++){
335 }else if(numDiff == minDiff){
341 if(minDiff > bdiffs) { success = minDiff; } //no good matches
342 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
344 //we have an acceptable match for the forward and reverse, but do they match?
345 if (minFGroup == minRGroup) {
347 forwardSeq.setUnaligned(rawFSequence.substr(minFPos));
348 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-minRPos)));
349 forwardQual.trimQScores(minFPos, -1);
350 reverseQual.trimQScores(-1, rawRSequence.length()-minRPos);
352 }else { success = bdiffs + 100; }
356 if (alignment != NULL) { delete alignment; }
362 catch(exception& e) {
363 m->errorOut(e, "TrimOligos", "stripIBarcode");
368 //*******************************************************************/
369 int TrimOligos::stripBarcode(Sequence& seq, int& group){
372 string rawSequence = seq.getUnaligned();
373 int success = bdiffs + 1; //guilty until proven innocent
375 //can you find the barcode
376 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
377 string oligo = it->first;
378 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
379 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
383 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
385 seq.setUnaligned(rawSequence.substr(oligo.length()));
392 //if you found the barcode or if you don't want to allow for diffs
393 if ((bdiffs == 0) || (success == 0)) { return success; }
395 else { //try aligning and see if you can find it
399 Alignment* alignment;
400 if (barcodes.size() > 0) {
401 map<string,int>::iterator it=barcodes.begin();
403 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
404 if(it->first.length() > maxLength){
405 maxLength = it->first.length();
408 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
410 }else{ alignment = NULL; }
412 //can you find the barcode
418 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
419 string oligo = it->first;
420 // int length = oligo.length();
422 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
423 success = bdiffs + 10;
427 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
428 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
429 oligo = alignment->getSeqAAln();
430 string temp = alignment->getSeqBAln();
432 int alnLength = oligo.length();
434 for(int i=oligo.length()-1;i>=0;i--){
435 if(oligo[i] != '-'){ alnLength = i+1; break; }
437 oligo = oligo.substr(0,alnLength);
438 temp = temp.substr(0,alnLength);
440 int numDiff = countDiffs(oligo, temp);
442 if(numDiff < minDiff){
445 minGroup = it->second;
447 for(int i=0;i<alnLength;i++){
453 else if(numDiff == minDiff){
459 if(minDiff > bdiffs) { success = minDiff; } //no good matches
460 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
461 else{ //use the best match
463 seq.setUnaligned(rawSequence.substr(minPos));
467 if (alignment != NULL) { delete alignment; }
474 catch(exception& e) {
475 m->errorOut(e, "TrimOligos", "stripBarcode");
480 /*******************************************************************
481 int TrimOligos::stripRBarcode(Sequence& seq, QualityScores& qual, int& group){
484 string rawSequence = seq.getUnaligned();
485 int success = bdiffs + 1; //guilty until proven innocent
487 //can you find the barcode
488 for(map<string,int>::iterator it=rbarcodes.begin();it!=rbarcodes.end();it++){
489 string oligo = it->first;
490 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
491 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
495 if(compareDNASeq(oligo, rawSequence.substr((rawSequence.length()-oligo.length()),oligo.length()))){
497 seq.setUnaligned(rawSequence.substr(0,(rawSequence.length()-oligo.length())));
499 if(qual.getName() != ""){
500 qual.trimQScores(-1, rawSequence.length()-oligo.length());
508 //if you found the barcode or if you don't want to allow for diffs
509 if ((bdiffs == 0) || (success == 0)) { return success; }
511 else { //try aligning and see if you can find it
515 Alignment* alignment;
516 if (rbarcodes.size() > 0) {
517 map<string,int>::iterator it;
519 for(it=rbarcodes.begin();it!=rbarcodes.end();it++){
520 if(it->first.length() > maxLength){
521 maxLength = it->first.length();
524 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
526 }else{ alignment = NULL; }
528 //can you find the barcode
534 for(map<string,int>::iterator it=rbarcodes.begin();it!=rbarcodes.end();it++){
535 string oligo = it->first;
536 // int length = oligo.length();
538 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
539 success = bdiffs + 10;
543 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
544 alignment->align(oligo, rawSequence.substr((rawSequence.length()-(oligo.length()+bdiffs)),oligo.length()+bdiffs));
545 oligo = alignment->getSeqAAln();
546 string temp = alignment->getSeqBAln();
548 int alnLength = oligo.length();
550 for(int i=0;i<alnLength;i++){
551 if(oligo[i] != '-'){ alnLength = i; break; }
553 oligo = oligo.substr(alnLength);
554 temp = temp.substr(alnLength);
556 int numDiff = countDiffs(oligo, temp);
558 if(numDiff < minDiff){
561 minGroup = it->second;
563 for(int i=alnLength-1;i>=0;i--){
569 else if(numDiff == minDiff){
575 if(minDiff > bdiffs) { success = minDiff; } //no good matches
576 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
577 else{ //use the best match
579 seq.setUnaligned(rawSequence.substr(0, (rawSequence.length()-minPos)));
581 if(qual.getName() != ""){
582 qual.trimQScores(-1, (rawSequence.length()-minPos));
587 if (alignment != NULL) { delete alignment; }
594 catch(exception& e) {
595 m->errorOut(e, "TrimOligos", "stripRBarcode");
600 /*******************************************************************
601 int TrimOligos::stripRBarcode(Sequence& seq, int& group){
604 string rawSequence = seq.getUnaligned();
605 int success = bdiffs + 1; //guilty until proven innocent
607 //can you find the barcode
608 for(map<string,int>::iterator it=rbarcodes.begin();it!=rbarcodes.end();it++){
609 string oligo = it->first;
610 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
611 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
615 if(compareDNASeq(oligo, rawSequence.substr((rawSequence.length()-oligo.length()),oligo.length()))){
617 seq.setUnaligned(rawSequence.substr(0,(rawSequence.length()-oligo.length())));
624 //if you found the barcode or if you don't want to allow for diffs
625 if ((bdiffs == 0) || (success == 0)) { return success; }
627 else { //try aligning and see if you can find it
631 Alignment* alignment;
632 if (rbarcodes.size() > 0) {
633 map<string,int>::iterator it;
635 for(it=rbarcodes.begin();it!=rbarcodes.end();it++){
636 if(it->first.length() > maxLength){
637 maxLength = it->first.length();
640 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
642 }else{ alignment = NULL; }
644 //can you find the barcode
650 for(map<string,int>::iterator it=rbarcodes.begin();it!=rbarcodes.end();it++){
651 string oligo = it->first;
652 // int length = oligo.length();
654 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
655 success = bdiffs + 10;
659 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
660 alignment->align(oligo, rawSequence.substr((rawSequence.length()-(oligo.length()+bdiffs)),oligo.length()+bdiffs));
661 oligo = alignment->getSeqAAln();
662 string temp = alignment->getSeqBAln();
664 int alnLength = oligo.length();
666 for(int i=0;i<alnLength;i++){
667 if(oligo[i] != '-'){ alnLength = i; break; }
669 oligo = oligo.substr(alnLength);
670 temp = temp.substr(alnLength);
672 int numDiff = countDiffs(oligo, temp);
674 if(numDiff < minDiff){
677 minGroup = it->second;
679 for(int i=alnLength-1;i>=0;i--){
685 else if(numDiff == minDiff){
691 if(minDiff > bdiffs) { success = minDiff; } //no good matches
692 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
693 else{ //use the best match
695 seq.setUnaligned(rawSequence.substr(0, (rawSequence.length()-minPos)));
700 if (alignment != NULL) { delete alignment; }
707 catch(exception& e) {
708 m->errorOut(e, "TrimOligos", "stripRBarcode");
713 //********************************************************************/
714 int TrimOligos::stripForward(Sequence& seq, int& group){
717 string rawSequence = seq.getUnaligned();
718 int success = pdiffs + 1; //guilty until proven innocent
720 //can you find the primer
721 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
722 string oligo = it->first;
723 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
724 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
728 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
730 seq.setUnaligned(rawSequence.substr(oligo.length()));
736 //if you found the barcode or if you don't want to allow for diffs
737 if ((pdiffs == 0) || (success == 0)) { return success; }
739 else { //try aligning and see if you can find it
743 Alignment* alignment;
744 if (primers.size() > 0) {
745 map<string,int>::iterator it;
747 for(it=primers.begin();it!=primers.end();it++){
748 if(it->first.length() > maxLength){
749 maxLength = it->first.length();
752 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));
754 }else{ alignment = NULL; }
756 //can you find the barcode
762 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
763 string oligo = it->first;
764 // int length = oligo.length();
766 if(rawSequence.length() < maxLength){
767 success = pdiffs + 100;
771 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
772 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
773 oligo = alignment->getSeqAAln();
774 string temp = alignment->getSeqBAln();
776 int alnLength = oligo.length();
778 for(int i=oligo.length()-1;i>=0;i--){
779 if(oligo[i] != '-'){ alnLength = i+1; break; }
781 oligo = oligo.substr(0,alnLength);
782 temp = temp.substr(0,alnLength);
784 int numDiff = countDiffs(oligo, temp);
786 if(numDiff < minDiff){
789 minGroup = it->second;
791 for(int i=0;i<alnLength;i++){
797 else if(numDiff == minDiff){
803 if(minDiff > pdiffs) { success = minDiff; } //no good matches
804 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
805 else{ //use the best match
807 seq.setUnaligned(rawSequence.substr(minPos));
811 if (alignment != NULL) { delete alignment; }
818 catch(exception& e) {
819 m->errorOut(e, "TrimOligos", "stripForward");
823 //*******************************************************************/
824 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
826 string rawSequence = seq.getUnaligned();
827 int success = pdiffs + 1; //guilty until proven innocent
829 //can you find the primer
830 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
831 string oligo = it->first;
832 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
833 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
837 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
839 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
840 if(qual.getName() != ""){
841 if (!keepForward) { qual.trimQScores(oligo.length(), -1); }
848 //if you found the barcode or if you don't want to allow for diffs
849 if ((pdiffs == 0) || (success == 0)) { return success; }
851 else { //try aligning and see if you can find it
855 Alignment* alignment;
856 if (primers.size() > 0) {
857 map<string,int>::iterator it;
859 for(it=primers.begin();it!=primers.end();it++){
860 if(it->first.length() > maxLength){
861 maxLength = it->first.length();
864 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));
866 }else{ alignment = NULL; }
868 //can you find the barcode
874 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
875 string oligo = it->first;
876 // int length = oligo.length();
878 if(rawSequence.length() < maxLength){
879 success = pdiffs + 100;
883 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
884 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
885 oligo = alignment->getSeqAAln();
886 string temp = alignment->getSeqBAln();
888 int alnLength = oligo.length();
890 for(int i=oligo.length()-1;i>=0;i--){
891 if(oligo[i] != '-'){ alnLength = i+1; break; }
893 oligo = oligo.substr(0,alnLength);
894 temp = temp.substr(0,alnLength);
896 int numDiff = countDiffs(oligo, temp);
898 if(numDiff < minDiff){
901 minGroup = it->second;
903 for(int i=0;i<alnLength;i++){
909 else if(numDiff == minDiff){
915 if(minDiff > pdiffs) { success = minDiff; } //no good matches
916 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
917 else{ //use the best match
919 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
920 if(qual.getName() != ""){
921 if (!keepForward) { qual.trimQScores(minPos, -1); }
926 if (alignment != NULL) { delete alignment; }
933 catch(exception& e) {
934 m->errorOut(e, "TrimOligos", "stripForward");
938 //******************************************************************/
939 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
941 string rawSequence = seq.getUnaligned();
942 bool success = 0; //guilty until proven innocent
944 for(int i=0;i<revPrimer.size();i++){
945 string oligo = revPrimer[i];
947 if(rawSequence.length() < oligo.length()){
952 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
953 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
954 if(qual.getName() != ""){
955 qual.trimQScores(-1, rawSequence.length()-oligo.length());
964 catch(exception& e) {
965 m->errorOut(e, "TrimOligos", "stripReverse");
969 //******************************************************************/
970 bool TrimOligos::stripReverse(Sequence& seq){
973 string rawSequence = seq.getUnaligned();
974 bool success = 0; //guilty until proven innocent
976 for(int i=0;i<revPrimer.size();i++){
977 string oligo = revPrimer[i];
979 if(rawSequence.length() < oligo.length()){
984 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
985 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
994 catch(exception& e) {
995 m->errorOut(e, "TrimOligos", "stripReverse");
999 //******************************************************************/
1000 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
1002 string rawSequence = seq.getUnaligned();
1003 bool success = ldiffs + 1; //guilty until proven innocent
1005 for(int i=0;i<linker.size();i++){
1006 string oligo = linker[i];
1008 if(rawSequence.length() < oligo.length()){
1009 success = ldiffs + 10;
1013 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1014 seq.setUnaligned(rawSequence.substr(oligo.length()));
1015 if(qual.getName() != ""){
1016 qual.trimQScores(oligo.length(), -1);
1023 //if you found the linker or if you don't want to allow for diffs
1024 if ((ldiffs == 0) || (success == 0)) { return success; }
1026 else { //try aligning and see if you can find it
1030 Alignment* alignment;
1031 if (linker.size() > 0) {
1032 for(int i = 0; i < linker.size(); i++){
1033 if(linker[i].length() > maxLength){
1034 maxLength = linker[i].length();
1037 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+ldiffs+1));
1039 }else{ alignment = NULL; }
1041 //can you find the barcode
1046 for(int i = 0; i < linker.size(); i++){
1047 string oligo = linker[i];
1048 // int length = oligo.length();
1050 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
1051 success = ldiffs + 10;
1055 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1056 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
1057 oligo = alignment->getSeqAAln();
1058 string temp = alignment->getSeqBAln();
1060 int alnLength = oligo.length();
1062 for(int i=oligo.length()-1;i>=0;i--){
1063 if(oligo[i] != '-'){ alnLength = i+1; break; }
1065 oligo = oligo.substr(0,alnLength);
1066 temp = temp.substr(0,alnLength);
1068 int numDiff = countDiffs(oligo, temp);
1070 if(numDiff < minDiff){
1074 for(int i=0;i<alnLength;i++){
1080 else if(numDiff == minDiff){
1086 if(minDiff > ldiffs) { success = minDiff; } //no good matches
1087 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
1088 else{ //use the best match
1089 seq.setUnaligned(rawSequence.substr(minPos));
1091 if(qual.getName() != ""){
1092 qual.trimQScores(minPos, -1);
1097 if (alignment != NULL) { delete alignment; }
1105 catch(exception& e) {
1106 m->errorOut(e, "TrimOligos", "stripLinker");
1110 //******************************************************************/
1111 bool TrimOligos::stripLinker(Sequence& seq){
1114 string rawSequence = seq.getUnaligned();
1115 bool success = ldiffs +1; //guilty until proven innocent
1117 for(int i=0;i<linker.size();i++){
1118 string oligo = linker[i];
1120 if(rawSequence.length() < oligo.length()){
1121 success = ldiffs +10;
1125 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1126 seq.setUnaligned(rawSequence.substr(oligo.length()));
1132 //if you found the linker or if you don't want to allow for diffs
1133 if ((ldiffs == 0) || (success == 0)) { return success; }
1135 else { //try aligning and see if you can find it
1139 Alignment* alignment;
1140 if (linker.size() > 0) {
1141 for(int i = 0; i < linker.size(); i++){
1142 if(linker[i].length() > maxLength){
1143 maxLength = linker[i].length();
1146 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+ldiffs+1));
1148 }else{ alignment = NULL; }
1150 //can you find the barcode
1155 for(int i = 0; i < linker.size(); i++){
1156 string oligo = linker[i];
1157 // int length = oligo.length();
1159 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
1160 success = ldiffs + 10;
1164 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1165 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
1166 oligo = alignment->getSeqAAln();
1167 string temp = alignment->getSeqBAln();
1169 int alnLength = oligo.length();
1171 for(int i=oligo.length()-1;i>=0;i--){
1172 if(oligo[i] != '-'){ alnLength = i+1; break; }
1174 oligo = oligo.substr(0,alnLength);
1175 temp = temp.substr(0,alnLength);
1177 int numDiff = countDiffs(oligo, temp);
1179 if(numDiff < minDiff){
1183 for(int i=0;i<alnLength;i++){
1189 else if(numDiff == minDiff){
1195 if(minDiff > ldiffs) { success = minDiff; } //no good matches
1196 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
1197 else{ //use the best match
1198 seq.setUnaligned(rawSequence.substr(minPos));
1202 if (alignment != NULL) { delete alignment; }
1209 catch(exception& e) {
1210 m->errorOut(e, "TrimOligos", "stripLinker");
1215 //******************************************************************/
1216 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
1218 string rawSequence = seq.getUnaligned();
1219 bool success = sdiffs+1; //guilty until proven innocent
1221 for(int i=0;i<spacer.size();i++){
1222 string oligo = spacer[i];
1224 if(rawSequence.length() < oligo.length()){
1225 success = sdiffs+10;
1229 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1230 seq.setUnaligned(rawSequence.substr(oligo.length()));
1231 if(qual.getName() != ""){
1232 qual.trimQScores(oligo.length(), -1);
1239 //if you found the spacer or if you don't want to allow for diffs
1240 if ((sdiffs == 0) || (success == 0)) { return success; }
1242 else { //try aligning and see if you can find it
1246 Alignment* alignment;
1247 if (spacer.size() > 0) {
1248 for(int i = 0; i < spacer.size(); i++){
1249 if(spacer[i].length() > maxLength){
1250 maxLength = spacer[i].length();
1253 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+sdiffs+1));
1255 }else{ alignment = NULL; }
1257 //can you find the barcode
1262 for(int i = 0; i < spacer.size(); i++){
1263 string oligo = spacer[i];
1264 // int length = oligo.length();
1266 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
1267 success = sdiffs + 10;
1271 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1272 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
1273 oligo = alignment->getSeqAAln();
1274 string temp = alignment->getSeqBAln();
1276 int alnLength = oligo.length();
1278 for(int i=oligo.length()-1;i>=0;i--){
1279 if(oligo[i] != '-'){ alnLength = i+1; break; }
1281 oligo = oligo.substr(0,alnLength);
1282 temp = temp.substr(0,alnLength);
1284 int numDiff = countDiffs(oligo, temp);
1286 if(numDiff < minDiff){
1290 for(int i=0;i<alnLength;i++){
1296 else if(numDiff == minDiff){
1302 if(minDiff > sdiffs) { success = minDiff; } //no good matches
1303 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
1304 else{ //use the best match
1305 seq.setUnaligned(rawSequence.substr(minPos));
1307 if(qual.getName() != ""){
1308 qual.trimQScores(minPos, -1);
1313 if (alignment != NULL) { delete alignment; }
1321 catch(exception& e) {
1322 m->errorOut(e, "TrimOligos", "stripSpacer");
1326 //******************************************************************/
1327 bool TrimOligos::stripSpacer(Sequence& seq){
1330 string rawSequence = seq.getUnaligned();
1331 bool success = sdiffs+1; //guilty until proven innocent
1333 for(int i=0;i<spacer.size();i++){
1334 string oligo = spacer[i];
1336 if(rawSequence.length() < oligo.length()){
1337 success = sdiffs+10;
1341 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1342 seq.setUnaligned(rawSequence.substr(oligo.length()));
1348 //if you found the spacer or if you don't want to allow for diffs
1349 if ((sdiffs == 0) || (success == 0)) { return success; }
1351 else { //try aligning and see if you can find it
1355 Alignment* alignment;
1356 if (spacer.size() > 0) {
1357 for(int i = 0; i < spacer.size(); i++){
1358 if(spacer[i].length() > maxLength){
1359 maxLength = spacer[i].length();
1362 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+sdiffs+1));
1364 }else{ alignment = NULL; }
1366 //can you find the barcode
1371 for(int i = 0; i < spacer.size(); i++){
1372 string oligo = spacer[i];
1373 // int length = oligo.length();
1375 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
1376 success = sdiffs + 10;
1380 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1381 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
1382 oligo = alignment->getSeqAAln();
1383 string temp = alignment->getSeqBAln();
1385 int alnLength = oligo.length();
1387 for(int i=oligo.length()-1;i>=0;i--){
1388 if(oligo[i] != '-'){ alnLength = i+1; break; }
1390 oligo = oligo.substr(0,alnLength);
1391 temp = temp.substr(0,alnLength);
1393 int numDiff = countDiffs(oligo, temp);
1395 if(numDiff < minDiff){
1399 for(int i=0;i<alnLength;i++){
1405 else if(numDiff == minDiff){
1411 if(minDiff > sdiffs) { success = minDiff; } //no good matches
1412 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
1413 else{ //use the best match
1414 seq.setUnaligned(rawSequence.substr(minPos));
1418 if (alignment != NULL) { delete alignment; }
1425 catch(exception& e) {
1426 m->errorOut(e, "TrimOligos", "stripSpacer");
1431 //******************************************************************/
1432 bool TrimOligos::compareDNASeq(string oligo, string seq){
1435 int length = oligo.length();
1437 for(int i=0;i<length;i++){
1439 if(oligo[i] != seq[i]){
1440 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
1441 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
1442 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
1443 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
1444 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
1445 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1446 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
1447 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1448 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1449 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1450 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
1451 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1453 if(success == 0) { break; }
1462 catch(exception& e) {
1463 m->errorOut(e, "TrimOligos", "compareDNASeq");
1468 //********************************************************************/
1469 int TrimOligos::countDiffs(string oligo, string seq){
1472 int length = oligo.length();
1475 for(int i=0;i<length;i++){
1477 if(oligo[i] != seq[i]){
1478 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
1479 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
1480 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
1481 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
1482 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
1483 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1484 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
1485 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1486 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1487 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1488 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
1489 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1496 catch(exception& e) {
1497 m->errorOut(e, "TrimOligos", "countDiffs");
1501 /********************************************************************/