5 * Created by westcott on 9/1/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, map<string, int> rbr, vector<string> r, vector<string> lk, vector<string> sp){
19 m = MothurOut::getInstance();
34 m->errorOut(e, "TrimOligos", "TrimOligos");
38 /********************************************************************/
39 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
40 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
42 m = MothurOut::getInstance();
56 m->errorOut(e, "TrimOligos", "TrimOligos");
60 /********************************************************************/
61 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
62 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
64 m = MothurOut::getInstance();
74 m->errorOut(e, "TrimOligos", "TrimOligos");
78 /********************************************************************/
79 TrimOligos::~TrimOligos() {}
80 //*******************************************************************/
81 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
84 string rawSequence = seq.getUnaligned();
85 int success = bdiffs + 1; //guilty until proven innocent
87 //can you find the barcode
88 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
89 string oligo = it->first;
90 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
91 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
95 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
97 seq.setUnaligned(rawSequence.substr(oligo.length()));
99 if(qual.getName() != ""){
100 qual.trimQScores(oligo.length(), -1);
108 //if you found the barcode or if you don't want to allow for diffs
109 if ((bdiffs == 0) || (success == 0)) { return success; }
111 else { //try aligning and see if you can find it
115 Alignment* alignment;
116 if (barcodes.size() > 0) {
117 map<string,int>::iterator it;
119 for(it=barcodes.begin();it!=barcodes.end();it++){
120 if(it->first.length() > maxLength){
121 maxLength = it->first.length();
124 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
126 }else{ alignment = NULL; }
128 //can you find the barcode
134 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
135 string oligo = it->first;
136 // int length = oligo.length();
138 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
139 success = bdiffs + 10;
143 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
144 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
145 oligo = alignment->getSeqAAln();
146 string temp = alignment->getSeqBAln();
148 int alnLength = oligo.length();
150 for(int i=oligo.length()-1;i>=0;i--){
151 if(oligo[i] != '-'){ alnLength = i+1; break; }
153 oligo = oligo.substr(0,alnLength);
154 temp = temp.substr(0,alnLength);
155 int numDiff = countDiffs(oligo, temp);
157 if(numDiff < minDiff){
160 minGroup = it->second;
162 for(int i=0;i<alnLength;i++){
168 else if(numDiff == minDiff){
174 if(minDiff > bdiffs) { success = minDiff; } //no good matches
175 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
176 else{ //use the best match
178 seq.setUnaligned(rawSequence.substr(minPos));
180 if(qual.getName() != ""){
181 qual.trimQScores(minPos, -1);
186 if (alignment != NULL) { delete alignment; }
193 catch(exception& e) {
194 m->errorOut(e, "TrimOligos", "stripBarcode");
199 //*******************************************************************/
200 int TrimOligos::stripBarcode(Sequence& seq, int& group){
203 string rawSequence = seq.getUnaligned();
204 int success = bdiffs + 1; //guilty until proven innocent
206 //can you find the barcode
207 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
208 string oligo = it->first;
209 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
210 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
214 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
216 seq.setUnaligned(rawSequence.substr(oligo.length()));
223 //if you found the barcode or if you don't want to allow for diffs
224 if ((bdiffs == 0) || (success == 0)) { return success; }
226 else { //try aligning and see if you can find it
230 Alignment* alignment;
231 if (barcodes.size() > 0) {
232 map<string,int>::iterator it=barcodes.begin();
234 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
235 if(it->first.length() > maxLength){
236 maxLength = it->first.length();
239 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
241 }else{ alignment = NULL; }
243 //can you find the barcode
249 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
250 string oligo = it->first;
251 // int length = oligo.length();
253 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
254 success = bdiffs + 10;
258 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
259 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
260 oligo = alignment->getSeqAAln();
261 string temp = alignment->getSeqBAln();
263 int alnLength = oligo.length();
265 for(int i=oligo.length()-1;i>=0;i--){
266 if(oligo[i] != '-'){ alnLength = i+1; break; }
268 oligo = oligo.substr(0,alnLength);
269 temp = temp.substr(0,alnLength);
271 int numDiff = countDiffs(oligo, temp);
273 if(numDiff < minDiff){
276 minGroup = it->second;
278 for(int i=0;i<alnLength;i++){
284 else if(numDiff == minDiff){
290 if(minDiff > bdiffs) { success = minDiff; } //no good matches
291 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
292 else{ //use the best match
294 seq.setUnaligned(rawSequence.substr(minPos));
298 if (alignment != NULL) { delete alignment; }
305 catch(exception& e) {
306 m->errorOut(e, "TrimOligos", "stripBarcode");
311 //*******************************************************************/
312 int TrimOligos::stripRBarcode(Sequence& seq, QualityScores& qual, int& group){
315 string rawSequence = seq.getUnaligned();
316 int success = bdiffs + 1; //guilty until proven innocent
318 //can you find the barcode
319 for(map<string,int>::iterator it=rbarcodes.begin();it!=rbarcodes.end();it++){
320 string oligo = it->first;
321 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
322 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
326 if(compareDNASeq(oligo, rawSequence.substr((rawSequence.length()-oligo.length()),oligo.length()))){
328 seq.setUnaligned(rawSequence.substr(0,(rawSequence.length()-oligo.length())));
330 if(qual.getName() != ""){
331 qual.trimQScores(-1, rawSequence.length()-oligo.length());
339 //if you found the barcode or if you don't want to allow for diffs
340 if ((bdiffs == 0) || (success == 0)) { return success; }
342 else { //try aligning and see if you can find it
346 Alignment* alignment;
347 if (rbarcodes.size() > 0) {
348 map<string,int>::iterator it;
350 for(it=rbarcodes.begin();it!=rbarcodes.end();it++){
351 if(it->first.length() > maxLength){
352 maxLength = it->first.length();
355 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
357 }else{ alignment = NULL; }
359 //can you find the barcode
365 for(map<string,int>::iterator it=rbarcodes.begin();it!=rbarcodes.end();it++){
366 string oligo = it->first;
367 // int length = oligo.length();
369 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
370 success = bdiffs + 10;
374 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
375 alignment->align(oligo, rawSequence.substr((rawSequence.length()-(oligo.length()+bdiffs)),oligo.length()+bdiffs));
376 oligo = alignment->getSeqAAln();
377 string temp = alignment->getSeqBAln();
379 int alnLength = oligo.length();
381 for(int i=0;i<alnLength;i++){
382 if(oligo[i] != '-'){ alnLength = i; break; }
384 oligo = oligo.substr(alnLength);
385 temp = temp.substr(alnLength);
387 int numDiff = countDiffs(oligo, temp);
389 if(numDiff < minDiff){
392 minGroup = it->second;
394 for(int i=alnLength-1;i>=0;i--){
400 else if(numDiff == minDiff){
406 if(minDiff > bdiffs) { success = minDiff; } //no good matches
407 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
408 else{ //use the best match
410 seq.setUnaligned(rawSequence.substr(0, (rawSequence.length()-minPos)));
412 if(qual.getName() != ""){
413 qual.trimQScores(-1, (rawSequence.length()-minPos));
418 if (alignment != NULL) { delete alignment; }
425 catch(exception& e) {
426 m->errorOut(e, "TrimOligos", "stripRBarcode");
431 //*******************************************************************/
432 int TrimOligos::stripRBarcode(Sequence& seq, int& group){
435 string rawSequence = seq.getUnaligned();
436 int success = bdiffs + 1; //guilty until proven innocent
438 //can you find the barcode
439 for(map<string,int>::iterator it=rbarcodes.begin();it!=rbarcodes.end();it++){
440 string oligo = it->first;
441 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
442 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
446 if(compareDNASeq(oligo, rawSequence.substr((rawSequence.length()-oligo.length()),oligo.length()))){
448 seq.setUnaligned(rawSequence.substr(0,(rawSequence.length()-oligo.length())));
455 //if you found the barcode or if you don't want to allow for diffs
456 if ((bdiffs == 0) || (success == 0)) { return success; }
458 else { //try aligning and see if you can find it
462 Alignment* alignment;
463 if (rbarcodes.size() > 0) {
464 map<string,int>::iterator it;
466 for(it=rbarcodes.begin();it!=rbarcodes.end();it++){
467 if(it->first.length() > maxLength){
468 maxLength = it->first.length();
471 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
473 }else{ alignment = NULL; }
475 //can you find the barcode
481 for(map<string,int>::iterator it=rbarcodes.begin();it!=rbarcodes.end();it++){
482 string oligo = it->first;
483 // int length = oligo.length();
485 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
486 success = bdiffs + 10;
490 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
491 alignment->align(oligo, rawSequence.substr((rawSequence.length()-(oligo.length()+bdiffs)),oligo.length()+bdiffs));
492 oligo = alignment->getSeqAAln();
493 string temp = alignment->getSeqBAln();
495 int alnLength = oligo.length();
497 for(int i=0;i<alnLength;i++){
498 if(oligo[i] != '-'){ alnLength = i; break; }
500 oligo = oligo.substr(alnLength);
501 temp = temp.substr(alnLength);
503 int numDiff = countDiffs(oligo, temp);
505 if(numDiff < minDiff){
508 minGroup = it->second;
510 for(int i=alnLength-1;i>=0;i--){
516 else if(numDiff == minDiff){
522 if(minDiff > bdiffs) { success = minDiff; } //no good matches
523 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
524 else{ //use the best match
526 seq.setUnaligned(rawSequence.substr(0, (rawSequence.length()-minPos)));
531 if (alignment != NULL) { delete alignment; }
538 catch(exception& e) {
539 m->errorOut(e, "TrimOligos", "stripRBarcode");
544 //********************************************************************/
545 int TrimOligos::stripForward(Sequence& seq, int& group){
548 string rawSequence = seq.getUnaligned();
549 int success = pdiffs + 1; //guilty until proven innocent
551 //can you find the primer
552 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
553 string oligo = it->first;
554 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
555 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
559 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
561 seq.setUnaligned(rawSequence.substr(oligo.length()));
567 //if you found the barcode or if you don't want to allow for diffs
568 if ((pdiffs == 0) || (success == 0)) { return success; }
570 else { //try aligning and see if you can find it
574 Alignment* alignment;
575 if (primers.size() > 0) {
576 map<string,int>::iterator it;
578 for(it=primers.begin();it!=primers.end();it++){
579 if(it->first.length() > maxLength){
580 maxLength = it->first.length();
583 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));
585 }else{ alignment = NULL; }
587 //can you find the barcode
593 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
594 string oligo = it->first;
595 // int length = oligo.length();
597 if(rawSequence.length() < maxLength){
598 success = pdiffs + 100;
602 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
603 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
604 oligo = alignment->getSeqAAln();
605 string temp = alignment->getSeqBAln();
607 int alnLength = oligo.length();
609 for(int i=oligo.length()-1;i>=0;i--){
610 if(oligo[i] != '-'){ alnLength = i+1; break; }
612 oligo = oligo.substr(0,alnLength);
613 temp = temp.substr(0,alnLength);
615 int numDiff = countDiffs(oligo, temp);
617 if(numDiff < minDiff){
620 minGroup = it->second;
622 for(int i=0;i<alnLength;i++){
628 else if(numDiff == minDiff){
634 if(minDiff > pdiffs) { success = minDiff; } //no good matches
635 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
636 else{ //use the best match
638 seq.setUnaligned(rawSequence.substr(minPos));
642 if (alignment != NULL) { delete alignment; }
649 catch(exception& e) {
650 m->errorOut(e, "TrimOligos", "stripForward");
654 //*******************************************************************/
655 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
657 string rawSequence = seq.getUnaligned();
658 int success = pdiffs + 1; //guilty until proven innocent
660 //can you find the primer
661 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
662 string oligo = it->first;
663 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
664 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
668 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
670 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
671 if(qual.getName() != ""){
672 if (!keepForward) { qual.trimQScores(oligo.length(), -1); }
679 //if you found the barcode or if you don't want to allow for diffs
680 if ((pdiffs == 0) || (success == 0)) { return success; }
682 else { //try aligning and see if you can find it
686 Alignment* alignment;
687 if (primers.size() > 0) {
688 map<string,int>::iterator it;
690 for(it=primers.begin();it!=primers.end();it++){
691 if(it->first.length() > maxLength){
692 maxLength = it->first.length();
695 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));
697 }else{ alignment = NULL; }
699 //can you find the barcode
705 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
706 string oligo = it->first;
707 // int length = oligo.length();
709 if(rawSequence.length() < maxLength){
710 success = pdiffs + 100;
714 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
715 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
716 oligo = alignment->getSeqAAln();
717 string temp = alignment->getSeqBAln();
719 int alnLength = oligo.length();
721 for(int i=oligo.length()-1;i>=0;i--){
722 if(oligo[i] != '-'){ alnLength = i+1; break; }
724 oligo = oligo.substr(0,alnLength);
725 temp = temp.substr(0,alnLength);
727 int numDiff = countDiffs(oligo, temp);
729 if(numDiff < minDiff){
732 minGroup = it->second;
734 for(int i=0;i<alnLength;i++){
740 else if(numDiff == minDiff){
746 if(minDiff > pdiffs) { success = minDiff; } //no good matches
747 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
748 else{ //use the best match
750 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
751 if(qual.getName() != ""){
752 if (!keepForward) { qual.trimQScores(minPos, -1); }
757 if (alignment != NULL) { delete alignment; }
764 catch(exception& e) {
765 m->errorOut(e, "TrimOligos", "stripForward");
769 //******************************************************************/
770 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
772 string rawSequence = seq.getUnaligned();
773 bool success = 0; //guilty until proven innocent
775 for(int i=0;i<revPrimer.size();i++){
776 string oligo = revPrimer[i];
778 if(rawSequence.length() < oligo.length()){
783 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
784 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
785 if(qual.getName() != ""){
786 qual.trimQScores(-1, rawSequence.length()-oligo.length());
795 catch(exception& e) {
796 m->errorOut(e, "TrimOligos", "stripReverse");
800 //******************************************************************/
801 bool TrimOligos::stripReverse(Sequence& seq){
804 string rawSequence = seq.getUnaligned();
805 bool success = 0; //guilty until proven innocent
807 for(int i=0;i<revPrimer.size();i++){
808 string oligo = revPrimer[i];
810 if(rawSequence.length() < oligo.length()){
815 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
816 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
825 catch(exception& e) {
826 m->errorOut(e, "TrimOligos", "stripReverse");
830 //******************************************************************/
831 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
833 string rawSequence = seq.getUnaligned();
834 bool success = ldiffs + 1; //guilty until proven innocent
836 for(int i=0;i<linker.size();i++){
837 string oligo = linker[i];
839 if(rawSequence.length() < oligo.length()){
840 success = ldiffs + 10;
844 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
845 seq.setUnaligned(rawSequence.substr(oligo.length()));
846 if(qual.getName() != ""){
847 qual.trimQScores(oligo.length(), -1);
854 //if you found the linker or if you don't want to allow for diffs
855 if ((ldiffs == 0) || (success == 0)) { return success; }
857 else { //try aligning and see if you can find it
861 Alignment* alignment;
862 if (linker.size() > 0) {
863 for(int i = 0; i < linker.size(); i++){
864 if(linker[i].length() > maxLength){
865 maxLength = linker[i].length();
868 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+ldiffs+1));
870 }else{ alignment = NULL; }
872 //can you find the barcode
877 for(int i = 0; i < linker.size(); i++){
878 string oligo = linker[i];
879 // int length = oligo.length();
881 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
882 success = ldiffs + 10;
886 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
887 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
888 oligo = alignment->getSeqAAln();
889 string temp = alignment->getSeqBAln();
891 int alnLength = oligo.length();
893 for(int i=oligo.length()-1;i>=0;i--){
894 if(oligo[i] != '-'){ alnLength = i+1; break; }
896 oligo = oligo.substr(0,alnLength);
897 temp = temp.substr(0,alnLength);
899 int numDiff = countDiffs(oligo, temp);
901 if(numDiff < minDiff){
905 for(int i=0;i<alnLength;i++){
911 else if(numDiff == minDiff){
917 if(minDiff > ldiffs) { success = minDiff; } //no good matches
918 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
919 else{ //use the best match
920 seq.setUnaligned(rawSequence.substr(minPos));
922 if(qual.getName() != ""){
923 qual.trimQScores(minPos, -1);
928 if (alignment != NULL) { delete alignment; }
936 catch(exception& e) {
937 m->errorOut(e, "TrimOligos", "stripLinker");
941 //******************************************************************/
942 bool TrimOligos::stripLinker(Sequence& seq){
945 string rawSequence = seq.getUnaligned();
946 bool success = ldiffs +1; //guilty until proven innocent
948 for(int i=0;i<linker.size();i++){
949 string oligo = linker[i];
951 if(rawSequence.length() < oligo.length()){
952 success = ldiffs +10;
956 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
957 seq.setUnaligned(rawSequence.substr(oligo.length()));
963 //if you found the linker or if you don't want to allow for diffs
964 if ((ldiffs == 0) || (success == 0)) { return success; }
966 else { //try aligning and see if you can find it
970 Alignment* alignment;
971 if (linker.size() > 0) {
972 for(int i = 0; i < linker.size(); i++){
973 if(linker[i].length() > maxLength){
974 maxLength = linker[i].length();
977 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+ldiffs+1));
979 }else{ alignment = NULL; }
981 //can you find the barcode
986 for(int i = 0; i < linker.size(); i++){
987 string oligo = linker[i];
988 // int length = oligo.length();
990 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
991 success = ldiffs + 10;
995 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
996 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
997 oligo = alignment->getSeqAAln();
998 string temp = alignment->getSeqBAln();
1000 int alnLength = oligo.length();
1002 for(int i=oligo.length()-1;i>=0;i--){
1003 if(oligo[i] != '-'){ alnLength = i+1; break; }
1005 oligo = oligo.substr(0,alnLength);
1006 temp = temp.substr(0,alnLength);
1008 int numDiff = countDiffs(oligo, temp);
1010 if(numDiff < minDiff){
1014 for(int i=0;i<alnLength;i++){
1020 else if(numDiff == minDiff){
1026 if(minDiff > ldiffs) { success = minDiff; } //no good matches
1027 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
1028 else{ //use the best match
1029 seq.setUnaligned(rawSequence.substr(minPos));
1033 if (alignment != NULL) { delete alignment; }
1040 catch(exception& e) {
1041 m->errorOut(e, "TrimOligos", "stripLinker");
1046 //******************************************************************/
1047 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
1049 string rawSequence = seq.getUnaligned();
1050 bool success = sdiffs+1; //guilty until proven innocent
1052 for(int i=0;i<spacer.size();i++){
1053 string oligo = spacer[i];
1055 if(rawSequence.length() < oligo.length()){
1056 success = sdiffs+10;
1060 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1061 seq.setUnaligned(rawSequence.substr(oligo.length()));
1062 if(qual.getName() != ""){
1063 qual.trimQScores(oligo.length(), -1);
1070 //if you found the spacer or if you don't want to allow for diffs
1071 if ((sdiffs == 0) || (success == 0)) { return success; }
1073 else { //try aligning and see if you can find it
1077 Alignment* alignment;
1078 if (spacer.size() > 0) {
1079 for(int i = 0; i < spacer.size(); i++){
1080 if(spacer[i].length() > maxLength){
1081 maxLength = spacer[i].length();
1084 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+sdiffs+1));
1086 }else{ alignment = NULL; }
1088 //can you find the barcode
1093 for(int i = 0; i < spacer.size(); i++){
1094 string oligo = spacer[i];
1095 // int length = oligo.length();
1097 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
1098 success = sdiffs + 10;
1102 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1103 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
1104 oligo = alignment->getSeqAAln();
1105 string temp = alignment->getSeqBAln();
1107 int alnLength = oligo.length();
1109 for(int i=oligo.length()-1;i>=0;i--){
1110 if(oligo[i] != '-'){ alnLength = i+1; break; }
1112 oligo = oligo.substr(0,alnLength);
1113 temp = temp.substr(0,alnLength);
1115 int numDiff = countDiffs(oligo, temp);
1117 if(numDiff < minDiff){
1121 for(int i=0;i<alnLength;i++){
1127 else if(numDiff == minDiff){
1133 if(minDiff > sdiffs) { success = minDiff; } //no good matches
1134 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
1135 else{ //use the best match
1136 seq.setUnaligned(rawSequence.substr(minPos));
1138 if(qual.getName() != ""){
1139 qual.trimQScores(minPos, -1);
1144 if (alignment != NULL) { delete alignment; }
1152 catch(exception& e) {
1153 m->errorOut(e, "TrimOligos", "stripSpacer");
1157 //******************************************************************/
1158 bool TrimOligos::stripSpacer(Sequence& seq){
1161 string rawSequence = seq.getUnaligned();
1162 bool success = sdiffs+1; //guilty until proven innocent
1164 for(int i=0;i<spacer.size();i++){
1165 string oligo = spacer[i];
1167 if(rawSequence.length() < oligo.length()){
1168 success = sdiffs+10;
1172 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1173 seq.setUnaligned(rawSequence.substr(oligo.length()));
1179 //if you found the spacer or if you don't want to allow for diffs
1180 if ((sdiffs == 0) || (success == 0)) { return success; }
1182 else { //try aligning and see if you can find it
1186 Alignment* alignment;
1187 if (spacer.size() > 0) {
1188 for(int i = 0; i < spacer.size(); i++){
1189 if(spacer[i].length() > maxLength){
1190 maxLength = spacer[i].length();
1193 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+sdiffs+1));
1195 }else{ alignment = NULL; }
1197 //can you find the barcode
1202 for(int i = 0; i < spacer.size(); i++){
1203 string oligo = spacer[i];
1204 // int length = oligo.length();
1206 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
1207 success = sdiffs + 10;
1211 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1212 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
1213 oligo = alignment->getSeqAAln();
1214 string temp = alignment->getSeqBAln();
1216 int alnLength = oligo.length();
1218 for(int i=oligo.length()-1;i>=0;i--){
1219 if(oligo[i] != '-'){ alnLength = i+1; break; }
1221 oligo = oligo.substr(0,alnLength);
1222 temp = temp.substr(0,alnLength);
1224 int numDiff = countDiffs(oligo, temp);
1226 if(numDiff < minDiff){
1230 for(int i=0;i<alnLength;i++){
1236 else if(numDiff == minDiff){
1242 if(minDiff > sdiffs) { success = minDiff; } //no good matches
1243 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
1244 else{ //use the best match
1245 seq.setUnaligned(rawSequence.substr(minPos));
1249 if (alignment != NULL) { delete alignment; }
1256 catch(exception& e) {
1257 m->errorOut(e, "TrimOligos", "stripSpacer");
1262 //******************************************************************/
1263 bool TrimOligos::compareDNASeq(string oligo, string seq){
1266 int length = oligo.length();
1268 for(int i=0;i<length;i++){
1270 if(oligo[i] != seq[i]){
1271 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
1272 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
1273 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
1274 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
1275 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
1276 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1277 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
1278 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1279 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1280 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1281 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
1282 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1284 if(success == 0) { break; }
1293 catch(exception& e) {
1294 m->errorOut(e, "TrimOligos", "compareDNASeq");
1299 //********************************************************************/
1300 int TrimOligos::countDiffs(string oligo, string seq){
1303 int length = oligo.length();
1306 for(int i=0;i<length;i++){
1308 if(oligo[i] != seq[i]){
1309 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
1310 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
1311 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
1312 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
1313 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
1314 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1315 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
1316 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1317 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1318 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1319 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
1320 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1327 catch(exception& e) {
1328 m->errorOut(e, "TrimOligos", "countDiffs");
1332 /********************************************************************/