5 * Created by westcott on 9/1/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
19 m = MothurOut::getInstance();
31 maxFBarcodeLength = 0;
32 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
33 if(it->first.length() > maxFBarcodeLength){
34 maxFBarcodeLength = it->first.length();
38 map<string,int>::iterator it;
39 for(it=primers.begin();it!=primers.end();it++){
40 if(it->first.length() > maxFPrimerLength){
41 maxFPrimerLength = it->first.length();
46 for(int i = 0; i < linker.size(); i++){
47 if(linker[i].length() > maxLinkerLength){
48 maxLinkerLength = linker[i].length();
53 for(int i = 0; i < spacer.size(); i++){
54 if(spacer[i].length() > maxSpacerLength){
55 maxSpacerLength = spacer[i].length();
60 m->errorOut(e, "TrimOligos", "TrimOligos");
64 /********************************************************************/
65 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
66 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
68 m = MothurOut::getInstance();
75 maxFBarcodeLength = 0;
76 for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
77 string forward = it->second.forward;
78 map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
80 if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length(); }
82 if (itForward == ifbarcodes.end()) {
83 vector<int> temp; temp.push_back(it->first);
84 ifbarcodes[forward] = temp;
85 }else { itForward->second.push_back(it->first); }
89 for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
90 string forward = it->second.forward;
91 map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
93 if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length(); }
95 if (itForward == ifprimers.end()) {
96 vector<int> temp; temp.push_back(it->first);
97 ifprimers[forward] = temp;
98 }else { itForward->second.push_back(it->first); }
101 maxRBarcodeLength = 0;
102 for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
103 string forward = it->second.reverse;
104 map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
106 if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length(); }
108 if (itForward == irbarcodes.end()) {
109 vector<int> temp; temp.push_back(it->first);
110 irbarcodes[forward] = temp;
111 }else { itForward->second.push_back(it->first); }
114 maxRPrimerLength = 0;
115 for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
116 string forward = it->second.reverse;
117 map<string, vector<int> >::iterator itForward = irprimers.find(forward);
119 if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length(); }
121 if (itForward == irprimers.end()) {
122 vector<int> temp; temp.push_back(it->first);
123 irprimers[forward] = temp;
124 }else { itForward->second.push_back(it->first); }
130 catch(exception& e) {
131 m->errorOut(e, "TrimOligos", "TrimOligos");
135 /********************************************************************/
136 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
137 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
139 m = MothurOut::getInstance();
148 maxFBarcodeLength = 0;
149 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
150 string oligo = it->first;
151 if(oligo.length() > maxFBarcodeLength){
152 maxFBarcodeLength = oligo.length();
155 maxRBarcodeLength = maxFBarcodeLength;
157 maxFPrimerLength = 0;
158 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
159 string oligo = it->first;
160 if(oligo.length() > maxFPrimerLength){
161 maxFPrimerLength = oligo.length();
164 maxRPrimerLength = maxFPrimerLength;
166 catch(exception& e) {
167 m->errorOut(e, "TrimOligos", "TrimOligos");
171 /********************************************************************/
172 TrimOligos::~TrimOligos() {}
173 //********************************************************************/
174 bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
177 string rawSequence = seq.getUnaligned();
179 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
180 string oligo = it->first;
182 if(rawSequence.length() < oligo.length()) { break; }
185 int olength = oligo.length();
186 for (int j = 0; j < rawSequence.length()-olength; j++){
187 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
188 string rawChunk = rawSequence.substr(j, olength);
189 if(compareDNASeq(oligo, rawChunk)) {
191 primerEnd = primerStart + olength;
198 primerStart = 0; primerEnd = 0;
199 //if you don't want to allow for diffs
200 if (pdiffs == 0) { return false; }
201 else { //try aligning and see if you can find it
203 //can you find the barcode
207 Alignment* alignment;
208 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
209 else{ alignment = NULL; }
211 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
213 string prim = it->first;
215 int olength = prim.length();
216 if (rawSequence.length() < olength) {} //ignore primers too long for this seq
218 for (int j = 0; j < rawSequence.length()-olength; j++){
220 string oligo = it->first;
222 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
224 string rawChunk = rawSequence.substr(j, olength+pdiffs);
226 //use needleman to align first primer.length()+numdiffs of sequence to each barcode
227 alignment->align(oligo, rawChunk);
228 oligo = alignment->getSeqAAln();
229 string temp = alignment->getSeqBAln();
231 int alnLength = oligo.length();
233 for(int i=oligo.length()-1;i>=0;i--){
234 if(oligo[i] != '-'){ alnLength = i+1; break; }
236 oligo = oligo.substr(0,alnLength);
237 temp = temp.substr(0,alnLength);
239 int numDiff = countDiffs(oligo, temp);
241 if(numDiff < minDiff){
245 primerEnd = primerStart + alnLength;
246 }else if(numDiff == minDiff){ minCount++; }
251 if (alignment != NULL) { delete alignment; }
253 if(minDiff > pdiffs) { primerStart = 0; primerEnd = 0; return false; } //no good matches
254 else if(minCount > 1) { primerStart = 0; primerEnd = 0; return false; } //can't tell the difference between multiple primers
258 primerStart = 0; primerEnd = 0;
262 catch(exception& e) {
263 m->errorOut(e, "TrimOligos", "stripForward");
267 //******************************************************************/
268 bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
271 string rawSequence = seq.getUnaligned();
273 for(int i=0;i<revPrimer.size();i++){
274 string oligo = revPrimer[i];
275 if(rawSequence.length() < oligo.length()) { break; }
278 int olength = oligo.length();
279 for (int j = rawSequence.length()-olength; j >= 0; j--){
280 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
281 string rawChunk = rawSequence.substr(j, olength);
283 if(compareDNASeq(oligo, rawChunk)) {
285 primerEnd = primerStart + olength;
292 primerStart = 0; primerEnd = 0;
295 catch(exception& e) {
296 m->errorOut(e, "PcrSeqsCommand", "findReverse");
300 //*******************************************************************/
302 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
305 string rawSequence = seq.getUnaligned();
306 int success = bdiffs + 1; //guilty until proven innocent
308 //can you find the barcode
309 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
310 string oligo = it->first;
311 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
312 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
316 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
318 seq.setUnaligned(rawSequence.substr(oligo.length()));
320 if(qual.getName() != ""){
321 qual.trimQScores(oligo.length(), -1);
329 //if you found the barcode or if you don't want to allow for diffs
330 if ((bdiffs == 0) || (success == 0)) { return success; }
332 else { //try aligning and see if you can find it
333 Alignment* alignment;
334 if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
335 else{ alignment = NULL; }
337 //can you find the barcode
343 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
344 string oligo = it->first;
345 // int length = oligo.length();
347 if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
348 success = bdiffs + 10;
352 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
353 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
354 oligo = alignment->getSeqAAln();
355 string temp = alignment->getSeqBAln();
357 int alnLength = oligo.length();
359 for(int i=oligo.length()-1;i>=0;i--){
360 if(oligo[i] != '-'){ alnLength = i+1; break; }
362 oligo = oligo.substr(0,alnLength);
363 temp = temp.substr(0,alnLength);
364 int numDiff = countDiffs(oligo, temp);
366 if(numDiff < minDiff){
369 minGroup = it->second;
371 for(int i=0;i<alnLength;i++){
377 else if(numDiff == minDiff){
383 if(minDiff > bdiffs) { success = minDiff; } //no good matches
384 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
385 else{ //use the best match
387 seq.setUnaligned(rawSequence.substr(minPos));
389 if(qual.getName() != ""){
390 qual.trimQScores(minPos, -1);
395 if (alignment != NULL) { delete alignment; }
402 catch(exception& e) {
403 m->errorOut(e, "TrimOligos", "stripBarcode");
407 //*******************************************************************/
408 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
410 //look for forward barcode
411 string rawFSequence = forwardSeq.getUnaligned();
412 string rawRSequence = reverseSeq.getUnaligned();
413 int success = bdiffs + 1; //guilty until proven innocent
415 //can you find the forward barcode
416 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
417 string foligo = it->second.forward;
418 string roligo = it->second.reverse;
420 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
421 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
424 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
425 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
429 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
431 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
432 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
438 //if you found the barcode or if you don't want to allow for diffs
439 if ((bdiffs == 0) || (success == 0)) { return success; }
440 else { //try aligning and see if you can find it
441 Alignment* alignment;
442 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
443 else{ alignment = NULL; }
445 //can you find the barcode
448 vector< vector<int> > minFGroup;
451 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
461 only want to look for forward = Sarah, John, Anna, Pat, Gail
462 reverse = Westcott, Schloss, Brown, Moore
464 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
466 //cout << endl << forwardSeq.getName() << endl;
467 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
468 string oligo = it->first;
470 if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
471 success = bdiffs + 10;
474 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
475 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
476 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
477 oligo = alignment->getSeqAAln();
478 string temp = alignment->getSeqBAln();
480 int alnLength = oligo.length();
482 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
483 oligo = oligo.substr(0,alnLength);
484 temp = temp.substr(0,alnLength);
485 int numDiff = countDiffs(oligo, temp);
487 if (alnLength == 0) { numDiff = bdiffs + 100; }
488 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
490 if(numDiff < minDiff){
494 minFGroup.push_back(it->second);
497 for(int i=0;i<alnLength;i++){
502 minFPos.push_back(tempminFPos);
503 }else if(numDiff == minDiff){
504 minFGroup.push_back(it->second);
506 for(int i=0;i<alnLength;i++){
511 minFPos.push_back(tempminFPos);
515 //cout << minDiff << '\t' << minCount << '\t' << endl;
516 if(minDiff > bdiffs) { success = minDiff; } //no good matches
518 //check for reverse match
519 if (alignment != NULL) { delete alignment; }
521 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
522 else{ alignment = NULL; }
524 //can you find the barcode
527 vector< vector<int> > minRGroup;
530 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
531 string oligo = it->first;
532 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
533 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
534 success = bdiffs + 10;
538 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
539 alignment->align(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
540 oligo = alignment->getSeqAAln();
541 string temp = alignment->getSeqBAln();
543 int alnLength = oligo.length();
544 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
545 oligo = oligo.substr(0,alnLength);
546 temp = temp.substr(0,alnLength);
547 int numDiff = countDiffs(oligo, temp);
548 if (alnLength == 0) { numDiff = bdiffs + 100; }
550 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
551 if(numDiff < minDiff){
555 minRGroup.push_back(it->second);
558 for(int i=0;i<alnLength;i++){
563 minRPos.push_back(tempminRPos);
564 }else if(numDiff == minDiff){
566 for(int i=0;i<alnLength;i++){
571 minRPos.push_back(tempminRPos);
572 minRGroup.push_back(it->second);
577 /*cout << minDiff << '\t' << minCount << '\t' << endl;
578 for (int i = 0; i < minFGroup.size(); i++) {
580 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
584 for (int i = 0; i < minRGroup.size(); i++) {
586 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
590 if(minDiff > bdiffs) { success = minDiff; } //no good matches
592 bool foundMatch = false;
594 for (int i = 0; i < minFGroup.size(); i++) {
595 for (int j = 0; j < minFGroup[i].size(); j++) {
596 for (int k = 0; k < minRGroup.size(); k++) {
597 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
604 if (matches.size() == 1) {
611 //we have an acceptable match for the forward and reverse, but do they match?
613 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
614 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
616 }else { success = bdiffs + 100; }
620 if (alignment != NULL) { delete alignment; }
626 catch(exception& e) {
627 m->errorOut(e, "TrimOligos", "stripIBarcode");
632 //*******************************************************************/
633 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
635 //look for forward barcode
636 string rawFSequence = forwardSeq.getUnaligned();
637 string rawRSequence = reverseSeq.getUnaligned();
638 int success = bdiffs + 1; //guilty until proven innocent
640 //can you find the forward barcode
641 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
642 string foligo = it->second.forward;
643 string roligo = it->second.reverse;
645 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
646 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
649 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
650 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
654 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
656 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
657 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
658 forwardQual.trimQScores(foligo.length(), -1);
659 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
665 //if you found the barcode or if you don't want to allow for diffs
666 if ((bdiffs == 0) || (success == 0)) { return success; }
667 else { //try aligning and see if you can find it
668 Alignment* alignment;
669 if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
670 else{ alignment = NULL; }
672 //can you find the barcode
675 vector< vector<int> > minFGroup;
678 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
688 only want to look for forward = Sarah, John, Anna, Pat, Gail
689 reverse = Westcott, Schloss, Brown, Moore
691 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
693 //cout << endl << forwardSeq.getName() << endl;
694 for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
695 string oligo = it->first;
697 if(rawFSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
698 success = bdiffs + 10;
701 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
702 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
703 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
704 oligo = alignment->getSeqAAln();
705 string temp = alignment->getSeqBAln();
707 int alnLength = oligo.length();
709 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
710 oligo = oligo.substr(0,alnLength);
711 temp = temp.substr(0,alnLength);
712 int numDiff = countDiffs(oligo, temp);
714 if (alnLength == 0) { numDiff = bdiffs + 100; }
715 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
717 if(numDiff < minDiff){
721 minFGroup.push_back(it->second);
724 for(int i=0;i<alnLength;i++){
729 minFPos.push_back(tempminFPos);
730 }else if(numDiff == minDiff){
731 minFGroup.push_back(it->second);
733 for(int i=0;i<alnLength;i++){
738 minFPos.push_back(tempminFPos);
742 //cout << minDiff << '\t' << minCount << '\t' << endl;
743 if(minDiff > bdiffs) { success = minDiff; } //no good matches
745 //check for reverse match
746 if (alignment != NULL) { delete alignment; }
748 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
749 else{ alignment = NULL; }
751 //can you find the barcode
754 vector< vector<int> > minRGroup;
757 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
758 string oligo = it->first;
759 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
760 if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
761 success = bdiffs + 10;
765 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
766 alignment->align(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
767 oligo = alignment->getSeqAAln();
768 string temp = alignment->getSeqBAln();
770 int alnLength = oligo.length();
771 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
772 oligo = oligo.substr(0,alnLength);
773 temp = temp.substr(0,alnLength);
774 int numDiff = countDiffs(oligo, temp);
775 if (alnLength == 0) { numDiff = bdiffs + 100; }
777 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
778 if(numDiff < minDiff){
782 minRGroup.push_back(it->second);
785 for(int i=0;i<alnLength;i++){
790 minRPos.push_back(tempminRPos);
791 }else if(numDiff == minDiff){
793 for(int i=0;i<alnLength;i++){
798 minRPos.push_back(tempminRPos);
799 minRGroup.push_back(it->second);
804 /*cout << minDiff << '\t' << minCount << '\t' << endl;
805 for (int i = 0; i < minFGroup.size(); i++) {
807 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
811 for (int i = 0; i < minRGroup.size(); i++) {
813 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
817 if(minDiff > bdiffs) { success = minDiff; } //no good matches
819 bool foundMatch = false;
821 for (int i = 0; i < minFGroup.size(); i++) {
822 for (int j = 0; j < minFGroup[i].size(); j++) {
823 for (int k = 0; k < minRGroup.size(); k++) {
824 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
831 if (matches.size() == 1) {
838 //we have an acceptable match for the forward and reverse, but do they match?
840 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
841 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
842 forwardQual.trimQScores(fStart, -1);
843 reverseQual.trimQScores(rStart, -1);
845 }else { success = bdiffs + 100; }
849 if (alignment != NULL) { delete alignment; }
855 catch(exception& e) {
856 m->errorOut(e, "TrimOligos", "stripIBarcode");
861 //*******************************************************************/
862 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
864 //look for forward barcode
865 string rawFSequence = forwardSeq.getUnaligned();
866 string rawRSequence = reverseSeq.getUnaligned();
867 int success = pdiffs + 1; //guilty until proven innocent
869 //can you find the forward barcode
870 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
871 string foligo = it->second.forward;
872 string roligo = it->second.reverse;
874 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
875 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
878 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
879 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
883 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
885 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
886 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
887 forwardQual.trimQScores(foligo.length(), -1);
888 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
894 //if you found the barcode or if you don't want to allow for diffs
895 if ((pdiffs == 0) || (success == 0)) { return success; }
896 else { //try aligning and see if you can find it
897 Alignment* alignment;
898 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
899 else{ alignment = NULL; }
901 //can you find the barcode
904 vector< vector<int> > minFGroup;
907 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
917 only want to look for forward = Sarah, John, Anna, Pat, Gail
918 reverse = Westcott, Schloss, Brown, Moore
920 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
922 //cout << endl << forwardSeq.getName() << endl;
923 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
924 string oligo = it->first;
926 if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
927 success = pdiffs + 10;
930 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
931 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
932 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
933 oligo = alignment->getSeqAAln();
934 string temp = alignment->getSeqBAln();
936 int alnLength = oligo.length();
938 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
939 oligo = oligo.substr(0,alnLength);
940 temp = temp.substr(0,alnLength);
941 int numDiff = countDiffs(oligo, temp);
943 if (alnLength == 0) { numDiff = pdiffs + 100; }
944 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
946 if(numDiff < minDiff){
950 minFGroup.push_back(it->second);
953 for(int i=0;i<alnLength;i++){
958 minFPos.push_back(tempminFPos);
959 }else if(numDiff == minDiff){
960 minFGroup.push_back(it->second);
962 for(int i=0;i<alnLength;i++){
967 minFPos.push_back(tempminFPos);
971 //cout << minDiff << '\t' << minCount << '\t' << endl;
972 if(minDiff > pdiffs) { success = minDiff; } //no good matches
974 //check for reverse match
975 if (alignment != NULL) { delete alignment; }
977 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
978 else{ alignment = NULL; }
980 //can you find the barcode
983 vector< vector<int> > minRGroup;
986 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
987 string oligo = it->first;
988 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
989 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
990 success = pdiffs + 10;
994 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
995 alignment->align(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
996 oligo = alignment->getSeqAAln();
997 string temp = alignment->getSeqBAln();
999 int alnLength = oligo.length();
1000 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1001 oligo = oligo.substr(0,alnLength);
1002 temp = temp.substr(0,alnLength);
1003 int numDiff = countDiffs(oligo, temp);
1004 if (alnLength == 0) { numDiff = pdiffs + 100; }
1006 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1007 if(numDiff < minDiff){
1011 minRGroup.push_back(it->second);
1012 int tempminRPos = 0;
1014 for(int i=0;i<alnLength;i++){
1019 minRPos.push_back(tempminRPos);
1020 }else if(numDiff == minDiff){
1021 int tempminRPos = 0;
1022 for(int i=0;i<alnLength;i++){
1027 minRPos.push_back(tempminRPos);
1028 minRGroup.push_back(it->second);
1033 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1034 for (int i = 0; i < minFGroup.size(); i++) {
1036 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
1040 for (int i = 0; i < minRGroup.size(); i++) {
1042 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
1046 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1048 bool foundMatch = false;
1049 vector<int> matches;
1050 for (int i = 0; i < minFGroup.size(); i++) {
1051 for (int j = 0; j < minFGroup[i].size(); j++) {
1052 for (int k = 0; k < minRGroup.size(); k++) {
1053 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1060 if (matches.size() == 1) {
1063 fStart = minFPos[0];
1064 rStart = minRPos[0];
1067 //we have an acceptable match for the forward and reverse, but do they match?
1069 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1070 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1071 forwardQual.trimQScores(fStart, -1);
1072 reverseQual.trimQScores(rStart, -1);
1074 }else { success = pdiffs + 100; }
1078 if (alignment != NULL) { delete alignment; }
1084 catch(exception& e) {
1085 m->errorOut(e, "TrimOligos", "stripIForward");
1090 //*******************************************************************/
1091 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
1093 //look for forward barcode
1094 string rawFSequence = forwardSeq.getUnaligned();
1095 string rawRSequence = reverseSeq.getUnaligned();
1096 int success = pdiffs + 1; //guilty until proven innocent
1098 //can you find the forward barcode
1099 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1100 string foligo = it->second.forward;
1101 string roligo = it->second.reverse;
1103 if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
1104 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1107 if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
1108 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1112 if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1114 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1115 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
1121 //if you found the barcode or if you don't want to allow for diffs
1122 if ((pdiffs == 0) || (success == 0)) { return success; }
1123 else { //try aligning and see if you can find it
1124 Alignment* alignment;
1125 if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1126 else{ alignment = NULL; }
1128 //can you find the barcode
1131 vector< vector<int> > minFGroup;
1132 vector<int> minFPos;
1134 //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1144 only want to look for forward = Sarah, John, Anna, Pat, Gail
1145 reverse = Westcott, Schloss, Brown, Moore
1147 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1149 //cout << endl << forwardSeq.getName() << endl;
1150 for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1151 string oligo = it->first;
1153 if(rawFSequence.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1154 success = pdiffs + 10;
1157 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1158 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1159 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1160 oligo = alignment->getSeqAAln();
1161 string temp = alignment->getSeqBAln();
1163 int alnLength = oligo.length();
1165 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1166 oligo = oligo.substr(0,alnLength);
1167 temp = temp.substr(0,alnLength);
1168 int numDiff = countDiffs(oligo, temp);
1170 if (alnLength == 0) { numDiff = pdiffs + 100; }
1171 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1173 if(numDiff < minDiff){
1177 minFGroup.push_back(it->second);
1178 int tempminFPos = 0;
1180 for(int i=0;i<alnLength;i++){
1185 minFPos.push_back(tempminFPos);
1186 }else if(numDiff == minDiff){
1187 minFGroup.push_back(it->second);
1188 int tempminFPos = 0;
1189 for(int i=0;i<alnLength;i++){
1194 minFPos.push_back(tempminFPos);
1198 //cout << minDiff << '\t' << minCount << '\t' << endl;
1199 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1201 //check for reverse match
1202 if (alignment != NULL) { delete alignment; }
1204 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1205 else{ alignment = NULL; }
1207 //can you find the barcode
1210 vector< vector<int> > minRGroup;
1211 vector<int> minRPos;
1213 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1214 string oligo = it->first;
1215 //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1216 if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
1217 success = pdiffs + 10;
1221 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1222 alignment->align(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1223 oligo = alignment->getSeqAAln();
1224 string temp = alignment->getSeqBAln();
1226 int alnLength = oligo.length();
1227 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
1228 oligo = oligo.substr(0,alnLength);
1229 temp = temp.substr(0,alnLength);
1230 int numDiff = countDiffs(oligo, temp);
1231 if (alnLength == 0) { numDiff = pdiffs + 100; }
1233 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1234 if(numDiff < minDiff){
1238 minRGroup.push_back(it->second);
1239 int tempminRPos = 0;
1241 for(int i=0;i<alnLength;i++){
1246 minRPos.push_back(tempminRPos);
1247 }else if(numDiff == minDiff){
1248 int tempminRPos = 0;
1249 for(int i=0;i<alnLength;i++){
1254 minRPos.push_back(tempminRPos);
1255 minRGroup.push_back(it->second);
1260 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1261 for (int i = 0; i < minFGroup.size(); i++) {
1263 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
1267 for (int i = 0; i < minRGroup.size(); i++) {
1269 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
1273 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1275 bool foundMatch = false;
1276 vector<int> matches;
1277 for (int i = 0; i < minFGroup.size(); i++) {
1278 for (int j = 0; j < minFGroup[i].size(); j++) {
1279 for (int k = 0; k < minRGroup.size(); k++) {
1280 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1287 if (matches.size() == 1) {
1290 fStart = minFPos[0];
1291 rStart = minRPos[0];
1294 //we have an acceptable match for the forward and reverse, but do they match?
1296 forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1297 reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1299 }else { success = pdiffs + 100; }
1303 if (alignment != NULL) { delete alignment; }
1309 catch(exception& e) {
1310 m->errorOut(e, "TrimOligos", "stripIForward");
1315 //*******************************************************************/
1316 int TrimOligos::stripBarcode(Sequence& seq, int& group){
1319 string rawSequence = seq.getUnaligned();
1320 int success = bdiffs + 1; //guilty until proven innocent
1322 //can you find the barcode
1323 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1324 string oligo = it->first;
1325 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
1326 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
1330 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1332 seq.setUnaligned(rawSequence.substr(oligo.length()));
1339 //if you found the barcode or if you don't want to allow for diffs
1340 if ((bdiffs == 0) || (success == 0)) { return success; }
1342 else { //try aligning and see if you can find it
1343 Alignment* alignment;
1344 if (barcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
1345 else{ alignment = NULL; }
1347 //can you find the barcode
1353 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1354 string oligo = it->first;
1355 // int length = oligo.length();
1357 if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
1358 success = bdiffs + 10;
1362 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1363 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1364 oligo = alignment->getSeqAAln();
1365 string temp = alignment->getSeqBAln();
1367 int alnLength = oligo.length();
1369 for(int i=oligo.length()-1;i>=0;i--){
1370 if(oligo[i] != '-'){ alnLength = i+1; break; }
1372 oligo = oligo.substr(0,alnLength);
1373 temp = temp.substr(0,alnLength);
1375 int numDiff = countDiffs(oligo, temp);
1377 if(numDiff < minDiff){
1380 minGroup = it->second;
1382 for(int i=0;i<alnLength;i++){
1388 else if(numDiff == minDiff){
1394 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1395 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
1396 else{ //use the best match
1398 seq.setUnaligned(rawSequence.substr(minPos));
1402 if (alignment != NULL) { delete alignment; }
1409 catch(exception& e) {
1410 m->errorOut(e, "TrimOligos", "stripBarcode");
1416 /********************************************************************/
1417 int TrimOligos::stripForward(Sequence& seq, int& group){
1420 string rawSequence = seq.getUnaligned();
1421 int success = pdiffs + 1; //guilty until proven innocent
1423 //can you find the primer
1424 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1425 string oligo = it->first;
1426 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1427 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1431 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1433 seq.setUnaligned(rawSequence.substr(oligo.length()));
1439 //if you found the barcode or if you don't want to allow for diffs
1440 if ((pdiffs == 0) || (success == 0)) { return success; }
1442 else { //try aligning and see if you can find it
1443 Alignment* alignment;
1444 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1445 else{ alignment = NULL; }
1447 //can you find the barcode
1453 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1454 string oligo = it->first;
1455 // int length = oligo.length();
1457 if(rawSequence.length() < maxFPrimerLength){
1458 success = pdiffs + 100;
1462 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1463 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1464 oligo = alignment->getSeqAAln();
1465 string temp = alignment->getSeqBAln();
1467 int alnLength = oligo.length();
1469 for(int i=oligo.length()-1;i>=0;i--){
1470 if(oligo[i] != '-'){ alnLength = i+1; break; }
1472 oligo = oligo.substr(0,alnLength);
1473 temp = temp.substr(0,alnLength);
1475 int numDiff = countDiffs(oligo, temp);
1477 if(numDiff < minDiff){
1480 minGroup = it->second;
1482 for(int i=0;i<alnLength;i++){
1488 else if(numDiff == minDiff){
1494 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1495 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1496 else{ //use the best match
1498 seq.setUnaligned(rawSequence.substr(minPos));
1502 if (alignment != NULL) { delete alignment; }
1509 catch(exception& e) {
1510 m->errorOut(e, "TrimOligos", "stripForward");
1514 //*******************************************************************/
1515 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1517 string rawSequence = seq.getUnaligned();
1518 int success = pdiffs + 1; //guilty until proven innocent
1520 //can you find the primer
1521 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1522 string oligo = it->first;
1523 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1524 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1528 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1530 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
1531 if(qual.getName() != ""){
1532 if (!keepForward) { qual.trimQScores(oligo.length(), -1); }
1539 //if you found the barcode or if you don't want to allow for diffs
1540 if ((pdiffs == 0) || (success == 0)) { return success; }
1542 else { //try aligning and see if you can find it
1543 Alignment* alignment;
1544 if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1545 else{ alignment = NULL; }
1547 //can you find the barcode
1553 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1554 string oligo = it->first;
1555 // int length = oligo.length();
1557 if(rawSequence.length() < maxFPrimerLength){
1558 success = pdiffs + 100;
1562 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1563 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1564 oligo = alignment->getSeqAAln();
1565 string temp = alignment->getSeqBAln();
1567 int alnLength = oligo.length();
1569 for(int i=oligo.length()-1;i>=0;i--){
1570 if(oligo[i] != '-'){ alnLength = i+1; break; }
1572 oligo = oligo.substr(0,alnLength);
1573 temp = temp.substr(0,alnLength);
1575 int numDiff = countDiffs(oligo, temp);
1577 if(numDiff < minDiff){
1580 minGroup = it->second;
1582 for(int i=0;i<alnLength;i++){
1588 else if(numDiff == minDiff){
1594 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1595 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1596 else{ //use the best match
1598 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
1599 if(qual.getName() != ""){
1600 if (!keepForward) { qual.trimQScores(minPos, -1); }
1605 if (alignment != NULL) { delete alignment; }
1612 catch(exception& e) {
1613 m->errorOut(e, "TrimOligos", "stripForward");
1617 //******************************************************************/
1618 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
1620 string rawSequence = seq.getUnaligned();
1621 bool success = 0; //guilty until proven innocent
1623 for(int i=0;i<revPrimer.size();i++){
1624 string oligo = revPrimer[i];
1626 if(rawSequence.length() < oligo.length()){
1631 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1632 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1633 if(qual.getName() != ""){
1634 qual.trimQScores(-1, rawSequence.length()-oligo.length());
1643 catch(exception& e) {
1644 m->errorOut(e, "TrimOligos", "stripReverse");
1648 //******************************************************************/
1649 bool TrimOligos::stripReverse(Sequence& seq){
1652 string rawSequence = seq.getUnaligned();
1653 bool success = 0; //guilty until proven innocent
1655 for(int i=0;i<revPrimer.size();i++){
1656 string oligo = revPrimer[i];
1658 if(rawSequence.length() < oligo.length()){
1663 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1664 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1673 catch(exception& e) {
1674 m->errorOut(e, "TrimOligos", "stripReverse");
1678 //******************************************************************/
1679 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
1681 string rawSequence = seq.getUnaligned();
1682 bool success = ldiffs + 1; //guilty until proven innocent
1684 for(int i=0;i<linker.size();i++){
1685 string oligo = linker[i];
1687 if(rawSequence.length() < oligo.length()){
1688 success = ldiffs + 10;
1692 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1693 seq.setUnaligned(rawSequence.substr(oligo.length()));
1694 if(qual.getName() != ""){
1695 qual.trimQScores(oligo.length(), -1);
1702 //if you found the linker or if you don't want to allow for diffs
1703 if ((ldiffs == 0) || (success == 0)) { return success; }
1705 else { //try aligning and see if you can find it
1706 Alignment* alignment;
1707 if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
1708 else{ alignment = NULL; }
1710 //can you find the barcode
1715 for(int i = 0; i < linker.size(); i++){
1716 string oligo = linker[i];
1717 // int length = oligo.length();
1719 if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length
1720 success = ldiffs + 10;
1724 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1725 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
1726 oligo = alignment->getSeqAAln();
1727 string temp = alignment->getSeqBAln();
1729 int alnLength = oligo.length();
1731 for(int i=oligo.length()-1;i>=0;i--){
1732 if(oligo[i] != '-'){ alnLength = i+1; break; }
1734 oligo = oligo.substr(0,alnLength);
1735 temp = temp.substr(0,alnLength);
1737 int numDiff = countDiffs(oligo, temp);
1739 if(numDiff < minDiff){
1743 for(int i=0;i<alnLength;i++){
1749 else if(numDiff == minDiff){
1755 if(minDiff > ldiffs) { success = minDiff; } //no good matches
1756 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
1757 else{ //use the best match
1758 seq.setUnaligned(rawSequence.substr(minPos));
1760 if(qual.getName() != ""){
1761 qual.trimQScores(minPos, -1);
1766 if (alignment != NULL) { delete alignment; }
1774 catch(exception& e) {
1775 m->errorOut(e, "TrimOligos", "stripLinker");
1779 //******************************************************************/
1780 bool TrimOligos::stripLinker(Sequence& seq){
1783 string rawSequence = seq.getUnaligned();
1784 bool success = ldiffs +1; //guilty until proven innocent
1786 for(int i=0;i<linker.size();i++){
1787 string oligo = linker[i];
1789 if(rawSequence.length() < oligo.length()){
1790 success = ldiffs +10;
1794 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1795 seq.setUnaligned(rawSequence.substr(oligo.length()));
1801 //if you found the linker or if you don't want to allow for diffs
1802 if ((ldiffs == 0) || (success == 0)) { return success; }
1804 else { //try aligning and see if you can find it
1805 Alignment* alignment;
1806 if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
1807 else{ alignment = NULL; }
1809 //can you find the barcode
1814 for(int i = 0; i < linker.size(); i++){
1815 string oligo = linker[i];
1816 // int length = oligo.length();
1818 if(rawSequence.length() < maxLinkerLength){ //let's just assume that the barcodes are the same length
1819 success = ldiffs + 10;
1823 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1824 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
1825 oligo = alignment->getSeqAAln();
1826 string temp = alignment->getSeqBAln();
1828 int alnLength = oligo.length();
1830 for(int i=oligo.length()-1;i>=0;i--){
1831 if(oligo[i] != '-'){ alnLength = i+1; break; }
1833 oligo = oligo.substr(0,alnLength);
1834 temp = temp.substr(0,alnLength);
1836 int numDiff = countDiffs(oligo, temp);
1838 if(numDiff < minDiff){
1842 for(int i=0;i<alnLength;i++){
1848 else if(numDiff == minDiff){
1854 if(minDiff > ldiffs) { success = minDiff; } //no good matches
1855 else if(minCount > 1) { success = ldiffs + 100; } //can't tell the difference between multiple barcodes
1856 else{ //use the best match
1857 seq.setUnaligned(rawSequence.substr(minPos));
1861 if (alignment != NULL) { delete alignment; }
1868 catch(exception& e) {
1869 m->errorOut(e, "TrimOligos", "stripLinker");
1874 //******************************************************************/
1875 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
1877 string rawSequence = seq.getUnaligned();
1878 bool success = sdiffs+1; //guilty until proven innocent
1880 for(int i=0;i<spacer.size();i++){
1881 string oligo = spacer[i];
1883 if(rawSequence.length() < oligo.length()){
1884 success = sdiffs+10;
1888 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1889 seq.setUnaligned(rawSequence.substr(oligo.length()));
1890 if(qual.getName() != ""){
1891 qual.trimQScores(oligo.length(), -1);
1898 //if you found the spacer or if you don't want to allow for diffs
1899 if ((sdiffs == 0) || (success == 0)) { return success; }
1901 else { //try aligning and see if you can find it
1902 Alignment* alignment;
1903 if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
1904 else{ alignment = NULL; }
1906 //can you find the barcode
1911 for(int i = 0; i < spacer.size(); i++){
1912 string oligo = spacer[i];
1913 // int length = oligo.length();
1915 if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length
1916 success = sdiffs + 10;
1920 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1921 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
1922 oligo = alignment->getSeqAAln();
1923 string temp = alignment->getSeqBAln();
1925 int alnLength = oligo.length();
1927 for(int i=oligo.length()-1;i>=0;i--){
1928 if(oligo[i] != '-'){ alnLength = i+1; break; }
1930 oligo = oligo.substr(0,alnLength);
1931 temp = temp.substr(0,alnLength);
1933 int numDiff = countDiffs(oligo, temp);
1935 if(numDiff < minDiff){
1939 for(int i=0;i<alnLength;i++){
1945 else if(numDiff == minDiff){
1951 if(minDiff > sdiffs) { success = minDiff; } //no good matches
1952 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
1953 else{ //use the best match
1954 seq.setUnaligned(rawSequence.substr(minPos));
1956 if(qual.getName() != ""){
1957 qual.trimQScores(minPos, -1);
1962 if (alignment != NULL) { delete alignment; }
1970 catch(exception& e) {
1971 m->errorOut(e, "TrimOligos", "stripSpacer");
1975 //******************************************************************/
1976 bool TrimOligos::stripSpacer(Sequence& seq){
1979 string rawSequence = seq.getUnaligned();
1980 bool success = sdiffs+1; //guilty until proven innocent
1982 for(int i=0;i<spacer.size();i++){
1983 string oligo = spacer[i];
1985 if(rawSequence.length() < oligo.length()){
1986 success = sdiffs+10;
1990 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1991 seq.setUnaligned(rawSequence.substr(oligo.length()));
1997 //if you found the spacer or if you don't want to allow for diffs
1998 if ((sdiffs == 0) || (success == 0)) { return success; }
2000 else { //try aligning and see if you can find it
2001 Alignment* alignment;
2002 if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2003 else{ alignment = NULL; }
2005 //can you find the barcode
2010 for(int i = 0; i < spacer.size(); i++){
2011 string oligo = spacer[i];
2012 // int length = oligo.length();
2014 if(rawSequence.length() < maxSpacerLength){ //let's just assume that the barcodes are the same length
2015 success = sdiffs + 10;
2019 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2020 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2021 oligo = alignment->getSeqAAln();
2022 string temp = alignment->getSeqBAln();
2024 int alnLength = oligo.length();
2026 for(int i=oligo.length()-1;i>=0;i--){
2027 if(oligo[i] != '-'){ alnLength = i+1; break; }
2029 oligo = oligo.substr(0,alnLength);
2030 temp = temp.substr(0,alnLength);
2032 int numDiff = countDiffs(oligo, temp);
2034 if(numDiff < minDiff){
2038 for(int i=0;i<alnLength;i++){
2044 else if(numDiff == minDiff){
2050 if(minDiff > sdiffs) { success = minDiff; } //no good matches
2051 else if(minCount > 1) { success = sdiffs + 100; } //can't tell the difference between multiple barcodes
2052 else{ //use the best match
2053 seq.setUnaligned(rawSequence.substr(minPos));
2057 if (alignment != NULL) { delete alignment; }
2064 catch(exception& e) {
2065 m->errorOut(e, "TrimOligos", "stripSpacer");
2070 //******************************************************************/
2071 bool TrimOligos::compareDNASeq(string oligo, string seq){
2074 int length = oligo.length();
2076 for(int i=0;i<length;i++){
2078 if(oligo[i] != seq[i]){
2079 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
2080 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
2081 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
2082 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
2083 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
2084 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2085 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
2086 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
2087 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2088 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
2089 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
2090 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
2092 if(success == 0) { break; }
2101 catch(exception& e) {
2102 m->errorOut(e, "TrimOligos", "compareDNASeq");
2107 //********************************************************************/
2108 int TrimOligos::countDiffs(string oligo, string seq){
2111 int length = oligo.length();
2114 for(int i=0;i<length;i++){
2116 if(oligo[i] != seq[i]){
2117 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
2118 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
2119 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
2120 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
2121 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
2122 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2123 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
2124 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
2125 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2126 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
2127 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
2128 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
2135 catch(exception& e) {
2136 m->errorOut(e, "TrimOligos", "countDiffs");
2140 /********************************************************************/