]> git.donarmstrong.com Git - mothur.git/blob - trimoligos.cpp
start adding debugging to trimoligos
[mothur.git] / trimoligos.cpp
1 /*
2  * trimoligos.cpp
3  * Mothur
4  *
5  * Created by westcott on 9/1/11.
6  * Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
13
14
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
18     try {
19         m = MothurOut::getInstance();
20         paired = false;
21         
22         pdiffs = p;
23         bdiffs = b;
24         ldiffs = l;
25         sdiffs = s;
26         
27         barcodes = br;
28         primers = pr;
29         revPrimer = r;
30         linker = lk;
31         spacer = sp;
32         maxFBarcodeLength = 0;
33         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
34             if(it->first.length() > maxFBarcodeLength){
35                 maxFBarcodeLength = it->first.length();
36             }
37         }
38         maxFPrimerLength = 0;
39         map<string,int>::iterator it;
40         for(it=primers.begin();it!=primers.end();it++){
41             if(it->first.length() > maxFPrimerLength){
42                 maxFPrimerLength = it->first.length();
43             }
44         }
45         
46         maxLinkerLength = 0;
47         for(int i = 0; i < linker.size(); i++){
48             if(linker[i].length() > maxLinkerLength){
49                 maxLinkerLength = linker[i].length();
50             }
51         }
52         
53         maxSpacerLength = 0;
54         for(int i = 0; i < spacer.size(); i++){
55             if(spacer[i].length() > maxSpacerLength){
56                 maxSpacerLength = spacer[i].length();
57             }
58         }
59     }
60     catch(exception& e) {
61         m->errorOut(e, "TrimOligos", "TrimOligos");
62         exit(1);
63     }
64 }
65 /********************************************************************/
66 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
67 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
68     try {
69         m = MothurOut::getInstance();
70         
71         pdiffs = p;
72         bdiffs = b;
73         ldiffs = l;
74         sdiffs = s;
75         paired = true;
76         
77         maxFBarcodeLength = 0;
78         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
79             string forward = it->second.forward;
80             map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
81             
82             if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length(); }
83             
84             if (itForward == ifbarcodes.end()) {
85                 vector<int> temp; temp.push_back(it->first);
86                 ifbarcodes[forward] = temp;
87             }else { itForward->second.push_back(it->first); }
88         }
89         
90         maxFPrimerLength = 0;
91         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
92             string forward = it->second.forward;
93             map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
94             
95             if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length(); }
96             
97             if (itForward == ifprimers.end()) {
98                 vector<int> temp; temp.push_back(it->first);
99                 ifprimers[forward] = temp;
100             }else { itForward->second.push_back(it->first); }
101         }
102         
103         maxRBarcodeLength = 0;
104         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
105             string forward = it->second.reverse;
106             map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
107             
108             if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length(); }
109             
110             if (itForward == irbarcodes.end()) {
111                 vector<int> temp; temp.push_back(it->first);
112                 irbarcodes[forward] = temp;
113             }else { itForward->second.push_back(it->first); }
114         }
115         
116         maxRPrimerLength = 0;
117         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
118             string forward = it->second.reverse;
119             map<string, vector<int> >::iterator itForward = irprimers.find(forward);
120             
121             if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length(); }
122             
123             if (itForward == irprimers.end()) {
124                 vector<int> temp; temp.push_back(it->first);
125                 irprimers[forward] = temp;
126             }else { itForward->second.push_back(it->first); }
127         }
128         
129         ipbarcodes = br;
130         ipprimers = pr;
131     }
132     catch(exception& e) {
133         m->errorOut(e, "TrimOligos", "TrimOligos");
134         exit(1);
135     }
136 }
137 /********************************************************************/
138 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
139 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
140     try {
141         m = MothurOut::getInstance();
142         
143         pdiffs = p;
144         bdiffs = b;
145         
146         barcodes = br;
147         primers = pr;
148         revPrimer = r;
149         paired = false;
150         
151         maxFBarcodeLength = 0;
152         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
153             string oligo = it->first;
154             if(oligo.length() > maxFBarcodeLength){
155                 maxFBarcodeLength = oligo.length();
156             }
157         }
158         maxRBarcodeLength = maxFBarcodeLength;
159         
160         maxFPrimerLength = 0;
161         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
162             string oligo = it->first;
163             if(oligo.length() > maxFPrimerLength){
164                 maxFPrimerLength = oligo.length();
165             }
166         }
167         maxRPrimerLength = maxFPrimerLength;
168     }
169     catch(exception& e) {
170         m->errorOut(e, "TrimOligos", "TrimOligos");
171         exit(1);
172     }
173 }
174 /********************************************************************/
175 TrimOligos::~TrimOligos() {}
176 //********************************************************************/
177 bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
178     try {
179         
180         string rawSequence = seq.getUnaligned();
181         
182         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
183             string oligo = it->first;
184             
185             if(rawSequence.length() < oligo.length()) { break; }
186             
187             //search for primer
188             int olength = oligo.length();
189             for (int j = 0; j < rawSequence.length()-olength; j++){
190                 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
191                 string rawChunk = rawSequence.substr(j, olength);
192                 if(compareDNASeq(oligo, rawChunk)) {
193                     primerStart = j;
194                     primerEnd = primerStart + olength;
195                     return true;
196                 }
197                 
198             }
199         }
200         
201         primerStart = 0; primerEnd = 0;
202         //if you don't want to allow for diffs
203         if (pdiffs == 0) { return false; }
204         else { //try aligning and see if you can find it
205             
206             //can you find the barcode
207             int minDiff = 1e6;
208             int minCount = 1;
209             
210             Alignment* alignment;
211             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
212             else{ alignment = NULL; }
213             
214             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
215                 
216                 string prim = it->first;
217                 //search for primer
218                 int olength = prim.length();
219                 if (rawSequence.length() < olength) {} //ignore primers too long for this seq
220                 else{
221                     for (int j = 0; j < rawSequence.length()-olength; j++){
222                         
223                         string oligo = it->first;
224                         
225                         if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
226                         
227                         string rawChunk = rawSequence.substr(j, olength+pdiffs);
228                         
229                         //use needleman to align first primer.length()+numdiffs of sequence to each barcode
230                         alignment->alignPrimer(oligo, rawChunk);
231                         oligo = alignment->getSeqAAln();
232                         string temp = alignment->getSeqBAln();
233                         
234                         int alnLength = oligo.length();
235                         
236                         for(int i=oligo.length()-1;i>=0;i--){
237                             if(oligo[i] != '-'){        alnLength = i+1;        break;  }
238                         }
239                         oligo = oligo.substr(0,alnLength);
240                         temp = temp.substr(0,alnLength);
241                         
242                         int numDiff = countDiffs(oligo, temp);
243                         
244                         if(numDiff < minDiff){
245                             minDiff = numDiff;
246                             minCount = 1;
247                             primerStart = j;
248                             primerEnd = primerStart + alnLength;
249                         }else if(numDiff == minDiff){ minCount++; }
250                     }
251                 }
252             }
253             
254             if (alignment != NULL) { delete alignment; }
255             
256             if(minDiff > pdiffs)        {       primerStart = 0; primerEnd = 0; return false;   }       //no good matches
257             else if(minCount > 1)       {       primerStart = 0; primerEnd = 0; return false;   }       //can't tell the difference between multiple primers
258             else{ return true; }
259         }
260         
261         primerStart = 0; primerEnd = 0;
262         return false;
263         
264     }
265     catch(exception& e) {
266         m->errorOut(e, "TrimOligos", "stripForward");
267         exit(1);
268     }
269 }
270 //******************************************************************/
271 bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
272     try {
273         
274         string rawSequence = seq.getUnaligned();
275         
276         for(int i=0;i<revPrimer.size();i++){
277             string oligo = revPrimer[i];
278             if(rawSequence.length() < oligo.length()) { break; }
279             
280             //search for primer
281             int olength = oligo.length();
282             for (int j = rawSequence.length()-olength; j >= 0; j--){
283                 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
284                 string rawChunk = rawSequence.substr(j, olength);
285                 
286                 if(compareDNASeq(oligo, rawChunk)) {
287                     primerStart = j;
288                     primerEnd = primerStart + olength;
289                     return true;
290                 }
291                 
292             }
293         }
294         
295         primerStart = 0; primerEnd = 0;
296         return false;
297     }
298     catch(exception& e) {
299         m->errorOut(e, "PcrSeqsCommand", "findReverse");
300         exit(1);
301     }
302 }
303 //*******************************************************************/
304
305 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
306     try {
307         
308         if (paired) { int success = stripPairedBarcode(seq, qual, group); return success; }
309         
310         string rawSequence = seq.getUnaligned();
311         int success = bdiffs + 1;       //guilty until proven innocent
312         
313         //can you find the barcode
314         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
315             string oligo = it->first;
316             if(rawSequence.length() < oligo.length()){  //let's just assume that the barcodes are the same length
317                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
318                 break;
319             }
320             
321             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
322                 group = it->second;
323                 seq.setUnaligned(rawSequence.substr(oligo.length()));
324                 
325                 if(qual.getName() != ""){
326                     qual.trimQScores(oligo.length(), -1);
327                 }
328                 
329                 success = 0;
330                 break;
331             }
332         }
333         
334         //if you found the barcode or if you don't want to allow for diffs
335         if ((bdiffs == 0) || (success == 0)) { return success; }
336         
337         else { //try aligning and see if you can find it
338             Alignment* alignment;
339             if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
340             else{ alignment = NULL; }
341             
342             //can you find the barcode
343             int minDiff = 1e6;
344             int minCount = 1;
345             int minGroup = -1;
346             int minPos = 0;
347             
348             for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
349                 string oligo = it->first;
350                 // int length = oligo.length();
351                 
352                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
353                     success = bdiffs + 10;
354                     break;
355                 }
356                 
357                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
358                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
359                 oligo = alignment->getSeqAAln();
360                 string temp = alignment->getSeqBAln();
361                 
362                 int alnLength = oligo.length();
363                 
364                 for(int i=oligo.length()-1;i>=0;i--){
365                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
366                 }
367                 oligo = oligo.substr(0,alnLength);
368                 temp = temp.substr(0,alnLength);
369                 int numDiff = countDiffs(oligo, temp);
370                 
371                 if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " aligned fragment =" + temp + ", barcode =" + oligo + ", numDiffs = " + toString(numDiff) + "\n"); }
372                 
373                 if(numDiff < minDiff){
374                     minDiff = numDiff;
375                     minCount = 1;
376                     minGroup = it->second;
377                     minPos = 0;
378                     for(int i=0;i<alnLength;i++){
379                         if(temp[i] != '-'){
380                             minPos++;
381                         }
382                     }
383                 }
384                 else if(numDiff == minDiff){
385                     minCount++;
386                 }
387                 
388             }
389             
390             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
391             else if(minCount > 1)       {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
392             else{       //use the best match
393                 group = minGroup;
394                 seq.setUnaligned(rawSequence.substr(minPos));
395                 
396                 if(qual.getName() != ""){
397                     qual.trimQScores(minPos, -1);
398                 }
399                 success = minDiff;
400             }
401             
402             if (alignment != NULL) { delete alignment; }
403             
404         }
405         
406         return success;
407         
408     }
409     catch(exception& e) {
410         m->errorOut(e, "TrimOligos", "stripBarcode");
411         exit(1);
412     }
413 }
414 //*******************************************************************/
415 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
416     try {
417         //look for forward barcode
418         string rawFSequence = forwardSeq.getUnaligned();
419         string rawRSequence = reverseSeq.getUnaligned();
420         int success = bdiffs + 1;       //guilty until proven innocent
421         
422         //can you find the forward barcode
423         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
424             string foligo = it->second.forward;
425             string roligo = it->second.reverse;
426             
427             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
428                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
429                 break;
430             }
431             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
432                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
433                 break;
434             }
435             
436             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
437                 group = it->first;
438                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
439                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
440                 success = 0;
441                 break;
442             }
443         }
444         
445         //if you found the barcode or if you don't want to allow for diffs
446         if ((bdiffs == 0) || (success == 0)) { return success; }
447         else { //try aligning and see if you can find it
448             Alignment* alignment;
449             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
450             else{ alignment = NULL; }
451             
452             //can you find the barcode
453             int minDiff = 1e6;
454             int minCount = 1;
455             vector< vector<int> > minFGroup;
456             vector<int> minFPos;
457             
458             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
459             /*
460              1 Sarah Westcott
461              2 John Westcott
462              3 Anna Westcott
463              4 Sarah Schloss
464              5 Pat Schloss
465              6 Gail Brown
466              7 Pat Moore
467              only want to look for forward = Sarah, John, Anna, Pat, Gail
468              reverse = Westcott, Schloss, Brown, Moore
469              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
470              */
471             //cout << endl << forwardSeq.getName() << endl;
472             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
473                 string oligo = it->first;
474                 
475                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
476                     success = bdiffs + 10;
477                     break;
478                 }
479                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
480                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
481                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
482                 oligo = alignment->getSeqAAln();
483                 string temp = alignment->getSeqBAln();
484                 
485                 int alnLength = oligo.length();
486                 
487                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
488                 oligo = oligo.substr(0,alnLength);
489                 temp = temp.substr(0,alnLength);
490                 int numDiff = countDiffs(oligo, temp);
491                 
492                 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
493                 
494                 if (alnLength == 0) { numDiff = bdiffs + 100; }
495                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
496                 
497                 if(numDiff < minDiff){
498                     minDiff = numDiff;
499                     minCount = 1;
500                     minFGroup.clear();
501                     minFGroup.push_back(it->second);
502                     int tempminFPos = 0;
503                     minFPos.clear();
504                     for(int i=0;i<alnLength;i++){
505                         if(temp[i] != '-'){
506                             tempminFPos++;
507                         }
508                     }
509                     minFPos.push_back(tempminFPos);
510                 }else if(numDiff == minDiff){
511                     minFGroup.push_back(it->second);
512                     int tempminFPos = 0;
513                     for(int i=0;i<alnLength;i++){
514                         if(temp[i] != '-'){
515                             tempminFPos++;
516                         }
517                     }
518                     minFPos.push_back(tempminFPos);
519                 }
520             }
521             
522             //cout << minDiff << '\t' << minCount << '\t' << endl;
523             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
524             else{
525                 //check for reverse match
526                 if (alignment != NULL) { delete alignment; }
527                 
528                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
529                 else{ alignment = NULL; }
530                 
531                 //can you find the barcode
532                 minDiff = 1e6;
533                 minCount = 1;
534                 vector< vector<int> > minRGroup;
535                 vector<int> minRPos;
536                 
537                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
538                     string oligo = it->first;
539                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
540                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
541                         success = bdiffs + 10;
542                         break;
543                     }
544                     
545                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
546                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
547                     oligo = alignment->getSeqAAln();
548                     string temp = alignment->getSeqBAln();
549                     
550                     int alnLength = oligo.length();
551                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
552                     oligo = oligo.substr(0,alnLength);
553                     temp = temp.substr(0,alnLength);
554                     int numDiff = countDiffs(oligo, temp);
555                     
556                     if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
557                     
558                     if (alnLength == 0) { numDiff = bdiffs + 100; }
559                     
560                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
561                     if(numDiff < minDiff){
562                         minDiff = numDiff;
563                         minCount = 1;
564                         minRGroup.clear();
565                         minRGroup.push_back(it->second);
566                         int tempminRPos = 0;
567                         minRPos.clear();
568                         for(int i=0;i<alnLength;i++){
569                             if(temp[i] != '-'){
570                                 tempminRPos++;
571                             }
572                         }
573                         minRPos.push_back(tempminRPos);
574                     }else if(numDiff == minDiff){
575                         int tempminRPos = 0;
576                         for(int i=0;i<alnLength;i++){
577                             if(temp[i] != '-'){
578                                 tempminRPos++;
579                             }
580                         }
581                         minRPos.push_back(tempminRPos);
582                         minRGroup.push_back(it->second);
583                     }
584                     
585                 }
586                 
587                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
588                 else {
589                     bool foundMatch = false;
590                     vector<int> matches;
591                     for (int i = 0; i < minFGroup.size(); i++) {
592                         for (int j = 0; j < minFGroup[i].size(); j++) {
593                             for (int k = 0; k < minRGroup.size(); k++) {
594                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
595                             }
596                         }
597                     }
598                     
599                     int fStart = 0;
600                     int rStart = 0;
601                     if (matches.size() == 1) {
602                         foundMatch = true;
603                         group = matches[0];
604                         fStart = minFPos[0];
605                         rStart = minRPos[0];
606                     }
607                     
608                     //we have an acceptable match for the forward and reverse, but do they match?
609                     if (foundMatch) {
610                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
611                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
612                         success = minDiff;
613                     }else { success = bdiffs + 100;     }
614                 }
615             }
616             
617             if (alignment != NULL) { delete alignment; }
618         }
619         
620         return success;
621         
622     }
623     catch(exception& e) {
624         m->errorOut(e, "TrimOligos", "stripIBarcode");
625         exit(1);
626     }
627     
628 }
629 //*******************************************************************/
630 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
631     try {
632         //look for forward barcode
633         string rawFSequence = forwardSeq.getUnaligned();
634         string rawRSequence = reverseSeq.getUnaligned();
635         int success = bdiffs + 1;       //guilty until proven innocent
636         
637         //can you find the forward barcode
638         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
639             string foligo = it->second.forward;
640             string roligo = it->second.reverse;
641             
642             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
643                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
644                 break;
645             }
646             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
647                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
648                 break;
649             }
650             
651             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
652                 group = it->first;
653                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
654                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
655                 forwardQual.trimQScores(foligo.length(), -1);
656                 reverseQual.trimQScores(roligo.length(), -1);
657                 success = 0;
658                 break;
659             }
660         }
661         
662         //if you found the barcode or if you don't want to allow for diffs
663         if ((bdiffs == 0) || (success == 0)) { return success; }
664         else { //try aligning and see if you can find it
665             Alignment* alignment;
666             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
667             else{ alignment = NULL; }
668             
669             //can you find the barcode
670             int minDiff = 1e6;
671             int minCount = 1;
672             vector< vector<int> > minFGroup;
673             vector<int> minFPos;
674             
675             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
676             /*
677              1 Sarah Westcott
678              2 John Westcott
679              3 Anna Westcott
680              4 Sarah Schloss
681              5 Pat Schloss
682              6 Gail Brown
683              7 Pat Moore
684              only want to look for forward = Sarah, John, Anna, Pat, Gail
685              reverse = Westcott, Schloss, Brown, Moore
686              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
687              */
688             //cout << endl << forwardSeq.getName() << endl;
689             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
690                 string oligo = it->first;
691                 
692                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
693                     success = bdiffs + 10;
694                     break;
695                 }
696                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
697                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
698                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
699                 oligo = alignment->getSeqAAln();
700                 string temp = alignment->getSeqBAln();
701                 
702                 int alnLength = oligo.length();
703                 
704                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
705                 oligo = oligo.substr(0,alnLength);
706                 temp = temp.substr(0,alnLength);
707                 int numDiff = countDiffs(oligo, temp);
708                 
709                 if (alnLength == 0) { numDiff = bdiffs + 100; }
710                 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
711                 
712                 if(numDiff < minDiff){
713                     minDiff = numDiff;
714                     minCount = 1;
715                     minFGroup.clear();
716                     minFGroup.push_back(it->second);
717                     int tempminFPos = 0;
718                     minFPos.clear();
719                     for(int i=0;i<alnLength;i++){
720                         if(temp[i] != '-'){
721                             tempminFPos++;
722                         }
723                     }
724                     minFPos.push_back(tempminFPos);
725                 }else if(numDiff == minDiff){
726                     minFGroup.push_back(it->second);
727                     int tempminFPos = 0;
728                     for(int i=0;i<alnLength;i++){
729                         if(temp[i] != '-'){
730                             tempminFPos++;
731                         }
732                     }
733                     minFPos.push_back(tempminFPos);
734                 }
735             }
736             
737             //cout << minDiff << '\t' << minCount << '\t' << endl;
738             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
739             else{
740                 //check for reverse match
741                 if (alignment != NULL) { delete alignment; }
742                 
743                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
744                 else{ alignment = NULL; }
745                 
746                 //can you find the barcode
747                 minDiff = 1e6;
748                 minCount = 1;
749                 vector< vector<int> > minRGroup;
750                 vector<int> minRPos;
751                 
752                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
753                     string oligo = it->first;
754                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
755                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
756                         success = bdiffs + 10;
757                         break;
758                     }
759                     
760                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
761                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
762                     oligo = alignment->getSeqAAln();
763                     string temp = alignment->getSeqBAln();
764                     
765                     int alnLength = oligo.length();
766                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
767                     oligo = oligo.substr(0,alnLength);
768                     temp = temp.substr(0,alnLength);
769                     int numDiff = countDiffs(oligo, temp);
770                     if (alnLength == 0) { numDiff = bdiffs + 100; }
771                     
772                     if (m->debug) { m->mothurOut("[DEBUG]: reverse " + reverseSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
773                     
774                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
775                     if(numDiff < minDiff){
776                         minDiff = numDiff;
777                         minCount = 1;
778                         minRGroup.clear();
779                         minRGroup.push_back(it->second);
780                         int tempminRPos = 0;
781                         minRPos.clear();
782                         for(int i=0;i<alnLength;i++){
783                             if(temp[i] != '-'){
784                                 tempminRPos++;
785                             }
786                         }
787                         minRPos.push_back(tempminRPos);
788                     }else if(numDiff == minDiff){
789                         int tempminRPos = 0;
790                         for(int i=0;i<alnLength;i++){
791                             if(temp[i] != '-'){
792                                 tempminRPos++;
793                             }
794                         }
795                         minRPos.push_back(tempminRPos);
796                         minRGroup.push_back(it->second);
797                     }
798                     
799                 }
800                 
801                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
802                 else {
803                     bool foundMatch = false;
804                     vector<int> matches;
805                     for (int i = 0; i < minFGroup.size(); i++) {
806                         for (int j = 0; j < minFGroup[i].size(); j++) {
807                             for (int k = 0; k < minRGroup.size(); k++) {
808                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
809                             }
810                         }
811                     }
812                     
813                     int fStart = 0;
814                     int rStart = 0;
815                     if (matches.size() == 1) {
816                         foundMatch = true;
817                         group = matches[0];
818                         fStart = minFPos[0];
819                         rStart = minRPos[0];
820                     }
821                     
822                     //we have an acceptable match for the forward and reverse, but do they match?
823                     if (foundMatch) {
824                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
825                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
826                         forwardQual.trimQScores(fStart, -1);
827                         reverseQual.trimQScores(rStart, -1);
828                         success = minDiff;
829                     }else { success = bdiffs + 100;     }
830                 }
831             }
832             
833             if (alignment != NULL) { delete alignment; }
834         }
835         
836         return success;
837         
838     }
839     catch(exception& e) {
840         m->errorOut(e, "TrimOligos", "stripIBarcode");
841         exit(1);
842     }
843     
844 }
845 //*******************************************************************/
846 int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
847     try {
848       m->debugOut("Stripping paired barcode");
849         //look for forward barcode
850         string rawSeq = seq.getUnaligned();
851         int success = bdiffs + 1;       //guilty until proven innocent
852         //cout << seq.getName() << endl;
853         //can you find the forward barcode
854         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
855             string foligo = it->second.forward;
856             string roligo = it->second.reverse;
857             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
858             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
859             if(rawSeq.length() < foligo.length()){      //let's just assume that the barcodes are the same length
860                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
861                 break;
862             }
863             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
864                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
865                 break;
866             }
867             
868             if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
869             
870             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
871                 group = it->first;
872                 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
873                 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
874                 if(qual.getName() != ""){
875                     qual.trimQScores(-1, rawSeq.length()-roligo.length());
876                     qual.trimQScores(foligo.length(), -1);
877                 }
878                 success = 0;
879                 break;
880             }
881         }
882         //cout << "success=" << success << endl;
883         //if you found the barcode or if you don't want to allow for diffs
884         if ((bdiffs == 0) || (success == 0)) { return success; }
885         else { //try aligning and see if you can find it
886             Alignment* alignment;
887             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
888             else{ alignment = NULL; }
889             
890             //can you find the barcode
891             int minDiff = 1e6;
892             int minCount = 1;
893             vector< vector<int> > minFGroup;
894             vector<int> minFPos;
895             
896             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
897             /*
898              1 Sarah Westcott
899              2 John Westcott
900              3 Anna Westcott
901              4 Sarah Schloss
902              5 Pat Schloss
903              6 Gail Brown
904              7 Pat Moore
905              only want to look for forward = Sarah, John, Anna, Pat, Gail
906              reverse = Westcott, Schloss, Brown, Moore
907              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
908              */
909             //cout << endl << forwardSeq.getName() << endl;
910             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
911                 string oligo = it->first;
912                 
913                 if(rawSeq.length() < maxFBarcodeLength){        //let's just assume that the barcodes are the same length
914                     success = bdiffs + 10;
915                     break;
916                 }
917                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
918                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
919                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
920                 oligo = alignment->getSeqAAln();
921                 string temp = alignment->getSeqBAln();
922                 
923                 int alnLength = oligo.length();
924                 
925                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
926                 oligo = oligo.substr(0,alnLength);
927                 temp = temp.substr(0,alnLength);
928                 int numDiff = countDiffs(oligo, temp);
929                 
930                 if (alnLength == 0) { numDiff = bdiffs + 100; }
931                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
932                 
933                 if(numDiff < minDiff){
934                     minDiff = numDiff;
935                     minCount = 1;
936                     minFGroup.clear();
937                     minFGroup.push_back(it->second);
938                     int tempminFPos = 0;
939                     minFPos.clear();
940                     for(int i=0;i<alnLength;i++){
941                         if(temp[i] != '-'){
942                             tempminFPos++;
943                         }
944                     }
945                     minFPos.push_back(tempminFPos);
946                 }else if(numDiff == minDiff){
947                     minFGroup.push_back(it->second);
948                     int tempminFPos = 0;
949                     for(int i=0;i<alnLength;i++){
950                         if(temp[i] != '-'){
951                             tempminFPos++;
952                         }
953                     }
954                     minFPos.push_back(tempminFPos);
955                 }
956             }
957             
958             //cout << minDiff << '\t' << minCount << '\t' << endl;
959             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
960             else{
961                 //check for reverse match
962                 if (alignment != NULL) { delete alignment; }
963                 
964                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
965                 else{ alignment = NULL; }
966                 
967                 //can you find the barcode
968                 minDiff = 1e6;
969                 minCount = 1;
970                 vector< vector<int> > minRGroup;
971                 vector<int> minRPos;
972                 
973                 string rawRSequence = reverseOligo(seq.getUnaligned());
974                 //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl;
975                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
976                     string oligo = reverseOligo(it->first);
977                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
978                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
979                         success = bdiffs + 10;
980                         break;
981                     }
982                     
983                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
984                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
985                     oligo = alignment->getSeqAAln();
986                     string temp = alignment->getSeqBAln();
987                     
988                     int alnLength = oligo.length();
989                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
990                     oligo = oligo.substr(0,alnLength);
991                     temp = temp.substr(0,alnLength);
992                     int numDiff = countDiffs(oligo, temp);
993                     if (alnLength == 0) { numDiff = bdiffs + 100; }
994                     
995                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
996                     if(numDiff < minDiff){
997                         minDiff = numDiff;
998                         minCount = 1;
999                         minRGroup.clear();
1000                         minRGroup.push_back(it->second);
1001                         int tempminRPos = 0;
1002                         minRPos.clear();
1003                         for(int i=0;i<alnLength;i++){
1004                             if(temp[i] != '-'){
1005                                 tempminRPos++;
1006                             }
1007                         }
1008                         minRPos.push_back(tempminRPos);
1009                     }else if(numDiff == minDiff){
1010                         int tempminRPos = 0;
1011                         for(int i=0;i<alnLength;i++){
1012                             if(temp[i] != '-'){
1013                                 tempminRPos++;
1014                             }
1015                         }
1016                         minRPos.push_back(tempminRPos);
1017                         minRGroup.push_back(it->second);
1018                     }
1019                     
1020                 }
1021                 
1022                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
1023                 else {
1024                     bool foundMatch = false;
1025                     vector<int> matches;
1026                     for (int i = 0; i < minFGroup.size(); i++) {
1027                         for (int j = 0; j < minFGroup[i].size(); j++) {
1028                             for (int k = 0; k < minRGroup.size(); k++) {
1029                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1030                             }
1031                         }
1032                     }
1033                     
1034                     int fStart = 0;
1035                     int rStart = 0;
1036                     if (matches.size() == 1) {
1037                         foundMatch = true;
1038                         group = matches[0];
1039                         fStart = minFPos[0];
1040                         rStart = rawSeq.length() - minRPos[0];
1041                         if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence
1042                     }
1043                     
1044                     //we have an acceptable match for the forward and reverse, but do they match?
1045                     if (foundMatch) {
1046                         string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1047                         seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1048                         if(qual.getName() != ""){
1049                             qual.trimQScores(-1, rStart);
1050                             qual.trimQScores(fStart, -1);
1051                         }
1052                         success = minDiff;
1053                         //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
1054                     }else { success = bdiffs + 100;     }
1055                 }
1056             }
1057             
1058             if (alignment != NULL) { delete alignment; }
1059         }
1060         
1061         return success;
1062         
1063     }
1064     catch(exception& e) {
1065         m->errorOut(e, "TrimOligos", "stripPairedBarcode");
1066         exit(1);
1067     }
1068     
1069 }
1070 //*******************************************************************/
1071 int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1072     try {
1073         //look for forward
1074         string rawSeq = seq.getUnaligned();
1075         int success = pdiffs + 1;       //guilty until proven innocent
1076         //cout << seq.getName() << endl;
1077         //can you find the forward
1078         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1079             string foligo = it->second.forward;
1080             string roligo = it->second.reverse;
1081             
1082             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
1083             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
1084             if(rawSeq.length() < foligo.length()){      //let's just assume that the barcodes are the same length
1085                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1086                 break;
1087             }
1088             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
1089                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1090                 break;
1091             }
1092             
1093             if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
1094             
1095             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
1096                 group = it->first;
1097                 if (!keepForward) {
1098                     string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
1099                     seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
1100                     if(qual.getName() != ""){
1101                         qual.trimQScores(-1, rawSeq.length()-roligo.length());
1102                         qual.trimQScores(foligo.length(), -1);
1103                     }
1104                 }
1105                 success = 0;
1106                 break;
1107             }
1108         }
1109         //cout << "success=" << success << endl;
1110         //if you found the barcode or if you don't want to allow for diffs
1111         if ((pdiffs == 0) || (success == 0)) { return success; }
1112         else { //try aligning and see if you can find it
1113             Alignment* alignment;
1114             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1115             else{ alignment = NULL; }
1116             
1117             //can you find the barcode
1118             int minDiff = 1e6;
1119             int minCount = 1;
1120             vector< vector<int> > minFGroup;
1121             vector<int> minFPos;
1122             
1123             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1124             /*
1125              1 Sarah Westcott
1126              2 John Westcott
1127              3 Anna Westcott
1128              4 Sarah Schloss
1129              5 Pat Schloss
1130              6 Gail Brown
1131              7 Pat Moore
1132              only want to look for forward = Sarah, John, Anna, Pat, Gail
1133              reverse = Westcott, Schloss, Brown, Moore
1134              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1135              */
1136             //cout << endl << forwardSeq.getName() << endl;
1137             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1138                 string oligo = it->first;
1139                 
1140                 if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1141                     success = pdiffs + 10;
1142                     break;
1143                 }
1144                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl;
1145                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1146                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
1147                 oligo = alignment->getSeqAAln();
1148                 string temp = alignment->getSeqBAln();
1149                 
1150                 int alnLength = oligo.length();
1151                 
1152                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1153                 oligo = oligo.substr(0,alnLength);
1154                 temp = temp.substr(0,alnLength);
1155                 int numDiff = countDiffs(oligo, temp);
1156                 
1157                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1158                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1159                 
1160                 if(numDiff < minDiff){
1161                     minDiff = numDiff;
1162                     minCount = 1;
1163                     minFGroup.clear();
1164                     minFGroup.push_back(it->second);
1165                     int tempminFPos = 0;
1166                     minFPos.clear();
1167                     for(int i=0;i<alnLength;i++){
1168                         if(temp[i] != '-'){
1169                             tempminFPos++;
1170                         }
1171                     }
1172                     minFPos.push_back(tempminFPos);
1173                 }else if(numDiff == minDiff){
1174                     minFGroup.push_back(it->second);
1175                     int tempminFPos = 0;
1176                     for(int i=0;i<alnLength;i++){
1177                         if(temp[i] != '-'){
1178                             tempminFPos++;
1179                         }
1180                     }
1181                     minFPos.push_back(tempminFPos);
1182                 }
1183             }
1184             
1185             //cout << minDiff << '\t' << minCount << '\t' << endl;
1186             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1187             else{
1188                 //check for reverse match
1189                 if (alignment != NULL) { delete alignment; }
1190                 
1191                 if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1192                 else{ alignment = NULL; }
1193                 
1194                 //can you find the barcode
1195                 minDiff = 1e6;
1196                 minCount = 1;
1197                 vector< vector<int> > minRGroup;
1198                 vector<int> minRPos;
1199                 
1200                 string rawRSequence = reverseOligo(seq.getUnaligned());
1201                 
1202                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1203                     string oligo = reverseOligo(it->first);
1204                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
1205                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1206                         success = pdiffs + 10;
1207                         break;
1208                     }
1209                     
1210                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1211                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1212                     oligo = alignment->getSeqAAln();
1213                     string temp = alignment->getSeqBAln();
1214                     
1215                     int alnLength = oligo.length();
1216                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1217                     oligo = oligo.substr(0,alnLength);
1218                     temp = temp.substr(0,alnLength);
1219                     int numDiff = countDiffs(oligo, temp);
1220                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1221                     
1222                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1223                     if(numDiff < minDiff){
1224                         minDiff = numDiff;
1225                         minCount = 1;
1226                         minRGroup.clear();
1227                         minRGroup.push_back(it->second);
1228                         int tempminRPos = 0;
1229                         minRPos.clear();
1230                         for(int i=0;i<alnLength;i++){
1231                             if(temp[i] != '-'){
1232                                 tempminRPos++;
1233                             }
1234                         }
1235                         minRPos.push_back(tempminRPos);
1236                     }else if(numDiff == minDiff){
1237                         int tempminRPos = 0;
1238                         for(int i=0;i<alnLength;i++){
1239                             if(temp[i] != '-'){
1240                                 tempminRPos++;
1241                             }
1242                         }
1243                         minRPos.push_back(tempminRPos);
1244                         minRGroup.push_back(it->second);
1245                     }
1246                     
1247                 }
1248                 
1249                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1250                 else {
1251                     bool foundMatch = false;
1252                     vector<int> matches;
1253                     for (int i = 0; i < minFGroup.size(); i++) {
1254                         for (int j = 0; j < minFGroup[i].size(); j++) {
1255                             for (int k = 0; k < minRGroup.size(); k++) {
1256                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1257                             }
1258                         }
1259                     }
1260                     
1261                     int fStart = 0;
1262                     int rStart = 0;
1263                     if (matches.size() == 1) {
1264                         foundMatch = true;
1265                         group = matches[0];
1266                         fStart = minFPos[0];
1267                         rStart = rawSeq.length() - minRPos[0];
1268                         if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence
1269                     }
1270                     
1271                     //we have an acceptable match for the forward and reverse, but do they match?
1272                     if (foundMatch) {
1273                         if (!keepForward) {
1274                             string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1275                             seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1276                             if(qual.getName() != ""){
1277                                 qual.trimQScores(-1, rStart);
1278                                 qual.trimQScores(fStart, -1);
1279                             }
1280                         }
1281                         success = minDiff;
1282                         //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
1283                     }else { success = pdiffs + 100;     }
1284                 }
1285             }
1286             
1287             if (alignment != NULL) { delete alignment; }
1288         }
1289         
1290         return success;
1291         
1292     }
1293     catch(exception& e) {
1294         m->errorOut(e, "TrimOligos", "stripPairedPrimers");
1295         exit(1);
1296     }
1297     
1298 }
1299
1300 //*******************************************************************/
1301 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
1302     try {
1303         //look for forward barcode
1304         string rawFSequence = forwardSeq.getUnaligned();
1305         string rawRSequence = reverseSeq.getUnaligned();
1306         int success = pdiffs + 1;       //guilty until proven innocent
1307         
1308         //can you find the forward barcode
1309         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1310             string foligo = it->second.forward;
1311             string roligo = it->second.reverse;
1312             
1313             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
1314                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1315                 break;
1316             }
1317             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
1318                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1319                 break;
1320             }
1321             
1322             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
1323                 group = it->first;
1324                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1325                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1326                 forwardQual.trimQScores(foligo.length(), -1);
1327                 reverseQual.trimQScores(roligo.length(), -1);
1328                 success = 0;
1329                 break;
1330             }
1331         }
1332         
1333         //if you found the barcode or if you don't want to allow for diffs
1334         if ((pdiffs == 0) || (success == 0)) { return success; }
1335         else { //try aligning and see if you can find it
1336             Alignment* alignment;
1337             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1338             else{ alignment = NULL; }
1339             
1340             //can you find the barcode
1341             int minDiff = 1e6;
1342             int minCount = 1;
1343             vector< vector<int> > minFGroup;
1344             vector<int> minFPos;
1345             
1346             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1347             /*
1348              1 Sarah Westcott
1349              2 John Westcott
1350              3 Anna Westcott
1351              4 Sarah Schloss
1352              5 Pat Schloss
1353              6 Gail Brown
1354              7 Pat Moore
1355              only want to look for forward = Sarah, John, Anna, Pat, Gail
1356              reverse = Westcott, Schloss, Brown, Moore
1357              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1358              */
1359             //cout << endl << forwardSeq.getName() << endl;
1360             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1361                 string oligo = it->first;
1362                 
1363                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
1364                     success = pdiffs + 10;
1365                     break;
1366                 }
1367                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1368                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1369                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1370                 oligo = alignment->getSeqAAln();
1371                 string temp = alignment->getSeqBAln();
1372                 
1373                 int alnLength = oligo.length();
1374                 
1375                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1376                 oligo = oligo.substr(0,alnLength);
1377                 temp = temp.substr(0,alnLength);
1378                 int numDiff = countDiffs(oligo, temp);
1379                 
1380                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1381                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1382                 
1383                 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
1384                 
1385                 if(numDiff < minDiff){
1386                     minDiff = numDiff;
1387                     minCount = 1;
1388                     minFGroup.clear();
1389                     minFGroup.push_back(it->second);
1390                     int tempminFPos = 0;
1391                     minFPos.clear();
1392                     for(int i=0;i<alnLength;i++){
1393                         if(temp[i] != '-'){
1394                             tempminFPos++;
1395                         }
1396                     }
1397                     minFPos.push_back(tempminFPos);
1398                 }else if(numDiff == minDiff){
1399                     minFGroup.push_back(it->second);
1400                     int tempminFPos = 0;
1401                     for(int i=0;i<alnLength;i++){
1402                         if(temp[i] != '-'){
1403                             tempminFPos++;
1404                         }
1405                     }
1406                     minFPos.push_back(tempminFPos);
1407                 }
1408             }
1409             
1410             //cout << minDiff << '\t' << minCount << '\t' << endl;
1411             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1412             else{
1413                 //check for reverse match
1414                 if (alignment != NULL) { delete alignment; }
1415                 
1416                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1417                 else{ alignment = NULL; }
1418                 
1419                 //can you find the barcode
1420                 minDiff = 1e6;
1421                 minCount = 1;
1422                 vector< vector<int> > minRGroup;
1423                 vector<int> minRPos;
1424                 
1425                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1426                     string oligo = it->first;
1427                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1428                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1429                         success = pdiffs + 10;
1430                         break;
1431                     }
1432                     
1433                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1434                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1435                     oligo = alignment->getSeqAAln();
1436                     string temp = alignment->getSeqBAln();
1437                     
1438                     int alnLength = oligo.length();
1439                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1440                     oligo = oligo.substr(0,alnLength);
1441                     temp = temp.substr(0,alnLength);
1442                     int numDiff = countDiffs(oligo, temp);
1443                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1444                     
1445                     if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
1446                     
1447                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1448                     if(numDiff < minDiff){
1449                         minDiff = numDiff;
1450                         minCount = 1;
1451                         minRGroup.clear();
1452                         minRGroup.push_back(it->second);
1453                         int tempminRPos = 0;
1454                         minRPos.clear();
1455                         for(int i=0;i<alnLength;i++){
1456                             if(temp[i] != '-'){
1457                                 tempminRPos++;
1458                             }
1459                         }
1460                         minRPos.push_back(tempminRPos);
1461                     }else if(numDiff == minDiff){
1462                         int tempminRPos = 0;
1463                         for(int i=0;i<alnLength;i++){
1464                             if(temp[i] != '-'){
1465                                 tempminRPos++;
1466                             }
1467                         }
1468                         minRPos.push_back(tempminRPos);
1469                         minRGroup.push_back(it->second);
1470                     }
1471                     
1472                 }
1473                 
1474                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1475                 else {
1476                     bool foundMatch = false;
1477                     vector<int> matches;
1478                     for (int i = 0; i < minFGroup.size(); i++) {
1479                         for (int j = 0; j < minFGroup[i].size(); j++) {
1480                             for (int k = 0; k < minRGroup.size(); k++) {
1481                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1482                             }
1483                         }
1484                     }
1485                     
1486                     int fStart = 0;
1487                     int rStart = 0;
1488                     if (matches.size() == 1) {
1489                         foundMatch = true;
1490                         group = matches[0];
1491                         fStart = minFPos[0];
1492                         rStart = minRPos[0];
1493                     }
1494                     
1495                     //we have an acceptable match for the forward and reverse, but do they match?
1496                     if (foundMatch) {
1497                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1498                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1499                         forwardQual.trimQScores(fStart, -1);
1500                         reverseQual.trimQScores(rStart, -1);
1501                         success = minDiff;
1502                     }else { success = pdiffs + 100;     }
1503                 }
1504             }
1505             
1506             if (alignment != NULL) { delete alignment; }
1507         }
1508         
1509         return success;
1510         
1511     }
1512     catch(exception& e) {
1513         m->errorOut(e, "TrimOligos", "stripIForward");
1514         exit(1);
1515     }
1516     
1517 }
1518 //*******************************************************************/
1519 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
1520     try {
1521         //look for forward barcode
1522         string rawFSequence = forwardSeq.getUnaligned();
1523         string rawRSequence = reverseSeq.getUnaligned();
1524         int success = pdiffs + 1;       //guilty until proven innocent
1525         
1526         //can you find the forward barcode
1527         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1528             string foligo = it->second.forward;
1529             string roligo = it->second.reverse;
1530             
1531             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
1532                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1533                 break;
1534             }
1535             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
1536                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1537                 break;
1538             }
1539             
1540             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1541                 group = it->first;
1542                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1543                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1544                 success = 0;
1545                 break;
1546             }
1547         }
1548         
1549         //if you found the barcode or if you don't want to allow for diffs
1550         if ((pdiffs == 0) || (success == 0)) { return success; }
1551         else { //try aligning and see if you can find it
1552             Alignment* alignment;
1553             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1554             else{ alignment = NULL; }
1555             
1556             //can you find the barcode
1557             int minDiff = 1e6;
1558             int minCount = 1;
1559             vector< vector<int> > minFGroup;
1560             vector<int> minFPos;
1561             
1562             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1563             /*
1564              1 Sarah Westcott
1565              2 John Westcott
1566              3 Anna Westcott
1567              4 Sarah Schloss
1568              5 Pat Schloss
1569              6 Gail Brown
1570              7 Pat Moore
1571              only want to look for forward = Sarah, John, Anna, Pat, Gail
1572              reverse = Westcott, Schloss, Brown, Moore
1573              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1574              */
1575             //cout << endl << forwardSeq.getName() << endl;
1576             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1577                 string oligo = it->first;
1578                 
1579                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
1580                     success = pdiffs + 10;
1581                     break;
1582                 }
1583                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1584                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1585                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1586                 oligo = alignment->getSeqAAln();
1587                 string temp = alignment->getSeqBAln();
1588                 
1589                 int alnLength = oligo.length();
1590                 
1591                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1592                 oligo = oligo.substr(0,alnLength);
1593                 temp = temp.substr(0,alnLength);
1594                 int numDiff = countDiffs(oligo, temp);
1595                 
1596                 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
1597                 
1598                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1599                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1600                 
1601                 if(numDiff < minDiff){
1602                     minDiff = numDiff;
1603                     minCount = 1;
1604                     minFGroup.clear();
1605                     minFGroup.push_back(it->second);
1606                     int tempminFPos = 0;
1607                     minFPos.clear();
1608                     for(int i=0;i<alnLength;i++){
1609                         if(temp[i] != '-'){
1610                             tempminFPos++;
1611                         }
1612                     }
1613                     minFPos.push_back(tempminFPos);
1614                 }else if(numDiff == minDiff){
1615                     minFGroup.push_back(it->second);
1616                     int tempminFPos = 0;
1617                     for(int i=0;i<alnLength;i++){
1618                         if(temp[i] != '-'){
1619                             tempminFPos++;
1620                         }
1621                     }
1622                     minFPos.push_back(tempminFPos);
1623                 }
1624             }
1625             
1626             //cout << minDiff << '\t' << minCount << '\t' << endl;
1627             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1628             else{
1629                 //check for reverse match
1630                 if (alignment != NULL) { delete alignment; }
1631                 
1632                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1633                 else{ alignment = NULL; }
1634                 
1635                 //can you find the barcode
1636                 minDiff = 1e6;
1637                 minCount = 1;
1638                 vector< vector<int> > minRGroup;
1639                 vector<int> minRPos;
1640                 
1641                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1642                     string oligo = it->first;
1643                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1644                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1645                         success = pdiffs + 10;
1646                         break;
1647                     }
1648                     
1649                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1650                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1651                     oligo = alignment->getSeqAAln();
1652                     string temp = alignment->getSeqBAln();
1653                     
1654                     int alnLength = oligo.length();
1655                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1656                     oligo = oligo.substr(0,alnLength);
1657                     temp = temp.substr(0,alnLength);
1658                     int numDiff = countDiffs(oligo, temp);
1659                     
1660                     if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
1661                     
1662                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1663                     
1664                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1665                     if(numDiff < minDiff){
1666                         minDiff = numDiff;
1667                         minCount = 1;
1668                         minRGroup.clear();
1669                         minRGroup.push_back(it->second);
1670                         int tempminRPos = 0;
1671                         minRPos.clear();
1672                         for(int i=0;i<alnLength;i++){
1673                             if(temp[i] != '-'){
1674                                 tempminRPos++;
1675                             }
1676                         }
1677                         minRPos.push_back(tempminRPos);
1678                     }else if(numDiff == minDiff){
1679                         int tempminRPos = 0;
1680                         for(int i=0;i<alnLength;i++){
1681                             if(temp[i] != '-'){
1682                                 tempminRPos++;
1683                             }
1684                         }
1685                         minRPos.push_back(tempminRPos);
1686                         minRGroup.push_back(it->second);
1687                     }
1688                     
1689                 }
1690                 
1691                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1692                 else {
1693                     bool foundMatch = false;
1694                     vector<int> matches;
1695                     for (int i = 0; i < minFGroup.size(); i++) {
1696                         for (int j = 0; j < minFGroup[i].size(); j++) {
1697                             for (int k = 0; k < minRGroup.size(); k++) {
1698                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1699                             }
1700                         }
1701                     }
1702                     
1703                     int fStart = 0;
1704                     int rStart = 0;
1705                     if (matches.size() == 1) {
1706                         foundMatch = true;
1707                         group = matches[0];
1708                         fStart = minFPos[0];
1709                         rStart = minRPos[0];
1710                     }
1711                     
1712                     //we have an acceptable match for the forward and reverse, but do they match?
1713                     if (foundMatch) {
1714                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1715                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1716                         success = minDiff;
1717                     }else { success = pdiffs + 100;     }
1718                 }
1719             }
1720             
1721             if (alignment != NULL) { delete alignment; }
1722         }
1723         
1724         return success;
1725         
1726     }
1727     catch(exception& e) {
1728         m->errorOut(e, "TrimOligos", "stripIForward");
1729         exit(1);
1730     }
1731     
1732 }
1733 //*******************************************************************/
1734 int TrimOligos::stripBarcode(Sequence& seq, int& group){
1735     try {
1736         
1737         string rawSequence = seq.getUnaligned();
1738         int success = bdiffs + 1;       //guilty until proven innocent
1739         
1740         //can you find the barcode
1741         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1742             string oligo = it->first;
1743             if(rawSequence.length() < oligo.length()){  //let's just assume that the barcodes are the same length
1744                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1745                 break;
1746             }
1747             
1748             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1749                 group = it->second;
1750                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1751                 
1752                 success = 0;
1753                 break;
1754             }
1755         }
1756         
1757         //if you found the barcode or if you don't want to allow for diffs
1758         if ((bdiffs == 0) || (success == 0)) { return success; }
1759         
1760         else { //try aligning and see if you can find it
1761             Alignment* alignment;
1762             if (barcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
1763             else{ alignment = NULL; }
1764             
1765             //can you find the barcode
1766             int minDiff = 1e6;
1767             int minCount = 1;
1768             int minGroup = -1;
1769             int minPos = 0;
1770             
1771             for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1772                 string oligo = it->first;
1773                 // int length = oligo.length();
1774                 
1775                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
1776                     success = bdiffs + 10;
1777                     break;
1778                 }
1779                 
1780                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1781                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1782                 oligo = alignment->getSeqAAln();
1783                 string temp = alignment->getSeqBAln();
1784                 
1785                 int alnLength = oligo.length();
1786                 
1787                 for(int i=oligo.length()-1;i>=0;i--){
1788                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
1789                 }
1790                 oligo = oligo.substr(0,alnLength);
1791                 temp = temp.substr(0,alnLength);
1792                 
1793                 int numDiff = countDiffs(oligo, temp);
1794                 
1795                 if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " aligned fragment =" + temp + ", barcode =" + oligo + ", numDiffs = " + toString(numDiff) + "\n"); }
1796                 
1797                 if(numDiff < minDiff){
1798                     minDiff = numDiff;
1799                     minCount = 1;
1800                     minGroup = it->second;
1801                     minPos = 0;
1802                     for(int i=0;i<alnLength;i++){
1803                         if(temp[i] != '-'){
1804                             minPos++;
1805                         }
1806                     }
1807                 }
1808                 else if(numDiff == minDiff){
1809                     minCount++;
1810                 }
1811                 
1812             }
1813             
1814             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
1815             else if(minCount > 1)       {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1816             else{       //use the best match
1817                 group = minGroup;
1818                 seq.setUnaligned(rawSequence.substr(minPos));
1819                 success = minDiff;
1820             }
1821             
1822             if (alignment != NULL) { delete alignment; }
1823             
1824         }
1825         
1826         return success;
1827         
1828     }
1829     catch(exception& e) {
1830         m->errorOut(e, "TrimOligos", "stripBarcode");
1831         exit(1);
1832     }
1833     
1834 }
1835
1836 /********************************************************************/
1837 int TrimOligos::stripForward(Sequence& seq, int& group){
1838     try {
1839         
1840         string rawSequence = seq.getUnaligned();
1841         int success = pdiffs + 1;       //guilty until proven innocent
1842         
1843         //can you find the primer
1844         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1845             string oligo = it->first;
1846             if(rawSequence.length() < oligo.length()){  //let's just assume that the primers are the same length
1847                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1848                 break;
1849             }
1850             
1851             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1852                 group = it->second;
1853                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1854                 success = 0;
1855                 break;
1856             }
1857         }
1858         
1859         //if you found the barcode or if you don't want to allow for diffs
1860         if ((pdiffs == 0) || (success == 0)) {  return success; }
1861         
1862         else { //try aligning and see if you can find it
1863             Alignment* alignment;
1864             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1865             else{ alignment = NULL; }
1866             
1867             //can you find the barcode
1868             int minDiff = 1e6;
1869             int minCount = 1;
1870             int minGroup = -1;
1871             int minPos = 0;
1872             
1873             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1874                 string oligo = it->first;
1875                 // int length = oligo.length();
1876                 
1877                 if(rawSequence.length() < maxFPrimerLength){
1878                     success = pdiffs + 100;
1879                     break;
1880                 }
1881                 
1882                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1883                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1884                 oligo = alignment->getSeqAAln();
1885                 string temp = alignment->getSeqBAln();
1886                 
1887                 int alnLength = oligo.length();
1888                 
1889                 for(int i=oligo.length()-1;i>=0;i--){
1890                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
1891                 }
1892                 oligo = oligo.substr(0,alnLength);
1893                 temp = temp.substr(0,alnLength);
1894                 
1895                 int numDiff = countDiffs(oligo, temp);
1896                 
1897                 if(numDiff < minDiff){
1898                     minDiff = numDiff;
1899                     minCount = 1;
1900                     minGroup = it->second;
1901                     minPos = 0;
1902                     for(int i=0;i<alnLength;i++){
1903                         if(temp[i] != '-'){
1904                             minPos++;
1905                         }
1906                     }
1907                 }
1908                 else if(numDiff == minDiff){
1909                     minCount++;
1910                 }
1911                 
1912             }
1913             
1914             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1915             else if(minCount > 1)       {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1916             else{       //use the best match
1917                 group = minGroup;
1918                 seq.setUnaligned(rawSequence.substr(minPos));
1919                 success = minDiff;
1920             }
1921             
1922             if (alignment != NULL) { delete alignment; }
1923             
1924         }
1925         
1926         return success;
1927         
1928     }
1929     catch(exception& e) {
1930         m->errorOut(e, "TrimOligos", "stripForward");
1931         exit(1);
1932     }
1933 }
1934 //*******************************************************************/
1935 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1936     try {
1937         
1938         if (paired) { int success = stripPairedPrimers(seq, qual, group, keepForward); return success; }
1939         
1940         string rawSequence = seq.getUnaligned();
1941         int success = pdiffs + 1;       //guilty until proven innocent
1942         
1943         //can you find the primer
1944         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1945             string oligo = it->first;
1946             if(rawSequence.length() < oligo.length()){  //let's just assume that the primers are the same length
1947                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1948                 break;
1949             }
1950             
1951             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1952                 group = it->second;
1953                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
1954                 if(qual.getName() != ""){
1955                     if (!keepForward) { qual.trimQScores(oligo.length(), -1); }
1956                 }
1957                 success = 0;
1958                 break;
1959             }
1960         }
1961         
1962         //if you found the barcode or if you don't want to allow for diffs
1963         if ((pdiffs == 0) || (success == 0)) { return success; }
1964         
1965         else { //try aligning and see if you can find it
1966             Alignment* alignment;
1967             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1968             else{ alignment = NULL; }
1969             
1970             //can you find the barcode
1971             int minDiff = 1e6;
1972             int minCount = 1;
1973             int minGroup = -1;
1974             int minPos = 0;
1975             
1976             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1977                 string oligo = it->first;
1978                 // int length = oligo.length();
1979                 
1980                 if(rawSequence.length() < maxFPrimerLength){
1981                     success = pdiffs + 100;
1982                     break;
1983                 }
1984                 
1985                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1986                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1987                 oligo = alignment->getSeqAAln();
1988                 string temp = alignment->getSeqBAln();
1989                 
1990                 int alnLength = oligo.length();
1991                 
1992                 for(int i=oligo.length()-1;i>=0;i--){
1993                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
1994                 }
1995                 oligo = oligo.substr(0,alnLength);
1996                 temp = temp.substr(0,alnLength);
1997                 
1998                 int numDiff = countDiffs(oligo, temp);
1999                 
2000                 if(numDiff < minDiff){
2001                     minDiff = numDiff;
2002                     minCount = 1;
2003                     minGroup = it->second;
2004                     minPos = 0;
2005                     for(int i=0;i<alnLength;i++){
2006                         if(temp[i] != '-'){
2007                             minPos++;
2008                         }
2009                     }
2010                 }
2011                 else if(numDiff == minDiff){
2012                     minCount++;
2013                 }
2014                 
2015             }
2016             
2017             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
2018             else if(minCount > 1)       {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
2019             else{       //use the best match
2020                 group = minGroup;
2021                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
2022                 if(qual.getName() != ""){
2023                     if (!keepForward) { qual.trimQScores(minPos, -1); }
2024                 }
2025                 success = minDiff;
2026             }
2027             
2028             if (alignment != NULL) { delete alignment; }
2029             
2030         }
2031         
2032         return success;
2033         
2034     }
2035     catch(exception& e) {
2036         m->errorOut(e, "TrimOligos", "stripForward");
2037         exit(1);
2038     }
2039 }
2040 //******************************************************************/
2041 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
2042     try {
2043         string rawSequence = seq.getUnaligned();
2044         bool success = 0;       //guilty until proven innocent
2045         
2046         for(int i=0;i<revPrimer.size();i++){
2047             string oligo = revPrimer[i];
2048             
2049             if(rawSequence.length() < oligo.length()){
2050                 success = 0;
2051                 break;
2052             }
2053             
2054             if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2055                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2056                 if(qual.getName() != ""){
2057                     qual.trimQScores(-1, rawSequence.length()-oligo.length());
2058                 }
2059                 success = 1;
2060                 break;
2061             }
2062         }       
2063         return success;
2064         
2065     }
2066     catch(exception& e) {
2067         m->errorOut(e, "TrimOligos", "stripReverse");
2068         exit(1);
2069     }
2070 }
2071 //******************************************************************/
2072 bool TrimOligos::stripReverse(Sequence& seq){
2073     try {
2074         
2075         string rawSequence = seq.getUnaligned();
2076         bool success = 0;       //guilty until proven innocent
2077         
2078         for(int i=0;i<revPrimer.size();i++){
2079             string oligo = revPrimer[i];
2080             
2081             if(rawSequence.length() < oligo.length()){
2082                 success = 0;
2083                 break;
2084             }
2085             
2086             if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2087                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2088                 success = 1;
2089                 break;
2090             }
2091         }       
2092         
2093         return success;
2094         
2095     }
2096     catch(exception& e) {
2097         m->errorOut(e, "TrimOligos", "stripReverse");
2098         exit(1);
2099     }
2100 }
2101 //******************************************************************/
2102 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
2103     try {
2104         string rawSequence = seq.getUnaligned();
2105         bool success = ldiffs + 1;      //guilty until proven innocent
2106         
2107         for(int i=0;i<linker.size();i++){
2108             string oligo = linker[i];
2109             
2110             if(rawSequence.length() < oligo.length()){
2111                 success = ldiffs + 10;
2112                 break;
2113             }
2114             
2115             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2116                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2117                 if(qual.getName() != ""){
2118                     qual.trimQScores(oligo.length(), -1);
2119                 }
2120                 success = 0;
2121                 break;
2122             }
2123         }
2124         
2125         //if you found the linker or if you don't want to allow for diffs
2126         if ((ldiffs == 0) || (success == 0)) { return success; }
2127         
2128         else { //try aligning and see if you can find it
2129             Alignment* alignment;
2130             if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }   
2131             else{ alignment = NULL; }
2132             
2133             //can you find the barcode
2134             int minDiff = 1e6;
2135             int minCount = 1;
2136             int minPos = 0;
2137             
2138             for(int i = 0; i < linker.size(); i++){
2139                 string oligo = linker[i];
2140                 // int length = oligo.length();
2141                 
2142                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
2143                     success = ldiffs + 10;
2144                     break;
2145                 }
2146                 
2147                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2148                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2149                 oligo = alignment->getSeqAAln();
2150                 string temp = alignment->getSeqBAln();
2151                 
2152                 int alnLength = oligo.length();
2153                 
2154                 for(int i=oligo.length()-1;i>=0;i--){
2155                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2156                 }
2157                 oligo = oligo.substr(0,alnLength);
2158                 temp = temp.substr(0,alnLength);
2159                 
2160                 int numDiff = countDiffs(oligo, temp);
2161                 
2162                 if(numDiff < minDiff){
2163                     minDiff = numDiff;
2164                     minCount = 1;
2165                     minPos = 0;
2166                     for(int i=0;i<alnLength;i++){
2167                         if(temp[i] != '-'){
2168                             minPos++;
2169                         }
2170                     }
2171                 }
2172                 else if(numDiff == minDiff){
2173                     minCount++;
2174                 }
2175                 
2176             }
2177             
2178             if(minDiff > ldiffs)        {       success = minDiff;      }       //no good matches
2179             else if(minCount > 1)       {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
2180             else{       //use the best match
2181                 seq.setUnaligned(rawSequence.substr(minPos));
2182                 
2183                 if(qual.getName() != ""){
2184                     qual.trimQScores(minPos, -1);
2185                 }
2186                 success = minDiff;
2187             }
2188             
2189             if (alignment != NULL) { delete alignment; }
2190             
2191         }
2192         
2193         
2194         return success;
2195         
2196     }
2197     catch(exception& e) {
2198         m->errorOut(e, "TrimOligos", "stripLinker");
2199         exit(1);
2200     }
2201 }
2202 //******************************************************************/
2203 bool TrimOligos::stripLinker(Sequence& seq){
2204     try {
2205         
2206         string rawSequence = seq.getUnaligned();
2207         bool success = ldiffs +1;       //guilty until proven innocent
2208         
2209         for(int i=0;i<linker.size();i++){
2210             string oligo = linker[i];
2211             
2212             if(rawSequence.length() < oligo.length()){
2213                 success = ldiffs +10;
2214                 break;
2215             }
2216             
2217             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2218                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2219                 success = 0;
2220                 break;
2221             }
2222         }       
2223         
2224         //if you found the linker or if you don't want to allow for diffs
2225         if ((ldiffs == 0) || (success == 0)) { return success; }
2226         
2227         else { //try aligning and see if you can find it
2228             Alignment* alignment;
2229             if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2230             else{ alignment = NULL; }
2231             
2232             //can you find the barcode
2233             int minDiff = 1e6;
2234             int minCount = 1;
2235             int minPos = 0;
2236             
2237             for(int i = 0; i < linker.size(); i++){
2238                 string oligo = linker[i];
2239                 // int length = oligo.length();
2240                 
2241                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
2242                     success = ldiffs + 10;
2243                     break;
2244                 }
2245                 
2246                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2247                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2248                 oligo = alignment->getSeqAAln();
2249                 string temp = alignment->getSeqBAln();
2250                 
2251                 int alnLength = oligo.length();
2252                 
2253                 for(int i=oligo.length()-1;i>=0;i--){
2254                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2255                 }
2256                 oligo = oligo.substr(0,alnLength);
2257                 temp = temp.substr(0,alnLength);
2258                 
2259                 int numDiff = countDiffs(oligo, temp);
2260                 
2261                 if(numDiff < minDiff){
2262                     minDiff = numDiff;
2263                     minCount = 1;
2264                     minPos = 0;
2265                     for(int i=0;i<alnLength;i++){
2266                         if(temp[i] != '-'){
2267                             minPos++;
2268                         }
2269                     }
2270                 }
2271                 else if(numDiff == minDiff){
2272                     minCount++;
2273                 }
2274                 
2275             }
2276             
2277             if(minDiff > ldiffs)        {       success = minDiff;      }       //no good matches
2278             else if(minCount > 1)       {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
2279             else{       //use the best match
2280                 seq.setUnaligned(rawSequence.substr(minPos));
2281                 success = minDiff;
2282             }
2283             
2284             if (alignment != NULL) { delete alignment; }
2285             
2286         }
2287         
2288         return success;
2289         
2290     }
2291     catch(exception& e) {
2292         m->errorOut(e, "TrimOligos", "stripLinker");
2293         exit(1);
2294     }
2295 }
2296
2297 //******************************************************************/
2298 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
2299     try {
2300         string rawSequence = seq.getUnaligned();
2301         bool success = sdiffs+1;        //guilty until proven innocent
2302         
2303         for(int i=0;i<spacer.size();i++){
2304             string oligo = spacer[i];
2305             
2306             if(rawSequence.length() < oligo.length()){
2307                 success = sdiffs+10;
2308                 break;
2309             }
2310             
2311             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2312                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2313                 if(qual.getName() != ""){
2314                     qual.trimQScores(oligo.length(), -1);
2315                 }
2316                 success = 0;
2317                 break;
2318             }
2319         }
2320         
2321         //if you found the spacer or if you don't want to allow for diffs
2322         if ((sdiffs == 0) || (success == 0)) { return success; }
2323         
2324         else { //try aligning and see if you can find it
2325             Alignment* alignment;
2326             if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2327             else{ alignment = NULL; }
2328             
2329             //can you find the barcode
2330             int minDiff = 1e6;
2331             int minCount = 1;
2332             int minPos = 0;
2333             
2334             for(int i = 0; i < spacer.size(); i++){
2335                 string oligo = spacer[i];
2336                 // int length = oligo.length();
2337                 
2338                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
2339                     success = sdiffs + 10;
2340                     break;
2341                 }
2342                 
2343                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2344                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2345                 oligo = alignment->getSeqAAln();
2346                 string temp = alignment->getSeqBAln();
2347                 
2348                 int alnLength = oligo.length();
2349                 
2350                 for(int i=oligo.length()-1;i>=0;i--){
2351                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2352                 }
2353                 oligo = oligo.substr(0,alnLength);
2354                 temp = temp.substr(0,alnLength);
2355                 
2356                 int numDiff = countDiffs(oligo, temp);
2357                 
2358                 if(numDiff < minDiff){
2359                     minDiff = numDiff;
2360                     minCount = 1;
2361                     minPos = 0;
2362                     for(int i=0;i<alnLength;i++){
2363                         if(temp[i] != '-'){
2364                             minPos++;
2365                         }
2366                     }
2367                 }
2368                 else if(numDiff == minDiff){
2369                     minCount++;
2370                 }
2371                 
2372             }
2373             
2374             if(minDiff > sdiffs)        {       success = minDiff;      }       //no good matches
2375             else if(minCount > 1)       {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
2376             else{       //use the best match
2377                 seq.setUnaligned(rawSequence.substr(minPos));
2378                 
2379                 if(qual.getName() != ""){
2380                     qual.trimQScores(minPos, -1);
2381                 }
2382                 success = minDiff;
2383             }
2384             
2385             if (alignment != NULL) { delete alignment; }
2386             
2387         }
2388         
2389         
2390         return success;
2391         
2392     }
2393     catch(exception& e) {
2394         m->errorOut(e, "TrimOligos", "stripSpacer");
2395         exit(1);
2396     }
2397 }
2398 //******************************************************************/
2399 bool TrimOligos::stripSpacer(Sequence& seq){
2400     try {
2401         
2402         string rawSequence = seq.getUnaligned();
2403         bool success = sdiffs+1;        //guilty until proven innocent
2404         
2405         for(int i=0;i<spacer.size();i++){
2406             string oligo = spacer[i];
2407             
2408             if(rawSequence.length() < oligo.length()){
2409                 success = sdiffs+10;
2410                 break;
2411             }
2412             
2413             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2414                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2415                 success = 0;
2416                 break;
2417             }
2418         }       
2419         
2420         //if you found the spacer or if you don't want to allow for diffs
2421         if ((sdiffs == 0) || (success == 0)) { return success; }
2422         
2423         else { //try aligning and see if you can find it
2424             Alignment* alignment;
2425             if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2426             else{ alignment = NULL; }
2427             
2428             //can you find the barcode
2429             int minDiff = 1e6;
2430             int minCount = 1;
2431             int minPos = 0;
2432             
2433             for(int i = 0; i < spacer.size(); i++){
2434                 string oligo = spacer[i];
2435                 // int length = oligo.length();
2436                 
2437                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
2438                     success = sdiffs + 10;
2439                     break;
2440                 }
2441                 
2442                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2443                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2444                 oligo = alignment->getSeqAAln();
2445                 string temp = alignment->getSeqBAln();
2446                 
2447                 int alnLength = oligo.length();
2448                 
2449                 for(int i=oligo.length()-1;i>=0;i--){
2450                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2451                 }
2452                 oligo = oligo.substr(0,alnLength);
2453                 temp = temp.substr(0,alnLength);
2454                 
2455                 int numDiff = countDiffs(oligo, temp);
2456                 
2457                 if(numDiff < minDiff){
2458                     minDiff = numDiff;
2459                     minCount = 1;
2460                     minPos = 0;
2461                     for(int i=0;i<alnLength;i++){
2462                         if(temp[i] != '-'){
2463                             minPos++;
2464                         }
2465                     }
2466                 }
2467                 else if(numDiff == minDiff){
2468                     minCount++;
2469                 }
2470                 
2471             }
2472             
2473             if(minDiff > sdiffs)        {       success = minDiff;      }       //no good matches
2474             else if(minCount > 1)       {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
2475             else{       //use the best match
2476                 seq.setUnaligned(rawSequence.substr(minPos));
2477                 success = minDiff;
2478             }
2479             
2480             if (alignment != NULL) { delete alignment; }
2481             
2482         }
2483         
2484         return success;
2485         
2486     }
2487     catch(exception& e) {
2488         m->errorOut(e, "TrimOligos", "stripSpacer");
2489         exit(1);
2490     }
2491 }
2492
2493 //******************************************************************/
2494 bool TrimOligos::compareDNASeq(string oligo, string seq){
2495     try {
2496         bool success = 1;
2497         int length = oligo.length();
2498         
2499         for(int i=0;i<length;i++){
2500             
2501             if(oligo[i] != seq[i]){
2502                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0; }
2503                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))        {       success = 0;    }
2504                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))    {       success = 0;    }
2505                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))    {       success = 0;    }
2506                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))    {       success = 0;    }
2507                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))    {       success = 0;    }
2508                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))    {       success = 0;    }
2509                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))    {       success = 0;    }
2510                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2511                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2512                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
2513                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }       
2514                 
2515                 if(success == 0)        {       break;  }
2516             }
2517             else{
2518                 success = 1;
2519             }
2520         }
2521         
2522         return success;
2523     }
2524     catch(exception& e) {
2525         m->errorOut(e, "TrimOligos", "compareDNASeq");
2526         exit(1);
2527     }
2528     
2529 }
2530 //********************************************************************/
2531 int TrimOligos::countDiffs(string oligo, string seq){
2532     try {
2533         
2534         int length = oligo.length();
2535         int countDiffs = 0;
2536         
2537         for(int i=0;i<length;i++){
2538             
2539             if(oligo[i] != seq[i]){
2540                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++; }
2541                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))        {       countDiffs++;   }
2542                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))    {       countDiffs++;   }
2543                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))    {       countDiffs++;   }
2544                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))    {       countDiffs++;   }
2545                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))    {       countDiffs++;   }
2546                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))    {       countDiffs++;   }
2547                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))    {       countDiffs++;   }
2548                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2549                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2550                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
2551                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
2552             }
2553             
2554         }
2555         
2556         return countDiffs;
2557     }
2558     catch(exception& e) {
2559         m->errorOut(e, "TrimOligos", "countDiffs");
2560         exit(1);
2561     }
2562 }
2563 //********************************************************************/
2564 string TrimOligos::reverseOligo(string oligo){
2565     try {
2566         string reverse = "";
2567         
2568         for(int i=oligo.length()-1;i>=0;i--){
2569             
2570             if(oligo[i] == 'A') {       reverse += 'T'; }
2571             else if(oligo[i] == 'T'){   reverse += 'A'; }
2572             else if(oligo[i] == 'U'){   reverse += 'A'; }
2573             
2574             else if(oligo[i] == 'G'){   reverse += 'C'; }
2575             else if(oligo[i] == 'C'){   reverse += 'G'; }
2576             
2577             else if(oligo[i] == 'R'){   reverse += 'Y'; }
2578             else if(oligo[i] == 'Y'){   reverse += 'R'; }
2579             
2580             else if(oligo[i] == 'M'){   reverse += 'K'; }
2581             else if(oligo[i] == 'K'){   reverse += 'M'; }
2582             
2583             else if(oligo[i] == 'W'){   reverse += 'W'; }
2584             else if(oligo[i] == 'S'){   reverse += 'S'; }
2585             
2586             else if(oligo[i] == 'B'){   reverse += 'V'; }
2587             else if(oligo[i] == 'V'){   reverse += 'B'; }
2588             
2589             else if(oligo[i] == 'D'){   reverse += 'H'; }
2590             else if(oligo[i] == 'H'){   reverse += 'D'; }
2591             
2592             else if(oligo[i] == '-'){   reverse += '-'; }
2593             else        {       reverse += 'N'; }
2594         }
2595         
2596         
2597         return reverse;
2598     }
2599     catch(exception& e) {
2600         m->errorOut(e, "TrimOligos", "reverseOligo");
2601         exit(1);
2602     }
2603 }
2604
2605 /********************************************************************/
2606
2607