]> git.donarmstrong.com Git - mothur.git/blob - trimoligos.cpp
added oligos class. added check orient parameter to trim.flows, sffinfo, fastq.info...
[mothur.git] / trimoligos.cpp
1 /*
2  * trimoligos.cpp
3  * Mothur
4  *
5  * Created by westcott on 9/1/11.
6  * Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
13
14
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
18     try {
19         m = MothurOut::getInstance();
20         paired = false;
21         
22         pdiffs = p;
23         bdiffs = b;
24         ldiffs = l;
25         sdiffs = s;
26         
27         barcodes = br;
28         primers = pr;
29         revPrimer = r;
30         linker = lk;
31         spacer = sp;
32         maxFBarcodeLength = 0;
33         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
34             if(it->first.length() > maxFBarcodeLength){
35                 maxFBarcodeLength = it->first.length();
36             }
37         }
38         maxFPrimerLength = 0;
39         map<string,int>::iterator it;
40         for(it=primers.begin();it!=primers.end();it++){
41             if(it->first.length() > maxFPrimerLength){
42                 maxFPrimerLength = it->first.length();
43             }
44         }
45         
46         maxLinkerLength = 0;
47         for(int i = 0; i < linker.size(); i++){
48             if(linker[i].length() > maxLinkerLength){
49                 maxLinkerLength = linker[i].length();
50             }
51         }
52         
53         maxSpacerLength = 0;
54         for(int i = 0; i < spacer.size(); i++){
55             if(spacer[i].length() > maxSpacerLength){
56                 maxSpacerLength = spacer[i].length();
57             }
58         }
59     }
60     catch(exception& e) {
61         m->errorOut(e, "TrimOligos", "TrimOligos");
62         exit(1);
63     }
64 }
65 /********************************************************************/
66 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
67 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
68     try {
69         m = MothurOut::getInstance();
70         
71         pdiffs = p;
72         bdiffs = b;
73         ldiffs = l;
74         sdiffs = s;
75         paired = true;
76         
77         maxFBarcodeLength = 0;
78         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
79             string forward = it->second.forward;
80             map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
81             
82             if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length(); }
83             
84             if (itForward == ifbarcodes.end()) {
85                 vector<int> temp; temp.push_back(it->first);
86                 ifbarcodes[forward] = temp;
87             }else { itForward->second.push_back(it->first); }
88         }
89         
90         maxFPrimerLength = 0;
91         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
92             string forward = it->second.forward;
93             map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
94             
95             if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length(); }
96             
97             if (itForward == ifprimers.end()) {
98                 vector<int> temp; temp.push_back(it->first);
99                 ifprimers[forward] = temp;
100             }else { itForward->second.push_back(it->first); }
101         }
102         
103         maxRBarcodeLength = 0;
104         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
105             string forward = it->second.reverse;
106             map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
107             
108             if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length(); }
109             
110             if (itForward == irbarcodes.end()) {
111                 vector<int> temp; temp.push_back(it->first);
112                 irbarcodes[forward] = temp;
113             }else { itForward->second.push_back(it->first); }
114         }
115         
116         maxRPrimerLength = 0;
117         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
118             string forward = it->second.reverse;
119             map<string, vector<int> >::iterator itForward = irprimers.find(forward);
120             
121             if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length(); }
122             
123             if (itForward == irprimers.end()) {
124                 vector<int> temp; temp.push_back(it->first);
125                 irprimers[forward] = temp;
126             }else { itForward->second.push_back(it->first); }
127         }
128         
129         ipbarcodes = br;
130         ipprimers = pr;
131     }
132     catch(exception& e) {
133         m->errorOut(e, "TrimOligos", "TrimOligos");
134         exit(1);
135     }
136 }
137 /********************************************************************/
138 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
139 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
140     try {
141         m = MothurOut::getInstance();
142         
143         pdiffs = p;
144         bdiffs = b;
145         
146         barcodes = br;
147         primers = pr;
148         revPrimer = r;
149         paired = false;
150         
151         maxFBarcodeLength = 0;
152         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
153             string oligo = it->first;
154             if(oligo.length() > maxFBarcodeLength){
155                 maxFBarcodeLength = oligo.length();
156             }
157         }
158         maxRBarcodeLength = maxFBarcodeLength;
159         
160         maxFPrimerLength = 0;
161         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
162             string oligo = it->first;
163             if(oligo.length() > maxFPrimerLength){
164                 maxFPrimerLength = oligo.length();
165             }
166         }
167         maxRPrimerLength = maxFPrimerLength;
168     }
169     catch(exception& e) {
170         m->errorOut(e, "TrimOligos", "TrimOligos");
171         exit(1);
172     }
173 }
174 /********************************************************************/
175 TrimOligos::~TrimOligos() {}
176 //********************************************************************/
177 bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
178     try {
179         
180         string rawSequence = seq.getUnaligned();
181         
182         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
183             string oligo = it->first;
184             
185             if(rawSequence.length() < oligo.length()) { break; }
186             
187             //search for primer
188             int olength = oligo.length();
189             for (int j = 0; j < rawSequence.length()-olength; j++){
190                 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
191                 string rawChunk = rawSequence.substr(j, olength);
192                 if(compareDNASeq(oligo, rawChunk)) {
193                     primerStart = j;
194                     primerEnd = primerStart + olength;
195                     return true;
196                 }
197                 
198             }
199         }
200         
201         primerStart = 0; primerEnd = 0;
202         //if you don't want to allow for diffs
203         if (pdiffs == 0) { return false; }
204         else { //try aligning and see if you can find it
205             
206             //can you find the barcode
207             int minDiff = 1e6;
208             int minCount = 1;
209             
210             Alignment* alignment;
211             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
212             else{ alignment = NULL; }
213             
214             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
215                 
216                 string prim = it->first;
217                 //search for primer
218                 int olength = prim.length();
219                 if (rawSequence.length() < olength) {} //ignore primers too long for this seq
220                 else{
221                     for (int j = 0; j < rawSequence.length()-olength; j++){
222                         
223                         string oligo = it->first;
224                         
225                         if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
226                         
227                         string rawChunk = rawSequence.substr(j, olength+pdiffs);
228                         
229                         //use needleman to align first primer.length()+numdiffs of sequence to each barcode
230                         alignment->alignPrimer(oligo, rawChunk);
231                         oligo = alignment->getSeqAAln();
232                         string temp = alignment->getSeqBAln();
233                         
234                         int alnLength = oligo.length();
235                         
236                         for(int i=oligo.length()-1;i>=0;i--){
237                             if(oligo[i] != '-'){        alnLength = i+1;        break;  }
238                         }
239                         oligo = oligo.substr(0,alnLength);
240                         temp = temp.substr(0,alnLength);
241                         
242                         int numDiff = countDiffs(oligo, temp);
243                         
244                         if(numDiff < minDiff){
245                             minDiff = numDiff;
246                             minCount = 1;
247                             primerStart = j;
248                             primerEnd = primerStart + alnLength;
249                         }else if(numDiff == minDiff){ minCount++; }
250                     }
251                 }
252             }
253             
254             if (alignment != NULL) { delete alignment; }
255             
256             if(minDiff > pdiffs)        {       primerStart = 0; primerEnd = 0; return false;   }       //no good matches
257             else if(minCount > 1)       {       primerStart = 0; primerEnd = 0; return false;   }       //can't tell the difference between multiple primers
258             else{ return true; }
259         }
260         
261         primerStart = 0; primerEnd = 0;
262         return false;
263         
264     }
265     catch(exception& e) {
266         m->errorOut(e, "TrimOligos", "stripForward");
267         exit(1);
268     }
269 }
270 //******************************************************************/
271 bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
272     try {
273         
274         string rawSequence = seq.getUnaligned();
275         
276         for(int i=0;i<revPrimer.size();i++){
277             string oligo = revPrimer[i];
278             if(rawSequence.length() < oligo.length()) { break; }
279             
280             //search for primer
281             int olength = oligo.length();
282             for (int j = rawSequence.length()-olength; j >= 0; j--){
283                 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
284                 string rawChunk = rawSequence.substr(j, olength);
285                 
286                 if(compareDNASeq(oligo, rawChunk)) {
287                     primerStart = j;
288                     primerEnd = primerStart + olength;
289                     return true;
290                 }
291                 
292             }
293         }
294         
295         primerStart = 0; primerEnd = 0;
296         return false;
297     }
298     catch(exception& e) {
299         m->errorOut(e, "PcrSeqsCommand", "findReverse");
300         exit(1);
301     }
302 }
303 //*******************************************************************/
304
305 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
306     try {
307         
308         if (paired) { int success = stripPairedBarcode(seq, qual, group); return success; }
309         
310         string rawSequence = seq.getUnaligned();
311         int success = bdiffs + 1;       //guilty until proven innocent
312         
313         //can you find the barcode
314         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
315             string oligo = it->first;
316             if(rawSequence.length() < oligo.length()){  //let's just assume that the barcodes are the same length
317                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
318                 break;
319             }
320             
321             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
322                 group = it->second;
323                 seq.setUnaligned(rawSequence.substr(oligo.length()));
324                 
325                 if(qual.getName() != ""){
326                     qual.trimQScores(oligo.length(), -1);
327                 }
328                 
329                 success = 0;
330                 break;
331             }
332         }
333         
334         //if you found the barcode or if you don't want to allow for diffs
335         if ((bdiffs == 0) || (success == 0)) { return success; }
336         
337         else { //try aligning and see if you can find it
338             Alignment* alignment;
339             if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
340             else{ alignment = NULL; }
341             
342             //can you find the barcode
343             int minDiff = 1e6;
344             int minCount = 1;
345             int minGroup = -1;
346             int minPos = 0;
347             
348             for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
349                 string oligo = it->first;
350                 // int length = oligo.length();
351                 
352                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
353                     success = bdiffs + 10;
354                     break;
355                 }
356                 
357                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
358                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
359                 oligo = alignment->getSeqAAln();
360                 string temp = alignment->getSeqBAln();
361                 
362                 int alnLength = oligo.length();
363                 
364                 for(int i=oligo.length()-1;i>=0;i--){
365                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
366                 }
367                 oligo = oligo.substr(0,alnLength);
368                 temp = temp.substr(0,alnLength);
369                 int numDiff = countDiffs(oligo, temp);
370                 
371                 if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " aligned fragment =" + temp + ", barcode =" + oligo + ", numDiffs = " + toString(numDiff) + "\n"); }
372                 
373                 if(numDiff < minDiff){
374                     minDiff = numDiff;
375                     minCount = 1;
376                     minGroup = it->second;
377                     minPos = 0;
378                     for(int i=0;i<alnLength;i++){
379                         if(temp[i] != '-'){
380                             minPos++;
381                         }
382                     }
383                 }
384                 else if(numDiff == minDiff){
385                     minCount++;
386                 }
387                 
388             }
389             
390             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
391             else if(minCount > 1)       {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
392             else{       //use the best match
393                 group = minGroup;
394                 seq.setUnaligned(rawSequence.substr(minPos));
395                 
396                 if(qual.getName() != ""){
397                     qual.trimQScores(minPos, -1);
398                 }
399                 success = minDiff;
400             }
401             
402             if (alignment != NULL) { delete alignment; }
403             
404         }
405         
406         return success;
407         
408     }
409     catch(exception& e) {
410         m->errorOut(e, "TrimOligos", "stripBarcode");
411         exit(1);
412     }
413 }
414 //*******************************************************************/
415 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
416     try {
417         //look for forward barcode
418         string rawFSequence = forwardSeq.getUnaligned();
419         string rawRSequence = reverseSeq.getUnaligned();
420         int success = bdiffs + 1;       //guilty until proven innocent
421         
422         //can you find the forward barcode
423         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
424             string foligo = it->second.forward;
425             string roligo = it->second.reverse;
426             
427             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
428                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
429                 break;
430             }
431             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
432                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
433                 break;
434             }
435             
436             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
437                 group = it->first;
438                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
439                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
440                 success = 0;
441                 break;
442             }
443         }
444         
445         //if you found the barcode or if you don't want to allow for diffs
446         if ((bdiffs == 0) || (success == 0)) { return success; }
447         else { //try aligning and see if you can find it
448             Alignment* alignment;
449             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
450             else{ alignment = NULL; }
451             
452             //can you find the barcode
453             int minDiff = 1e6;
454             int minCount = 1;
455             vector< vector<int> > minFGroup;
456             vector<int> minFPos;
457             
458             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
459             /*
460              1 Sarah Westcott
461              2 John Westcott
462              3 Anna Westcott
463              4 Sarah Schloss
464              5 Pat Schloss
465              6 Gail Brown
466              7 Pat Moore
467              only want to look for forward = Sarah, John, Anna, Pat, Gail
468              reverse = Westcott, Schloss, Brown, Moore
469              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
470              */
471             //cout << endl << forwardSeq.getName() << endl;
472             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
473                 string oligo = it->first;
474                 
475                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
476                     success = bdiffs + 10;
477                     break;
478                 }
479                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
480                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
481                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
482                 oligo = alignment->getSeqAAln();
483                 string temp = alignment->getSeqBAln();
484                 
485                 int alnLength = oligo.length();
486                 
487                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
488                 oligo = oligo.substr(0,alnLength);
489                 temp = temp.substr(0,alnLength);
490                 int numDiff = countDiffs(oligo, temp);
491                 
492                 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
493                 
494                 if (alnLength == 0) { numDiff = bdiffs + 100; }
495                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
496                 
497                 if(numDiff < minDiff){
498                     minDiff = numDiff;
499                     minCount = 1;
500                     minFGroup.clear();
501                     minFGroup.push_back(it->second);
502                     int tempminFPos = 0;
503                     minFPos.clear();
504                     for(int i=0;i<alnLength;i++){
505                         if(temp[i] != '-'){
506                             tempminFPos++;
507                         }
508                     }
509                     minFPos.push_back(tempminFPos);
510                 }else if(numDiff == minDiff){
511                     minFGroup.push_back(it->second);
512                     int tempminFPos = 0;
513                     for(int i=0;i<alnLength;i++){
514                         if(temp[i] != '-'){
515                             tempminFPos++;
516                         }
517                     }
518                     minFPos.push_back(tempminFPos);
519                 }
520             }
521             
522             //cout << minDiff << '\t' << minCount << '\t' << endl;
523             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
524             else{
525                 //check for reverse match
526                 if (alignment != NULL) { delete alignment; }
527                 
528                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
529                 else{ alignment = NULL; }
530                 
531                 //can you find the barcode
532                 minDiff = 1e6;
533                 minCount = 1;
534                 vector< vector<int> > minRGroup;
535                 vector<int> minRPos;
536                 
537                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
538                     string oligo = it->first;
539                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
540                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
541                         success = bdiffs + 10;
542                         break;
543                     }
544                     
545                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
546                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
547                     oligo = alignment->getSeqAAln();
548                     string temp = alignment->getSeqBAln();
549                     
550                     int alnLength = oligo.length();
551                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
552                     oligo = oligo.substr(0,alnLength);
553                     temp = temp.substr(0,alnLength);
554                     int numDiff = countDiffs(oligo, temp);
555                     
556                     if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
557                     
558                     if (alnLength == 0) { numDiff = bdiffs + 100; }
559                     
560                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
561                     if(numDiff < minDiff){
562                         minDiff = numDiff;
563                         minCount = 1;
564                         minRGroup.clear();
565                         minRGroup.push_back(it->second);
566                         int tempminRPos = 0;
567                         minRPos.clear();
568                         for(int i=0;i<alnLength;i++){
569                             if(temp[i] != '-'){
570                                 tempminRPos++;
571                             }
572                         }
573                         minRPos.push_back(tempminRPos);
574                     }else if(numDiff == minDiff){
575                         int tempminRPos = 0;
576                         for(int i=0;i<alnLength;i++){
577                             if(temp[i] != '-'){
578                                 tempminRPos++;
579                             }
580                         }
581                         minRPos.push_back(tempminRPos);
582                         minRGroup.push_back(it->second);
583                     }
584                     
585                 }
586                 
587                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
588                 else {
589                     bool foundMatch = false;
590                     vector<int> matches;
591                     for (int i = 0; i < minFGroup.size(); i++) {
592                         for (int j = 0; j < minFGroup[i].size(); j++) {
593                             for (int k = 0; k < minRGroup.size(); k++) {
594                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
595                             }
596                         }
597                     }
598                     
599                     int fStart = 0;
600                     int rStart = 0;
601                     if (matches.size() == 1) {
602                         foundMatch = true;
603                         group = matches[0];
604                         fStart = minFPos[0];
605                         rStart = minRPos[0];
606                     }
607                     
608                     //we have an acceptable match for the forward and reverse, but do they match?
609                     if (foundMatch) {
610                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
611                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
612                         success = minDiff;
613                     }else { success = bdiffs + 100;     }
614                 }
615             }
616             
617             if (alignment != NULL) { delete alignment; }
618         }
619         
620         return success;
621         
622     }
623     catch(exception& e) {
624         m->errorOut(e, "TrimOligos", "stripIBarcode");
625         exit(1);
626     }
627     
628 }
629 //*******************************************************************/
630 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
631     try {
632         //look for forward barcode
633         string rawFSequence = forwardSeq.getUnaligned();
634         string rawRSequence = reverseSeq.getUnaligned();
635         int success = bdiffs + 1;       //guilty until proven innocent
636         
637         //can you find the forward barcode
638         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
639             string foligo = it->second.forward;
640             string roligo = it->second.reverse;
641             
642             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
643                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
644                 break;
645             }
646             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
647                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
648                 break;
649             }
650             
651             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
652                 group = it->first;
653                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
654                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
655                 forwardQual.trimQScores(foligo.length(), -1);
656                 reverseQual.trimQScores(roligo.length(), -1);
657                 success = 0;
658                 break;
659             }
660         }
661         
662         //if you found the barcode or if you don't want to allow for diffs
663         if ((bdiffs == 0) || (success == 0)) { return success; }
664         else { //try aligning and see if you can find it
665             Alignment* alignment;
666             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
667             else{ alignment = NULL; }
668             
669             //can you find the barcode
670             int minDiff = 1e6;
671             int minCount = 1;
672             vector< vector<int> > minFGroup;
673             vector<int> minFPos;
674             
675             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
676             /*
677              1 Sarah Westcott
678              2 John Westcott
679              3 Anna Westcott
680              4 Sarah Schloss
681              5 Pat Schloss
682              6 Gail Brown
683              7 Pat Moore
684              only want to look for forward = Sarah, John, Anna, Pat, Gail
685              reverse = Westcott, Schloss, Brown, Moore
686              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
687              */
688             //cout << endl << forwardSeq.getName() << endl;
689             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
690                 string oligo = it->first;
691                 
692                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
693                     success = bdiffs + 10;
694                     break;
695                 }
696                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
697                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
698                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
699                 oligo = alignment->getSeqAAln();
700                 string temp = alignment->getSeqBAln();
701                 
702                 int alnLength = oligo.length();
703                 
704                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
705                 oligo = oligo.substr(0,alnLength);
706                 temp = temp.substr(0,alnLength);
707                 int numDiff = countDiffs(oligo, temp);
708                 
709                 if (alnLength == 0) { numDiff = bdiffs + 100; }
710                 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
711                 
712                 if(numDiff < minDiff){
713                     minDiff = numDiff;
714                     minCount = 1;
715                     minFGroup.clear();
716                     minFGroup.push_back(it->second);
717                     int tempminFPos = 0;
718                     minFPos.clear();
719                     for(int i=0;i<alnLength;i++){
720                         if(temp[i] != '-'){
721                             tempminFPos++;
722                         }
723                     }
724                     minFPos.push_back(tempminFPos);
725                 }else if(numDiff == minDiff){
726                     minFGroup.push_back(it->second);
727                     int tempminFPos = 0;
728                     for(int i=0;i<alnLength;i++){
729                         if(temp[i] != '-'){
730                             tempminFPos++;
731                         }
732                     }
733                     minFPos.push_back(tempminFPos);
734                 }
735             }
736             
737             //cout << minDiff << '\t' << minCount << '\t' << endl;
738             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
739             else{
740                 //check for reverse match
741                 if (alignment != NULL) { delete alignment; }
742                 
743                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
744                 else{ alignment = NULL; }
745                 
746                 //can you find the barcode
747                 minDiff = 1e6;
748                 minCount = 1;
749                 vector< vector<int> > minRGroup;
750                 vector<int> minRPos;
751                 
752                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
753                     string oligo = it->first;
754                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
755                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
756                         success = bdiffs + 10;
757                         break;
758                     }
759                     
760                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
761                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
762                     oligo = alignment->getSeqAAln();
763                     string temp = alignment->getSeqBAln();
764                     
765                     int alnLength = oligo.length();
766                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
767                     oligo = oligo.substr(0,alnLength);
768                     temp = temp.substr(0,alnLength);
769                     int numDiff = countDiffs(oligo, temp);
770                     if (alnLength == 0) { numDiff = bdiffs + 100; }
771                     
772                     if (m->debug) { m->mothurOut("[DEBUG]: reverse " + reverseSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
773                     
774                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
775                     if(numDiff < minDiff){
776                         minDiff = numDiff;
777                         minCount = 1;
778                         minRGroup.clear();
779                         minRGroup.push_back(it->second);
780                         int tempminRPos = 0;
781                         minRPos.clear();
782                         for(int i=0;i<alnLength;i++){
783                             if(temp[i] != '-'){
784                                 tempminRPos++;
785                             }
786                         }
787                         minRPos.push_back(tempminRPos);
788                     }else if(numDiff == minDiff){
789                         int tempminRPos = 0;
790                         for(int i=0;i<alnLength;i++){
791                             if(temp[i] != '-'){
792                                 tempminRPos++;
793                             }
794                         }
795                         minRPos.push_back(tempminRPos);
796                         minRGroup.push_back(it->second);
797                     }
798                     
799                 }
800                 
801                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
802                 else {
803                     if (m->debug) {
804                         string fMatches = "";
805                         for (int i = 0; i < minFGroup.size(); i++) { for (int j = 0; j < minFGroup[i].size(); j++) { fMatches += toString(minFGroup[i][j]) + " "; } }
806                         m->mothurOut("[DEBUG]: forward matches =  " + fMatches + "\n");
807                         string rMatches = "";
808                         for (int i = 0; i < minRGroup.size(); i++) { for (int j = 0; j < minRGroup[i].size(); j++) { rMatches += toString(minRGroup[i][j]) + " "; } }
809                         m->mothurOut("[DEBUG]: reverse matches =  " + rMatches + "\n");
810                     }
811                     bool foundMatch = false;
812                     vector<int> matches;
813                     for (int i = 0; i < minFGroup.size(); i++) {
814                         for (int j = 0; j < minFGroup[i].size(); j++) {
815                             for (int k = 0; k < minRGroup.size(); k++) {
816                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
817                             }
818                         }
819                     }
820                     
821                     int fStart = 0;
822                     int rStart = 0;
823                     if (matches.size() == 1) {
824                         foundMatch = true;
825                         group = matches[0];
826                         fStart = minFPos[0];
827                         rStart = minRPos[0];
828                         if (m->debug) { m->mothurOut("[DEBUG]: found match = " + toString(matches[0]) + ".\n");  }
829                     }else {
830                         if (m->debug) { m->mothurOut("[DEBUG]: failed too many matches.\n");  }
831                     }
832                     
833                     //we have an acceptable match for the forward and reverse, but do they match?
834                     if (foundMatch) {
835                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
836                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
837                         forwardQual.trimQScores(fStart, -1);
838                         reverseQual.trimQScores(rStart, -1);
839                         success = minDiff;
840                     }else { success = bdiffs + 100;     }
841                 }
842             }
843             
844             if (alignment != NULL) { delete alignment; }
845         }
846         
847         return success;
848         
849     }
850     catch(exception& e) {
851         m->errorOut(e, "TrimOligos", "stripIBarcode");
852         exit(1);
853     }
854     
855 }
856 //*******************************************************************/
857 int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
858     try {
859         //look for forward barcode
860         string rawSeq = seq.getUnaligned();
861         int success = bdiffs + 1;       //guilty until proven innocent
862         //cout << seq.getName() << endl;
863         //can you find the forward barcode
864         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
865             string foligo = it->second.forward;
866             string roligo = it->second.reverse;
867             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
868             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
869             if(rawSeq.length() < foligo.length()){      //let's just assume that the barcodes are the same length
870                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
871                 break;
872             }
873             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
874                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
875                 break;
876             }
877             
878             if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
879             
880             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
881                 group = it->first;
882                 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
883                 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
884                 if(qual.getName() != ""){
885                     qual.trimQScores(-1, rawSeq.length()-roligo.length());
886                     qual.trimQScores(foligo.length(), -1);
887                 }
888                 success = 0;
889                 break;
890             }
891         }
892         //cout << "success=" << success << endl;
893         //if you found the barcode or if you don't want to allow for diffs
894         if ((bdiffs == 0) || (success == 0)) { return success; }
895         else { //try aligning and see if you can find it
896             Alignment* alignment;
897             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
898             else{ alignment = NULL; }
899             
900             //can you find the barcode
901             int minDiff = 1e6;
902             int minCount = 1;
903             vector< vector<int> > minFGroup;
904             vector<int> minFPos;
905             
906             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
907             /*
908              1 Sarah Westcott
909              2 John Westcott
910              3 Anna Westcott
911              4 Sarah Schloss
912              5 Pat Schloss
913              6 Gail Brown
914              7 Pat Moore
915              only want to look for forward = Sarah, John, Anna, Pat, Gail
916              reverse = Westcott, Schloss, Brown, Moore
917              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
918              */
919             //cout << endl << forwardSeq.getName() << endl;
920             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
921                 string oligo = it->first;
922                 
923                 if(rawSeq.length() < maxFBarcodeLength){        //let's just assume that the barcodes are the same length
924                     success = bdiffs + 10;
925                     break;
926                 }
927                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
928                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
929                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
930                 oligo = alignment->getSeqAAln();
931                 string temp = alignment->getSeqBAln();
932                 
933                 int alnLength = oligo.length();
934                 
935                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
936                 oligo = oligo.substr(0,alnLength);
937                 temp = temp.substr(0,alnLength);
938                 int numDiff = countDiffs(oligo, temp);
939                 
940                 if (alnLength == 0) { numDiff = bdiffs + 100; }
941                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
942                 
943                 if(numDiff < minDiff){
944                     minDiff = numDiff;
945                     minCount = 1;
946                     minFGroup.clear();
947                     minFGroup.push_back(it->second);
948                     int tempminFPos = 0;
949                     minFPos.clear();
950                     for(int i=0;i<alnLength;i++){
951                         if(temp[i] != '-'){
952                             tempminFPos++;
953                         }
954                     }
955                     minFPos.push_back(tempminFPos);
956                 }else if(numDiff == minDiff){
957                     minFGroup.push_back(it->second);
958                     int tempminFPos = 0;
959                     for(int i=0;i<alnLength;i++){
960                         if(temp[i] != '-'){
961                             tempminFPos++;
962                         }
963                     }
964                     minFPos.push_back(tempminFPos);
965                 }
966             }
967             
968             //cout << minDiff << '\t' << minCount << '\t' << endl;
969             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
970             else{
971                 //check for reverse match
972                 if (alignment != NULL) { delete alignment; }
973                 
974                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
975                 else{ alignment = NULL; }
976                 
977                 //can you find the barcode
978                 minDiff = 1e6;
979                 minCount = 1;
980                 vector< vector<int> > minRGroup;
981                 vector<int> minRPos;
982                 
983                 string rawRSequence = reverseOligo(seq.getUnaligned());
984                 //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl;
985                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
986                     string oligo = reverseOligo(it->first);
987                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
988                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
989                         success = bdiffs + 10;
990                         break;
991                     }
992                     
993                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
994                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
995                     oligo = alignment->getSeqAAln();
996                     string temp = alignment->getSeqBAln();
997                     
998                     int alnLength = oligo.length();
999                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1000                     oligo = oligo.substr(0,alnLength);
1001                     temp = temp.substr(0,alnLength);
1002                     int numDiff = countDiffs(oligo, temp);
1003                     if (alnLength == 0) { numDiff = bdiffs + 100; }
1004                     
1005                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1006                     if(numDiff < minDiff){
1007                         minDiff = numDiff;
1008                         minCount = 1;
1009                         minRGroup.clear();
1010                         minRGroup.push_back(it->second);
1011                         int tempminRPos = 0;
1012                         minRPos.clear();
1013                         for(int i=0;i<alnLength;i++){
1014                             if(temp[i] != '-'){
1015                                 tempminRPos++;
1016                             }
1017                         }
1018                         minRPos.push_back(tempminRPos);
1019                     }else if(numDiff == minDiff){
1020                         int tempminRPos = 0;
1021                         for(int i=0;i<alnLength;i++){
1022                             if(temp[i] != '-'){
1023                                 tempminRPos++;
1024                             }
1025                         }
1026                         minRPos.push_back(tempminRPos);
1027                         minRGroup.push_back(it->second);
1028                     }
1029                     
1030                 }
1031                 
1032                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
1033                 else {
1034                     bool foundMatch = false;
1035                     vector<int> matches;
1036                     for (int i = 0; i < minFGroup.size(); i++) {
1037                         for (int j = 0; j < minFGroup[i].size(); j++) {
1038                             for (int k = 0; k < minRGroup.size(); k++) {
1039                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1040                             }
1041                         }
1042                     }
1043                     
1044                     int fStart = 0;
1045                     int rStart = 0;
1046                     if (matches.size() == 1) {
1047                         foundMatch = true;
1048                         group = matches[0];
1049                         fStart = minFPos[0];
1050                         rStart = rawSeq.length() - minRPos[0];
1051                         if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence
1052                     }
1053                     
1054                     //we have an acceptable match for the forward and reverse, but do they match?
1055                     if (foundMatch) {
1056                         string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1057                         seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1058                         if(qual.getName() != ""){
1059                             qual.trimQScores(-1, rStart);
1060                             qual.trimQScores(fStart, -1);
1061                         }
1062                         success = minDiff;
1063                         //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
1064                     }else { success = bdiffs + 100;     }
1065                 }
1066             }
1067             
1068             if (alignment != NULL) { delete alignment; }
1069         }
1070         
1071         return success;
1072         
1073     }
1074     catch(exception& e) {
1075         m->errorOut(e, "TrimOligos", "stripPairedBarcode");
1076         exit(1);
1077     }
1078     
1079 }
1080 //*******************************************************************/
1081 int TrimOligos::stripPairedBarcode(Sequence& seq, int& group){
1082     try {
1083         //look for forward barcode
1084         string rawSeq = seq.getUnaligned();
1085         int success = bdiffs + 1;       //guilty until proven innocent
1086         //cout << seq.getName() << endl;
1087         //can you find the forward barcode
1088         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
1089             string foligo = it->second.forward;
1090             string roligo = it->second.reverse;
1091             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
1092             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
1093             if(rawSeq.length() < foligo.length()){      //let's just assume that the barcodes are the same length
1094                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1095                 break;
1096             }
1097             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
1098                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1099                 break;
1100             }
1101             
1102             if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
1103             
1104             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
1105                 group = it->first;
1106                 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
1107                 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
1108                 success = 0;
1109                 break;
1110             }
1111         }
1112         //cout << "success=" << success << endl;
1113         //if you found the barcode or if you don't want to allow for diffs
1114         if ((bdiffs == 0) || (success == 0)) { return success; }
1115         else { //try aligning and see if you can find it
1116             Alignment* alignment;
1117             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
1118             else{ alignment = NULL; }
1119             
1120             //can you find the barcode
1121             int minDiff = 1e6;
1122             int minCount = 1;
1123             vector< vector<int> > minFGroup;
1124             vector<int> minFPos;
1125             
1126             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1127             /*
1128              1 Sarah Westcott
1129              2 John Westcott
1130              3 Anna Westcott
1131              4 Sarah Schloss
1132              5 Pat Schloss
1133              6 Gail Brown
1134              7 Pat Moore
1135              only want to look for forward = Sarah, John, Anna, Pat, Gail
1136              reverse = Westcott, Schloss, Brown, Moore
1137              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1138              */
1139             //cout << endl << forwardSeq.getName() << endl;
1140             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
1141                 string oligo = it->first;
1142                 
1143                 if(rawSeq.length() < maxFBarcodeLength){        //let's just assume that the barcodes are the same length
1144                     success = bdiffs + 10;
1145                     break;
1146                 }
1147                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
1148                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1149                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
1150                 oligo = alignment->getSeqAAln();
1151                 string temp = alignment->getSeqBAln();
1152                 
1153                 int alnLength = oligo.length();
1154                 
1155                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1156                 oligo = oligo.substr(0,alnLength);
1157                 temp = temp.substr(0,alnLength);
1158                 int numDiff = countDiffs(oligo, temp);
1159                 
1160                 if (alnLength == 0) { numDiff = bdiffs + 100; }
1161                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1162                 
1163                 if(numDiff < minDiff){
1164                     minDiff = numDiff;
1165                     minCount = 1;
1166                     minFGroup.clear();
1167                     minFGroup.push_back(it->second);
1168                     int tempminFPos = 0;
1169                     minFPos.clear();
1170                     for(int i=0;i<alnLength;i++){
1171                         if(temp[i] != '-'){
1172                             tempminFPos++;
1173                         }
1174                     }
1175                     minFPos.push_back(tempminFPos);
1176                 }else if(numDiff == minDiff){
1177                     minFGroup.push_back(it->second);
1178                     int tempminFPos = 0;
1179                     for(int i=0;i<alnLength;i++){
1180                         if(temp[i] != '-'){
1181                             tempminFPos++;
1182                         }
1183                     }
1184                     minFPos.push_back(tempminFPos);
1185                 }
1186             }
1187             
1188             //cout << minDiff << '\t' << minCount << '\t' << endl;
1189             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
1190             else{
1191                 //check for reverse match
1192                 if (alignment != NULL) { delete alignment; }
1193                 
1194                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
1195                 else{ alignment = NULL; }
1196                 
1197                 //can you find the barcode
1198                 minDiff = 1e6;
1199                 minCount = 1;
1200                 vector< vector<int> > minRGroup;
1201                 vector<int> minRPos;
1202                 
1203                 string rawRSequence = reverseOligo(seq.getUnaligned());
1204                 //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl;
1205                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
1206                     string oligo = reverseOligo(it->first);
1207                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
1208                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
1209                         success = bdiffs + 10;
1210                         break;
1211                     }
1212                     
1213                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1214                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
1215                     oligo = alignment->getSeqAAln();
1216                     string temp = alignment->getSeqBAln();
1217                     
1218                     int alnLength = oligo.length();
1219                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1220                     oligo = oligo.substr(0,alnLength);
1221                     temp = temp.substr(0,alnLength);
1222                     int numDiff = countDiffs(oligo, temp);
1223                     if (alnLength == 0) { numDiff = bdiffs + 100; }
1224                     
1225                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1226                     if(numDiff < minDiff){
1227                         minDiff = numDiff;
1228                         minCount = 1;
1229                         minRGroup.clear();
1230                         minRGroup.push_back(it->second);
1231                         int tempminRPos = 0;
1232                         minRPos.clear();
1233                         for(int i=0;i<alnLength;i++){
1234                             if(temp[i] != '-'){
1235                                 tempminRPos++;
1236                             }
1237                         }
1238                         minRPos.push_back(tempminRPos);
1239                     }else if(numDiff == minDiff){
1240                         int tempminRPos = 0;
1241                         for(int i=0;i<alnLength;i++){
1242                             if(temp[i] != '-'){
1243                                 tempminRPos++;
1244                             }
1245                         }
1246                         minRPos.push_back(tempminRPos);
1247                         minRGroup.push_back(it->second);
1248                     }
1249                     
1250                 }
1251                 
1252                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
1253                 else {
1254                     bool foundMatch = false;
1255                     vector<int> matches;
1256                     for (int i = 0; i < minFGroup.size(); i++) {
1257                         for (int j = 0; j < minFGroup[i].size(); j++) {
1258                             for (int k = 0; k < minRGroup.size(); k++) {
1259                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1260                             }
1261                         }
1262                     }
1263                     
1264                     int fStart = 0;
1265                     int rStart = 0;
1266                     if (matches.size() == 1) {
1267                         foundMatch = true;
1268                         group = matches[0];
1269                         fStart = minFPos[0];
1270                         rStart = rawSeq.length() - minRPos[0];
1271                         if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence
1272                     }
1273                     
1274                     //we have an acceptable match for the forward and reverse, but do they match?
1275                     if (foundMatch) {
1276                         string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1277                         seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1278                         success = minDiff;
1279                         //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
1280                     }else { success = bdiffs + 100;     }
1281                 }
1282             }
1283             
1284             if (alignment != NULL) { delete alignment; }
1285         }
1286         
1287         return success;
1288         
1289     }
1290     catch(exception& e) {
1291         m->errorOut(e, "TrimOligos", "stripPairedBarcode");
1292         exit(1);
1293     }
1294     
1295 }
1296 //*******************************************************************/
1297 int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1298     try {
1299         //look for forward
1300         string rawSeq = seq.getUnaligned();
1301         int success = pdiffs + 1;       //guilty until proven innocent
1302         //cout << seq.getName() << endl;
1303         //can you find the forward
1304         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1305             string foligo = it->second.forward;
1306             string roligo = it->second.reverse;
1307             
1308             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
1309             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
1310             if(rawSeq.length() < foligo.length()){      //let's just assume that the barcodes are the same length
1311                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1312                 break;
1313             }
1314             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
1315                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1316                 break;
1317             }
1318             
1319             if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
1320             
1321             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
1322                 group = it->first;
1323                 if (!keepForward) {
1324                     string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
1325                     seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
1326                     if(qual.getName() != ""){
1327                         qual.trimQScores(-1, rawSeq.length()-roligo.length());
1328                         qual.trimQScores(foligo.length(), -1);
1329                     }
1330                 }
1331                 success = 0;
1332                 break;
1333             }
1334         }
1335         //cout << "success=" << success << endl;
1336         //if you found the barcode or if you don't want to allow for diffs
1337         if ((pdiffs == 0) || (success == 0)) { return success; }
1338         else { //try aligning and see if you can find it
1339             Alignment* alignment;
1340             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1341             else{ alignment = NULL; }
1342             
1343             //can you find the barcode
1344             int minDiff = 1e6;
1345             int minCount = 1;
1346             vector< vector<int> > minFGroup;
1347             vector<int> minFPos;
1348             
1349             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1350             /*
1351              1 Sarah Westcott
1352              2 John Westcott
1353              3 Anna Westcott
1354              4 Sarah Schloss
1355              5 Pat Schloss
1356              6 Gail Brown
1357              7 Pat Moore
1358              only want to look for forward = Sarah, John, Anna, Pat, Gail
1359              reverse = Westcott, Schloss, Brown, Moore
1360              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1361              */
1362             //cout << endl << forwardSeq.getName() << endl;
1363             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1364                 string oligo = it->first;
1365                 
1366                 if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1367                     success = pdiffs + 10;
1368                     break;
1369                 }
1370                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl;
1371                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1372                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
1373                 oligo = alignment->getSeqAAln();
1374                 string temp = alignment->getSeqBAln();
1375                 
1376                 int alnLength = oligo.length();
1377                 
1378                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1379                 oligo = oligo.substr(0,alnLength);
1380                 temp = temp.substr(0,alnLength);
1381                 int numDiff = countDiffs(oligo, temp);
1382                 
1383                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1384                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1385                 
1386                 if(numDiff < minDiff){
1387                     minDiff = numDiff;
1388                     minCount = 1;
1389                     minFGroup.clear();
1390                     minFGroup.push_back(it->second);
1391                     int tempminFPos = 0;
1392                     minFPos.clear();
1393                     for(int i=0;i<alnLength;i++){
1394                         if(temp[i] != '-'){
1395                             tempminFPos++;
1396                         }
1397                     }
1398                     minFPos.push_back(tempminFPos);
1399                 }else if(numDiff == minDiff){
1400                     minFGroup.push_back(it->second);
1401                     int tempminFPos = 0;
1402                     for(int i=0;i<alnLength;i++){
1403                         if(temp[i] != '-'){
1404                             tempminFPos++;
1405                         }
1406                     }
1407                     minFPos.push_back(tempminFPos);
1408                 }
1409             }
1410             
1411             //cout << minDiff << '\t' << minCount << '\t' << endl;
1412             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1413             else{
1414                 //check for reverse match
1415                 if (alignment != NULL) { delete alignment; }
1416                 
1417                 if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1418                 else{ alignment = NULL; }
1419                 
1420                 //can you find the barcode
1421                 minDiff = 1e6;
1422                 minCount = 1;
1423                 vector< vector<int> > minRGroup;
1424                 vector<int> minRPos;
1425                 
1426                 string rawRSequence = reverseOligo(seq.getUnaligned());
1427                 
1428                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1429                     string oligo = reverseOligo(it->first);
1430                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
1431                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1432                         success = pdiffs + 10;
1433                         break;
1434                     }
1435                     
1436                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1437                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1438                     oligo = alignment->getSeqAAln();
1439                     string temp = alignment->getSeqBAln();
1440                     
1441                     int alnLength = oligo.length();
1442                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1443                     oligo = oligo.substr(0,alnLength);
1444                     temp = temp.substr(0,alnLength);
1445                     int numDiff = countDiffs(oligo, temp);
1446                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1447                     
1448                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1449                     if(numDiff < minDiff){
1450                         minDiff = numDiff;
1451                         minCount = 1;
1452                         minRGroup.clear();
1453                         minRGroup.push_back(it->second);
1454                         int tempminRPos = 0;
1455                         minRPos.clear();
1456                         for(int i=0;i<alnLength;i++){
1457                             if(temp[i] != '-'){
1458                                 tempminRPos++;
1459                             }
1460                         }
1461                         minRPos.push_back(tempminRPos);
1462                     }else if(numDiff == minDiff){
1463                         int tempminRPos = 0;
1464                         for(int i=0;i<alnLength;i++){
1465                             if(temp[i] != '-'){
1466                                 tempminRPos++;
1467                             }
1468                         }
1469                         minRPos.push_back(tempminRPos);
1470                         minRGroup.push_back(it->second);
1471                     }
1472                     
1473                 }
1474                 
1475                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1476                 else {
1477                     bool foundMatch = false;
1478                     vector<int> matches;
1479                     for (int i = 0; i < minFGroup.size(); i++) {
1480                         for (int j = 0; j < minFGroup[i].size(); j++) {
1481                             for (int k = 0; k < minRGroup.size(); k++) {
1482                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1483                             }
1484                         }
1485                     }
1486                     
1487                     int fStart = 0;
1488                     int rStart = 0;
1489                     if (matches.size() == 1) {
1490                         foundMatch = true;
1491                         group = matches[0];
1492                         fStart = minFPos[0];
1493                         rStart = rawSeq.length() - minRPos[0];
1494                         if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence
1495                     }
1496                     
1497                     //we have an acceptable match for the forward and reverse, but do they match?
1498                     if (foundMatch) {
1499                         if (!keepForward) {
1500                             string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1501                             seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1502                             if(qual.getName() != ""){
1503                                 qual.trimQScores(-1, rStart);
1504                                 qual.trimQScores(fStart, -1);
1505                             }
1506                         }
1507                         success = minDiff;
1508                         //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
1509                     }else { success = pdiffs + 100;     }
1510                 }
1511             }
1512             
1513             if (alignment != NULL) { delete alignment; }
1514         }
1515         
1516         return success;
1517         
1518     }
1519     catch(exception& e) {
1520         m->errorOut(e, "TrimOligos", "stripPairedPrimers");
1521         exit(1);
1522     }
1523     
1524 }
1525 //*******************************************************************/
1526 int TrimOligos::stripPairedPrimers(Sequence& seq, int& group){
1527     try {
1528         //look for forward
1529         string rawSeq = seq.getUnaligned();
1530         int success = pdiffs + 1;       //guilty until proven innocent
1531         //cout << seq.getName() << endl;
1532         //can you find the forward
1533         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1534             string foligo = it->second.forward;
1535             string roligo = it->second.reverse;
1536             
1537             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
1538             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
1539             if(rawSeq.length() < foligo.length()){      //let's just assume that the barcodes are the same length
1540                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1541                 break;
1542             }
1543             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
1544                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1545                 break;
1546             }
1547             
1548             if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
1549             
1550             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
1551                 group = it->first;
1552                 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
1553                 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
1554                 success = 0;
1555                 break;
1556             }
1557         }
1558         //cout << "success=" << success << endl;
1559         //if you found the barcode or if you don't want to allow for diffs
1560         if ((pdiffs == 0) || (success == 0)) { return success; }
1561         else { //try aligning and see if you can find it
1562             Alignment* alignment;
1563             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1564             else{ alignment = NULL; }
1565             
1566             //can you find the barcode
1567             int minDiff = 1e6;
1568             int minCount = 1;
1569             vector< vector<int> > minFGroup;
1570             vector<int> minFPos;
1571             
1572             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1573             /*
1574              1 Sarah Westcott
1575              2 John Westcott
1576              3 Anna Westcott
1577              4 Sarah Schloss
1578              5 Pat Schloss
1579              6 Gail Brown
1580              7 Pat Moore
1581              only want to look for forward = Sarah, John, Anna, Pat, Gail
1582              reverse = Westcott, Schloss, Brown, Moore
1583              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1584              */
1585             //cout << endl << forwardSeq.getName() << endl;
1586             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1587                 string oligo = it->first;
1588                 
1589                 if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1590                     success = pdiffs + 10;
1591                     break;
1592                 }
1593                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl;
1594                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1595                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
1596                 oligo = alignment->getSeqAAln();
1597                 string temp = alignment->getSeqBAln();
1598                 
1599                 int alnLength = oligo.length();
1600                 
1601                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1602                 oligo = oligo.substr(0,alnLength);
1603                 temp = temp.substr(0,alnLength);
1604                 int numDiff = countDiffs(oligo, temp);
1605                 
1606                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1607                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1608                 
1609                 if(numDiff < minDiff){
1610                     minDiff = numDiff;
1611                     minCount = 1;
1612                     minFGroup.clear();
1613                     minFGroup.push_back(it->second);
1614                     int tempminFPos = 0;
1615                     minFPos.clear();
1616                     for(int i=0;i<alnLength;i++){
1617                         if(temp[i] != '-'){
1618                             tempminFPos++;
1619                         }
1620                     }
1621                     minFPos.push_back(tempminFPos);
1622                 }else if(numDiff == minDiff){
1623                     minFGroup.push_back(it->second);
1624                     int tempminFPos = 0;
1625                     for(int i=0;i<alnLength;i++){
1626                         if(temp[i] != '-'){
1627                             tempminFPos++;
1628                         }
1629                     }
1630                     minFPos.push_back(tempminFPos);
1631                 }
1632             }
1633             
1634             //cout << minDiff << '\t' << minCount << '\t' << endl;
1635             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1636             else{
1637                 //check for reverse match
1638                 if (alignment != NULL) { delete alignment; }
1639                 
1640                 if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1641                 else{ alignment = NULL; }
1642                 
1643                 //can you find the barcode
1644                 minDiff = 1e6;
1645                 minCount = 1;
1646                 vector< vector<int> > minRGroup;
1647                 vector<int> minRPos;
1648                 
1649                 string rawRSequence = reverseOligo(seq.getUnaligned());
1650                 
1651                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1652                     string oligo = reverseOligo(it->first);
1653                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
1654                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1655                         success = pdiffs + 10;
1656                         break;
1657                     }
1658                     
1659                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1660                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1661                     oligo = alignment->getSeqAAln();
1662                     string temp = alignment->getSeqBAln();
1663                     
1664                     int alnLength = oligo.length();
1665                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1666                     oligo = oligo.substr(0,alnLength);
1667                     temp = temp.substr(0,alnLength);
1668                     int numDiff = countDiffs(oligo, temp);
1669                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1670                     
1671                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1672                     if(numDiff < minDiff){
1673                         minDiff = numDiff;
1674                         minCount = 1;
1675                         minRGroup.clear();
1676                         minRGroup.push_back(it->second);
1677                         int tempminRPos = 0;
1678                         minRPos.clear();
1679                         for(int i=0;i<alnLength;i++){
1680                             if(temp[i] != '-'){
1681                                 tempminRPos++;
1682                             }
1683                         }
1684                         minRPos.push_back(tempminRPos);
1685                     }else if(numDiff == minDiff){
1686                         int tempminRPos = 0;
1687                         for(int i=0;i<alnLength;i++){
1688                             if(temp[i] != '-'){
1689                                 tempminRPos++;
1690                             }
1691                         }
1692                         minRPos.push_back(tempminRPos);
1693                         minRGroup.push_back(it->second);
1694                     }
1695                     
1696                 }
1697                 
1698                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1699                 else {
1700                     bool foundMatch = false;
1701                     vector<int> matches;
1702                     for (int i = 0; i < minFGroup.size(); i++) {
1703                         for (int j = 0; j < minFGroup[i].size(); j++) {
1704                             for (int k = 0; k < minRGroup.size(); k++) {
1705                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1706                             }
1707                         }
1708                     }
1709                     
1710                     int fStart = 0;
1711                     int rStart = 0;
1712                     if (matches.size() == 1) {
1713                         foundMatch = true;
1714                         group = matches[0];
1715                         fStart = minFPos[0];
1716                         rStart = rawSeq.length() - minRPos[0];
1717                         if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence
1718                     }
1719                     
1720                     //we have an acceptable match for the forward and reverse, but do they match?
1721                     if (foundMatch) {
1722                         string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1723                         seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1724                         success = minDiff;
1725                         //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
1726                     }else { success = pdiffs + 100;     }
1727                 }
1728             }
1729             
1730             if (alignment != NULL) { delete alignment; }
1731         }
1732         
1733         return success;
1734         
1735     }
1736     catch(exception& e) {
1737         m->errorOut(e, "TrimOligos", "stripPairedPrimers");
1738         exit(1);
1739     }
1740     
1741 }
1742 //*******************************************************************/
1743 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
1744     try {
1745         //look for forward barcode
1746         string rawFSequence = forwardSeq.getUnaligned();
1747         string rawRSequence = reverseSeq.getUnaligned();
1748         int success = pdiffs + 1;       //guilty until proven innocent
1749         
1750         //can you find the forward barcode
1751         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1752             string foligo = it->second.forward;
1753             string roligo = it->second.reverse;
1754             
1755             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
1756                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1757                 break;
1758             }
1759             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
1760                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1761                 break;
1762             }
1763             
1764             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
1765                 group = it->first;
1766                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1767                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1768                 forwardQual.trimQScores(foligo.length(), -1);
1769                 reverseQual.trimQScores(roligo.length(), -1);
1770                 success = 0;
1771                 break;
1772             }
1773         }
1774         
1775         //if you found the barcode or if you don't want to allow for diffs
1776         if ((pdiffs == 0) || (success == 0)) { return success; }
1777         else { //try aligning and see if you can find it
1778             Alignment* alignment;
1779             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1780             else{ alignment = NULL; }
1781             
1782             //can you find the barcode
1783             int minDiff = 1e6;
1784             int minCount = 1;
1785             vector< vector<int> > minFGroup;
1786             vector<int> minFPos;
1787             
1788             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1789             /*
1790              1 Sarah Westcott
1791              2 John Westcott
1792              3 Anna Westcott
1793              4 Sarah Schloss
1794              5 Pat Schloss
1795              6 Gail Brown
1796              7 Pat Moore
1797              only want to look for forward = Sarah, John, Anna, Pat, Gail
1798              reverse = Westcott, Schloss, Brown, Moore
1799              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1800              */
1801             //cout << endl << forwardSeq.getName() << endl;
1802             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1803                 string oligo = it->first;
1804                 
1805                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
1806                     success = pdiffs + 10;
1807                     break;
1808                 }
1809                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1810                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1811                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1812                 oligo = alignment->getSeqAAln();
1813                 string temp = alignment->getSeqBAln();
1814                 
1815                 int alnLength = oligo.length();
1816                 
1817                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1818                 oligo = oligo.substr(0,alnLength);
1819                 temp = temp.substr(0,alnLength);
1820                 int numDiff = countDiffs(oligo, temp);
1821                 
1822                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1823                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1824                 
1825                 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
1826                 
1827                 if(numDiff < minDiff){
1828                     minDiff = numDiff;
1829                     minCount = 1;
1830                     minFGroup.clear();
1831                     minFGroup.push_back(it->second);
1832                     int tempminFPos = 0;
1833                     minFPos.clear();
1834                     for(int i=0;i<alnLength;i++){
1835                         if(temp[i] != '-'){
1836                             tempminFPos++;
1837                         }
1838                     }
1839                     minFPos.push_back(tempminFPos);
1840                 }else if(numDiff == minDiff){
1841                     minFGroup.push_back(it->second);
1842                     int tempminFPos = 0;
1843                     for(int i=0;i<alnLength;i++){
1844                         if(temp[i] != '-'){
1845                             tempminFPos++;
1846                         }
1847                     }
1848                     minFPos.push_back(tempminFPos);
1849                 }
1850             }
1851             
1852             //cout << minDiff << '\t' << minCount << '\t' << endl;
1853             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1854             else{
1855                 //check for reverse match
1856                 if (alignment != NULL) { delete alignment; }
1857                 
1858                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1859                 else{ alignment = NULL; }
1860                 
1861                 //can you find the barcode
1862                 minDiff = 1e6;
1863                 minCount = 1;
1864                 vector< vector<int> > minRGroup;
1865                 vector<int> minRPos;
1866                 
1867                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1868                     string oligo = it->first;
1869                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1870                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1871                         success = pdiffs + 10;
1872                         break;
1873                     }
1874                     
1875                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1876                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1877                     oligo = alignment->getSeqAAln();
1878                     string temp = alignment->getSeqBAln();
1879                     
1880                     int alnLength = oligo.length();
1881                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1882                     oligo = oligo.substr(0,alnLength);
1883                     temp = temp.substr(0,alnLength);
1884                     int numDiff = countDiffs(oligo, temp);
1885                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1886                     
1887                     if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
1888                     
1889                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1890                     if(numDiff < minDiff){
1891                         minDiff = numDiff;
1892                         minCount = 1;
1893                         minRGroup.clear();
1894                         minRGroup.push_back(it->second);
1895                         int tempminRPos = 0;
1896                         minRPos.clear();
1897                         for(int i=0;i<alnLength;i++){
1898                             if(temp[i] != '-'){
1899                                 tempminRPos++;
1900                             }
1901                         }
1902                         minRPos.push_back(tempminRPos);
1903                     }else if(numDiff == minDiff){
1904                         int tempminRPos = 0;
1905                         for(int i=0;i<alnLength;i++){
1906                             if(temp[i] != '-'){
1907                                 tempminRPos++;
1908                             }
1909                         }
1910                         minRPos.push_back(tempminRPos);
1911                         minRGroup.push_back(it->second);
1912                     }
1913                     
1914                 }
1915                 
1916                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1917                 else {
1918                     bool foundMatch = false;
1919                     vector<int> matches;
1920                     for (int i = 0; i < minFGroup.size(); i++) {
1921                         for (int j = 0; j < minFGroup[i].size(); j++) {
1922                             for (int k = 0; k < minRGroup.size(); k++) {
1923                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1924                             }
1925                         }
1926                     }
1927                     
1928                     int fStart = 0;
1929                     int rStart = 0;
1930                     if (matches.size() == 1) {
1931                         foundMatch = true;
1932                         group = matches[0];
1933                         fStart = minFPos[0];
1934                         rStart = minRPos[0];
1935                     }
1936                     
1937                     //we have an acceptable match for the forward and reverse, but do they match?
1938                     if (foundMatch) {
1939                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1940                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1941                         forwardQual.trimQScores(fStart, -1);
1942                         reverseQual.trimQScores(rStart, -1);
1943                         success = minDiff;
1944                     }else { success = pdiffs + 100;     }
1945                 }
1946             }
1947             
1948             if (alignment != NULL) { delete alignment; }
1949         }
1950         
1951         return success;
1952         
1953     }
1954     catch(exception& e) {
1955         m->errorOut(e, "TrimOligos", "stripIForward");
1956         exit(1);
1957     }
1958     
1959 }
1960 //*******************************************************************/
1961 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
1962     try {
1963         //look for forward barcode
1964         string rawFSequence = forwardSeq.getUnaligned();
1965         string rawRSequence = reverseSeq.getUnaligned();
1966         int success = pdiffs + 1;       //guilty until proven innocent
1967         
1968         //can you find the forward barcode
1969         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1970             string foligo = it->second.forward;
1971             string roligo = it->second.reverse;
1972             
1973             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
1974                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1975                 break;
1976             }
1977             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
1978                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1979                 break;
1980             }
1981             
1982             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1983                 group = it->first;
1984                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1985                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1986                 success = 0;
1987                 break;
1988             }
1989         }
1990         
1991         //if you found the barcode or if you don't want to allow for diffs
1992         if ((pdiffs == 0) || (success == 0)) { return success; }
1993         else { //try aligning and see if you can find it
1994             Alignment* alignment;
1995             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1996             else{ alignment = NULL; }
1997             
1998             //can you find the barcode
1999             int minDiff = 1e6;
2000             int minCount = 1;
2001             vector< vector<int> > minFGroup;
2002             vector<int> minFPos;
2003             
2004             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
2005             /*
2006              1 Sarah Westcott
2007              2 John Westcott
2008              3 Anna Westcott
2009              4 Sarah Schloss
2010              5 Pat Schloss
2011              6 Gail Brown
2012              7 Pat Moore
2013              only want to look for forward = Sarah, John, Anna, Pat, Gail
2014              reverse = Westcott, Schloss, Brown, Moore
2015              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
2016              */
2017             //cout << endl << forwardSeq.getName() << endl;
2018             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
2019                 string oligo = it->first;
2020                 
2021                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
2022                     success = pdiffs + 10;
2023                     break;
2024                 }
2025                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
2026                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2027                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
2028                 oligo = alignment->getSeqAAln();
2029                 string temp = alignment->getSeqBAln();
2030                 
2031                 int alnLength = oligo.length();
2032                 
2033                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
2034                 oligo = oligo.substr(0,alnLength);
2035                 temp = temp.substr(0,alnLength);
2036                 int numDiff = countDiffs(oligo, temp);
2037                 
2038                 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
2039                 
2040                 if (alnLength == 0) { numDiff = pdiffs + 100; }
2041                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
2042                 
2043                 if(numDiff < minDiff){
2044                     minDiff = numDiff;
2045                     minCount = 1;
2046                     minFGroup.clear();
2047                     minFGroup.push_back(it->second);
2048                     int tempminFPos = 0;
2049                     minFPos.clear();
2050                     for(int i=0;i<alnLength;i++){
2051                         if(temp[i] != '-'){
2052                             tempminFPos++;
2053                         }
2054                     }
2055                     minFPos.push_back(tempminFPos);
2056                 }else if(numDiff == minDiff){
2057                     minFGroup.push_back(it->second);
2058                     int tempminFPos = 0;
2059                     for(int i=0;i<alnLength;i++){
2060                         if(temp[i] != '-'){
2061                             tempminFPos++;
2062                         }
2063                     }
2064                     minFPos.push_back(tempminFPos);
2065                 }
2066             }
2067             
2068             //cout << minDiff << '\t' << minCount << '\t' << endl;
2069             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
2070             else{
2071                 //check for reverse match
2072                 if (alignment != NULL) { delete alignment; }
2073                 
2074                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
2075                 else{ alignment = NULL; }
2076                 
2077                 //can you find the barcode
2078                 minDiff = 1e6;
2079                 minCount = 1;
2080                 vector< vector<int> > minRGroup;
2081                 vector<int> minRPos;
2082                 
2083                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
2084                     string oligo = it->first;
2085                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
2086                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
2087                         success = pdiffs + 10;
2088                         break;
2089                     }
2090                     
2091                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2092                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
2093                     oligo = alignment->getSeqAAln();
2094                     string temp = alignment->getSeqBAln();
2095                     
2096                     int alnLength = oligo.length();
2097                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
2098                     oligo = oligo.substr(0,alnLength);
2099                     temp = temp.substr(0,alnLength);
2100                     int numDiff = countDiffs(oligo, temp);
2101                     
2102                     if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
2103                     
2104                     if (alnLength == 0) { numDiff = pdiffs + 100; }
2105                     
2106                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
2107                     if(numDiff < minDiff){
2108                         minDiff = numDiff;
2109                         minCount = 1;
2110                         minRGroup.clear();
2111                         minRGroup.push_back(it->second);
2112                         int tempminRPos = 0;
2113                         minRPos.clear();
2114                         for(int i=0;i<alnLength;i++){
2115                             if(temp[i] != '-'){
2116                                 tempminRPos++;
2117                             }
2118                         }
2119                         minRPos.push_back(tempminRPos);
2120                     }else if(numDiff == minDiff){
2121                         int tempminRPos = 0;
2122                         for(int i=0;i<alnLength;i++){
2123                             if(temp[i] != '-'){
2124                                 tempminRPos++;
2125                             }
2126                         }
2127                         minRPos.push_back(tempminRPos);
2128                         minRGroup.push_back(it->second);
2129                     }
2130                     
2131                 }
2132                 
2133                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
2134                 else {
2135                     bool foundMatch = false;
2136                     vector<int> matches;
2137                     for (int i = 0; i < minFGroup.size(); i++) {
2138                         for (int j = 0; j < minFGroup[i].size(); j++) {
2139                             for (int k = 0; k < minRGroup.size(); k++) {
2140                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
2141                             }
2142                         }
2143                     }
2144                     
2145                     int fStart = 0;
2146                     int rStart = 0;
2147                     if (matches.size() == 1) {
2148                         foundMatch = true;
2149                         group = matches[0];
2150                         fStart = minFPos[0];
2151                         rStart = minRPos[0];
2152                     }
2153                     
2154                     //we have an acceptable match for the forward and reverse, but do they match?
2155                     if (foundMatch) {
2156                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
2157                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
2158                         success = minDiff;
2159                     }else { success = pdiffs + 100;     }
2160                 }
2161             }
2162             
2163             if (alignment != NULL) { delete alignment; }
2164         }
2165         
2166         return success;
2167         
2168     }
2169     catch(exception& e) {
2170         m->errorOut(e, "TrimOligos", "stripIForward");
2171         exit(1);
2172     }
2173     
2174 }
2175 //*******************************************************************/
2176 int TrimOligos::stripBarcode(Sequence& seq, int& group){
2177     try {
2178         
2179         if (paired) { int success = stripPairedBarcode(seq, group); return success; }
2180         
2181         string rawSequence = seq.getUnaligned();
2182         int success = bdiffs + 1;       //guilty until proven innocent
2183         
2184         //can you find the barcode
2185         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
2186             string oligo = it->first;
2187             if(rawSequence.length() < oligo.length()){  //let's just assume that the barcodes are the same length
2188                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
2189                 break;
2190             }
2191             
2192             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2193                 group = it->second;
2194                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2195                 
2196                 success = 0;
2197                 break;
2198             }
2199         }
2200         
2201         //if you found the barcode or if you don't want to allow for diffs
2202         if ((bdiffs == 0) || (success == 0)) { return success; }
2203         
2204         else { //try aligning and see if you can find it
2205             Alignment* alignment;
2206             if (barcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
2207             else{ alignment = NULL; }
2208             
2209             //can you find the barcode
2210             int minDiff = 1e6;
2211             int minCount = 1;
2212             int minGroup = -1;
2213             int minPos = 0;
2214             
2215             for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
2216                 string oligo = it->first;
2217                 // int length = oligo.length();
2218                 
2219                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
2220                     success = bdiffs + 10;
2221                     break;
2222                 }
2223                 
2224                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2225                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
2226                 oligo = alignment->getSeqAAln();
2227                 string temp = alignment->getSeqBAln();
2228                 
2229                 int alnLength = oligo.length();
2230                 
2231                 for(int i=oligo.length()-1;i>=0;i--){
2232                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2233                 }
2234                 oligo = oligo.substr(0,alnLength);
2235                 temp = temp.substr(0,alnLength);
2236                 
2237                 int numDiff = countDiffs(oligo, temp);
2238                 
2239                 if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " aligned fragment =" + temp + ", barcode =" + oligo + ", numDiffs = " + toString(numDiff) + "\n"); }
2240                 
2241                 if(numDiff < minDiff){
2242                     minDiff = numDiff;
2243                     minCount = 1;
2244                     minGroup = it->second;
2245                     minPos = 0;
2246                     for(int i=0;i<alnLength;i++){
2247                         if(temp[i] != '-'){
2248                             minPos++;
2249                         }
2250                     }
2251                 }
2252                 else if(numDiff == minDiff){
2253                     minCount++;
2254                 }
2255                 
2256             }
2257             
2258             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
2259             else if(minCount > 1)       {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
2260             else{       //use the best match
2261                 group = minGroup;
2262                 seq.setUnaligned(rawSequence.substr(minPos));
2263                 success = minDiff;
2264             }
2265             
2266             if (alignment != NULL) { delete alignment; }
2267             
2268         }
2269         
2270         return success;
2271         
2272     }
2273     catch(exception& e) {
2274         m->errorOut(e, "TrimOligos", "stripBarcode");
2275         exit(1);
2276     }
2277     
2278 }
2279
2280 /********************************************************************/
2281 int TrimOligos::stripForward(Sequence& seq, int& group){
2282     try {
2283         
2284         if (paired) { int success = stripPairedPrimers(seq, group); return success; }
2285         
2286         string rawSequence = seq.getUnaligned();
2287         int success = pdiffs + 1;       //guilty until proven innocent
2288         
2289         //can you find the primer
2290         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
2291             string oligo = it->first;
2292             if(rawSequence.length() < oligo.length()){  //let's just assume that the primers are the same length
2293                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
2294                 break;
2295             }
2296             
2297             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2298                 group = it->second;
2299                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2300                 success = 0;
2301                 break;
2302             }
2303         }
2304         
2305         //if you found the barcode or if you don't want to allow for diffs
2306         if ((pdiffs == 0) || (success == 0)) {  return success; }
2307         
2308         else { //try aligning and see if you can find it
2309             Alignment* alignment;
2310             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
2311             else{ alignment = NULL; }
2312             
2313             //can you find the barcode
2314             int minDiff = 1e6;
2315             int minCount = 1;
2316             int minGroup = -1;
2317             int minPos = 0;
2318             
2319             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
2320                 string oligo = it->first;
2321                 // int length = oligo.length();
2322                 
2323                 if(rawSequence.length() < maxFPrimerLength){
2324                     success = pdiffs + 100;
2325                     break;
2326                 }
2327                 
2328                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2329                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
2330                 oligo = alignment->getSeqAAln();
2331                 string temp = alignment->getSeqBAln();
2332                 
2333                 int alnLength = oligo.length();
2334                 
2335                 for(int i=oligo.length()-1;i>=0;i--){
2336                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2337                 }
2338                 oligo = oligo.substr(0,alnLength);
2339                 temp = temp.substr(0,alnLength);
2340                 
2341                 int numDiff = countDiffs(oligo, temp);
2342                 
2343                 if(numDiff < minDiff){
2344                     minDiff = numDiff;
2345                     minCount = 1;
2346                     minGroup = it->second;
2347                     minPos = 0;
2348                     for(int i=0;i<alnLength;i++){
2349                         if(temp[i] != '-'){
2350                             minPos++;
2351                         }
2352                     }
2353                 }
2354                 else if(numDiff == minDiff){
2355                     minCount++;
2356                 }
2357                 
2358             }
2359             
2360             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
2361             else if(minCount > 1)       {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
2362             else{       //use the best match
2363                 group = minGroup;
2364                 seq.setUnaligned(rawSequence.substr(minPos));
2365                 success = minDiff;
2366             }
2367             
2368             if (alignment != NULL) { delete alignment; }
2369             
2370         }
2371         
2372         return success;
2373         
2374     }
2375     catch(exception& e) {
2376         m->errorOut(e, "TrimOligos", "stripForward");
2377         exit(1);
2378     }
2379 }
2380 //*******************************************************************/
2381 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
2382     try {
2383         
2384         if (paired) { int success = stripPairedPrimers(seq, qual, group, keepForward); return success; }
2385         
2386         string rawSequence = seq.getUnaligned();
2387         int success = pdiffs + 1;       //guilty until proven innocent
2388         
2389         //can you find the primer
2390         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
2391             string oligo = it->first;
2392             if(rawSequence.length() < oligo.length()){  //let's just assume that the primers are the same length
2393                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
2394                 break;
2395             }
2396             
2397             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2398                 group = it->second;
2399                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
2400                 if(qual.getName() != ""){
2401                     if (!keepForward) { qual.trimQScores(oligo.length(), -1); }
2402                 }
2403                 success = 0;
2404                 break;
2405             }
2406         }
2407         
2408         //if you found the barcode or if you don't want to allow for diffs
2409         if ((pdiffs == 0) || (success == 0)) { return success; }
2410         
2411         else { //try aligning and see if you can find it
2412             Alignment* alignment;
2413             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
2414             else{ alignment = NULL; }
2415             
2416             //can you find the barcode
2417             int minDiff = 1e6;
2418             int minCount = 1;
2419             int minGroup = -1;
2420             int minPos = 0;
2421             
2422             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
2423                 string oligo = it->first;
2424                 // int length = oligo.length();
2425                 
2426                 if(rawSequence.length() < maxFPrimerLength){
2427                     success = pdiffs + 100;
2428                     break;
2429                 }
2430                 
2431                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2432                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
2433                 oligo = alignment->getSeqAAln();
2434                 string temp = alignment->getSeqBAln();
2435                 
2436                 int alnLength = oligo.length();
2437                 
2438                 for(int i=oligo.length()-1;i>=0;i--){
2439                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2440                 }
2441                 oligo = oligo.substr(0,alnLength);
2442                 temp = temp.substr(0,alnLength);
2443                 
2444                 int numDiff = countDiffs(oligo, temp);
2445                 
2446                 if(numDiff < minDiff){
2447                     minDiff = numDiff;
2448                     minCount = 1;
2449                     minGroup = it->second;
2450                     minPos = 0;
2451                     for(int i=0;i<alnLength;i++){
2452                         if(temp[i] != '-'){
2453                             minPos++;
2454                         }
2455                     }
2456                 }
2457                 else if(numDiff == minDiff){
2458                     minCount++;
2459                 }
2460                 
2461             }
2462             
2463             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
2464             else if(minCount > 1)       {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
2465             else{       //use the best match
2466                 group = minGroup;
2467                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
2468                 if(qual.getName() != ""){
2469                     if (!keepForward) { qual.trimQScores(minPos, -1); }
2470                 }
2471                 success = minDiff;
2472             }
2473             
2474             if (alignment != NULL) { delete alignment; }
2475             
2476         }
2477         
2478         return success;
2479         
2480     }
2481     catch(exception& e) {
2482         m->errorOut(e, "TrimOligos", "stripForward");
2483         exit(1);
2484     }
2485 }
2486 //******************************************************************/
2487 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
2488     try {
2489         string rawSequence = seq.getUnaligned();
2490         bool success = 0;       //guilty until proven innocent
2491         
2492         for(int i=0;i<revPrimer.size();i++){
2493             string oligo = revPrimer[i];
2494             
2495             if(rawSequence.length() < oligo.length()){
2496                 success = 0;
2497                 break;
2498             }
2499             
2500             if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2501                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2502                 if(qual.getName() != ""){
2503                     qual.trimQScores(-1, rawSequence.length()-oligo.length());
2504                 }
2505                 success = 1;
2506                 break;
2507             }
2508         }       
2509         return success;
2510         
2511     }
2512     catch(exception& e) {
2513         m->errorOut(e, "TrimOligos", "stripReverse");
2514         exit(1);
2515     }
2516 }
2517 //******************************************************************/
2518 bool TrimOligos::stripReverse(Sequence& seq){
2519     try {
2520         
2521         string rawSequence = seq.getUnaligned();
2522         bool success = 0;       //guilty until proven innocent
2523         
2524         for(int i=0;i<revPrimer.size();i++){
2525             string oligo = revPrimer[i];
2526             
2527             if(rawSequence.length() < oligo.length()){
2528                 success = 0;
2529                 break;
2530             }
2531             
2532             if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2533                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2534                 success = 1;
2535                 break;
2536             }
2537         }       
2538         
2539         return success;
2540         
2541     }
2542     catch(exception& e) {
2543         m->errorOut(e, "TrimOligos", "stripReverse");
2544         exit(1);
2545     }
2546 }
2547 //******************************************************************/
2548 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
2549     try {
2550         string rawSequence = seq.getUnaligned();
2551         bool success = ldiffs + 1;      //guilty until proven innocent
2552         
2553         for(int i=0;i<linker.size();i++){
2554             string oligo = linker[i];
2555             
2556             if(rawSequence.length() < oligo.length()){
2557                 success = ldiffs + 10;
2558                 break;
2559             }
2560             
2561             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2562                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2563                 if(qual.getName() != ""){
2564                     qual.trimQScores(oligo.length(), -1);
2565                 }
2566                 success = 0;
2567                 break;
2568             }
2569         }
2570         
2571         //if you found the linker or if you don't want to allow for diffs
2572         if ((ldiffs == 0) || (success == 0)) { return success; }
2573         
2574         else { //try aligning and see if you can find it
2575             Alignment* alignment;
2576             if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }   
2577             else{ alignment = NULL; }
2578             
2579             //can you find the barcode
2580             int minDiff = 1e6;
2581             int minCount = 1;
2582             int minPos = 0;
2583             
2584             for(int i = 0; i < linker.size(); i++){
2585                 string oligo = linker[i];
2586                 // int length = oligo.length();
2587                 
2588                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
2589                     success = ldiffs + 10;
2590                     break;
2591                 }
2592                 
2593                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2594                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2595                 oligo = alignment->getSeqAAln();
2596                 string temp = alignment->getSeqBAln();
2597                 
2598                 int alnLength = oligo.length();
2599                 
2600                 for(int i=oligo.length()-1;i>=0;i--){
2601                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2602                 }
2603                 oligo = oligo.substr(0,alnLength);
2604                 temp = temp.substr(0,alnLength);
2605                 
2606                 int numDiff = countDiffs(oligo, temp);
2607                 
2608                 if(numDiff < minDiff){
2609                     minDiff = numDiff;
2610                     minCount = 1;
2611                     minPos = 0;
2612                     for(int i=0;i<alnLength;i++){
2613                         if(temp[i] != '-'){
2614                             minPos++;
2615                         }
2616                     }
2617                 }
2618                 else if(numDiff == minDiff){
2619                     minCount++;
2620                 }
2621                 
2622             }
2623             
2624             if(minDiff > ldiffs)        {       success = minDiff;      }       //no good matches
2625             else if(minCount > 1)       {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
2626             else{       //use the best match
2627                 seq.setUnaligned(rawSequence.substr(minPos));
2628                 
2629                 if(qual.getName() != ""){
2630                     qual.trimQScores(minPos, -1);
2631                 }
2632                 success = minDiff;
2633             }
2634             
2635             if (alignment != NULL) { delete alignment; }
2636             
2637         }
2638         
2639         
2640         return success;
2641         
2642     }
2643     catch(exception& e) {
2644         m->errorOut(e, "TrimOligos", "stripLinker");
2645         exit(1);
2646     }
2647 }
2648 //******************************************************************/
2649 bool TrimOligos::stripLinker(Sequence& seq){
2650     try {
2651         
2652         string rawSequence = seq.getUnaligned();
2653         bool success = ldiffs +1;       //guilty until proven innocent
2654         
2655         for(int i=0;i<linker.size();i++){
2656             string oligo = linker[i];
2657             
2658             if(rawSequence.length() < oligo.length()){
2659                 success = ldiffs +10;
2660                 break;
2661             }
2662             
2663             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2664                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2665                 success = 0;
2666                 break;
2667             }
2668         }       
2669         
2670         //if you found the linker or if you don't want to allow for diffs
2671         if ((ldiffs == 0) || (success == 0)) { return success; }
2672         
2673         else { //try aligning and see if you can find it
2674             Alignment* alignment;
2675             if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2676             else{ alignment = NULL; }
2677             
2678             //can you find the barcode
2679             int minDiff = 1e6;
2680             int minCount = 1;
2681             int minPos = 0;
2682             
2683             for(int i = 0; i < linker.size(); i++){
2684                 string oligo = linker[i];
2685                 // int length = oligo.length();
2686                 
2687                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
2688                     success = ldiffs + 10;
2689                     break;
2690                 }
2691                 
2692                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2693                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2694                 oligo = alignment->getSeqAAln();
2695                 string temp = alignment->getSeqBAln();
2696                 
2697                 int alnLength = oligo.length();
2698                 
2699                 for(int i=oligo.length()-1;i>=0;i--){
2700                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2701                 }
2702                 oligo = oligo.substr(0,alnLength);
2703                 temp = temp.substr(0,alnLength);
2704                 
2705                 int numDiff = countDiffs(oligo, temp);
2706                 
2707                 if(numDiff < minDiff){
2708                     minDiff = numDiff;
2709                     minCount = 1;
2710                     minPos = 0;
2711                     for(int i=0;i<alnLength;i++){
2712                         if(temp[i] != '-'){
2713                             minPos++;
2714                         }
2715                     }
2716                 }
2717                 else if(numDiff == minDiff){
2718                     minCount++;
2719                 }
2720                 
2721             }
2722             
2723             if(minDiff > ldiffs)        {       success = minDiff;      }       //no good matches
2724             else if(minCount > 1)       {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
2725             else{       //use the best match
2726                 seq.setUnaligned(rawSequence.substr(minPos));
2727                 success = minDiff;
2728             }
2729             
2730             if (alignment != NULL) { delete alignment; }
2731             
2732         }
2733         
2734         return success;
2735         
2736     }
2737     catch(exception& e) {
2738         m->errorOut(e, "TrimOligos", "stripLinker");
2739         exit(1);
2740     }
2741 }
2742
2743 //******************************************************************/
2744 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
2745     try {
2746         string rawSequence = seq.getUnaligned();
2747         bool success = sdiffs+1;        //guilty until proven innocent
2748         
2749         for(int i=0;i<spacer.size();i++){
2750             string oligo = spacer[i];
2751             
2752             if(rawSequence.length() < oligo.length()){
2753                 success = sdiffs+10;
2754                 break;
2755             }
2756             
2757             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2758                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2759                 if(qual.getName() != ""){
2760                     qual.trimQScores(oligo.length(), -1);
2761                 }
2762                 success = 0;
2763                 break;
2764             }
2765         }
2766         
2767         //if you found the spacer or if you don't want to allow for diffs
2768         if ((sdiffs == 0) || (success == 0)) { return success; }
2769         
2770         else { //try aligning and see if you can find it
2771             Alignment* alignment;
2772             if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2773             else{ alignment = NULL; }
2774             
2775             //can you find the barcode
2776             int minDiff = 1e6;
2777             int minCount = 1;
2778             int minPos = 0;
2779             
2780             for(int i = 0; i < spacer.size(); i++){
2781                 string oligo = spacer[i];
2782                 // int length = oligo.length();
2783                 
2784                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
2785                     success = sdiffs + 10;
2786                     break;
2787                 }
2788                 
2789                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2790                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2791                 oligo = alignment->getSeqAAln();
2792                 string temp = alignment->getSeqBAln();
2793                 
2794                 int alnLength = oligo.length();
2795                 
2796                 for(int i=oligo.length()-1;i>=0;i--){
2797                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2798                 }
2799                 oligo = oligo.substr(0,alnLength);
2800                 temp = temp.substr(0,alnLength);
2801                 
2802                 int numDiff = countDiffs(oligo, temp);
2803                 
2804                 if(numDiff < minDiff){
2805                     minDiff = numDiff;
2806                     minCount = 1;
2807                     minPos = 0;
2808                     for(int i=0;i<alnLength;i++){
2809                         if(temp[i] != '-'){
2810                             minPos++;
2811                         }
2812                     }
2813                 }
2814                 else if(numDiff == minDiff){
2815                     minCount++;
2816                 }
2817                 
2818             }
2819             
2820             if(minDiff > sdiffs)        {       success = minDiff;      }       //no good matches
2821             else if(minCount > 1)       {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
2822             else{       //use the best match
2823                 seq.setUnaligned(rawSequence.substr(minPos));
2824                 
2825                 if(qual.getName() != ""){
2826                     qual.trimQScores(minPos, -1);
2827                 }
2828                 success = minDiff;
2829             }
2830             
2831             if (alignment != NULL) { delete alignment; }
2832             
2833         }
2834         
2835         
2836         return success;
2837         
2838     }
2839     catch(exception& e) {
2840         m->errorOut(e, "TrimOligos", "stripSpacer");
2841         exit(1);
2842     }
2843 }
2844 //******************************************************************/
2845 bool TrimOligos::stripSpacer(Sequence& seq){
2846     try {
2847         
2848         string rawSequence = seq.getUnaligned();
2849         bool success = sdiffs+1;        //guilty until proven innocent
2850         
2851         for(int i=0;i<spacer.size();i++){
2852             string oligo = spacer[i];
2853             
2854             if(rawSequence.length() < oligo.length()){
2855                 success = sdiffs+10;
2856                 break;
2857             }
2858             
2859             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2860                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2861                 success = 0;
2862                 break;
2863             }
2864         }       
2865         
2866         //if you found the spacer or if you don't want to allow for diffs
2867         if ((sdiffs == 0) || (success == 0)) { return success; }
2868         
2869         else { //try aligning and see if you can find it
2870             Alignment* alignment;
2871             if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2872             else{ alignment = NULL; }
2873             
2874             //can you find the barcode
2875             int minDiff = 1e6;
2876             int minCount = 1;
2877             int minPos = 0;
2878             
2879             for(int i = 0; i < spacer.size(); i++){
2880                 string oligo = spacer[i];
2881                 // int length = oligo.length();
2882                 
2883                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
2884                     success = sdiffs + 10;
2885                     break;
2886                 }
2887                 
2888                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2889                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2890                 oligo = alignment->getSeqAAln();
2891                 string temp = alignment->getSeqBAln();
2892                 
2893                 int alnLength = oligo.length();
2894                 
2895                 for(int i=oligo.length()-1;i>=0;i--){
2896                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2897                 }
2898                 oligo = oligo.substr(0,alnLength);
2899                 temp = temp.substr(0,alnLength);
2900                 
2901                 int numDiff = countDiffs(oligo, temp);
2902                 
2903                 if(numDiff < minDiff){
2904                     minDiff = numDiff;
2905                     minCount = 1;
2906                     minPos = 0;
2907                     for(int i=0;i<alnLength;i++){
2908                         if(temp[i] != '-'){
2909                             minPos++;
2910                         }
2911                     }
2912                 }
2913                 else if(numDiff == minDiff){
2914                     minCount++;
2915                 }
2916                 
2917             }
2918             
2919             if(minDiff > sdiffs)        {       success = minDiff;      }       //no good matches
2920             else if(minCount > 1)       {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
2921             else{       //use the best match
2922                 seq.setUnaligned(rawSequence.substr(minPos));
2923                 success = minDiff;
2924             }
2925             
2926             if (alignment != NULL) { delete alignment; }
2927             
2928         }
2929         
2930         return success;
2931         
2932     }
2933     catch(exception& e) {
2934         m->errorOut(e, "TrimOligos", "stripSpacer");
2935         exit(1);
2936     }
2937 }
2938
2939 //******************************************************************/
2940 bool TrimOligos::compareDNASeq(string oligo, string seq){
2941     try {
2942         bool success = 1;
2943         int length = oligo.length();
2944         
2945         for(int i=0;i<length;i++){
2946             
2947             if(oligo[i] != seq[i]){
2948                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0; }
2949                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))        {       success = 0;    }
2950                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))    {       success = 0;    }
2951                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))    {       success = 0;    }
2952                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))    {       success = 0;    }
2953                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))    {       success = 0;    }
2954                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))    {       success = 0;    }
2955                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))    {       success = 0;    }
2956                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2957                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2958                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
2959                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }       
2960                 
2961                 if(success == 0)        {       break;  }
2962             }
2963             else{
2964                 success = 1;
2965             }
2966         }
2967         
2968         return success;
2969     }
2970     catch(exception& e) {
2971         m->errorOut(e, "TrimOligos", "compareDNASeq");
2972         exit(1);
2973     }
2974     
2975 }
2976 //********************************************************************/
2977 int TrimOligos::countDiffs(string oligo, string seq){
2978     try {
2979         
2980         int length = oligo.length();
2981         int countDiffs = 0;
2982         
2983         for(int i=0;i<length;i++){
2984             
2985             if(oligo[i] != seq[i]){
2986                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++; }
2987                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))        {       countDiffs++;   }
2988                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))    {       countDiffs++;   }
2989                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))    {       countDiffs++;   }
2990                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))    {       countDiffs++;   }
2991                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))    {       countDiffs++;   }
2992                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))    {       countDiffs++;   }
2993                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))    {       countDiffs++;   }
2994                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2995                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2996                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
2997                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
2998             }
2999             
3000         }
3001         
3002         return countDiffs;
3003     }
3004     catch(exception& e) {
3005         m->errorOut(e, "TrimOligos", "countDiffs");
3006         exit(1);
3007     }
3008 }
3009 //********************************************************************/
3010 string TrimOligos::reverseOligo(string oligo){
3011     try {
3012         string reverse = "";
3013         
3014         for(int i=oligo.length()-1;i>=0;i--){
3015             
3016             if(oligo[i] == 'A') {       reverse += 'T'; }
3017             else if(oligo[i] == 'T'){   reverse += 'A'; }
3018             else if(oligo[i] == 'U'){   reverse += 'A'; }
3019             
3020             else if(oligo[i] == 'G'){   reverse += 'C'; }
3021             else if(oligo[i] == 'C'){   reverse += 'G'; }
3022             
3023             else if(oligo[i] == 'R'){   reverse += 'Y'; }
3024             else if(oligo[i] == 'Y'){   reverse += 'R'; }
3025             
3026             else if(oligo[i] == 'M'){   reverse += 'K'; }
3027             else if(oligo[i] == 'K'){   reverse += 'M'; }
3028             
3029             else if(oligo[i] == 'W'){   reverse += 'W'; }
3030             else if(oligo[i] == 'S'){   reverse += 'S'; }
3031             
3032             else if(oligo[i] == 'B'){   reverse += 'V'; }
3033             else if(oligo[i] == 'V'){   reverse += 'B'; }
3034             
3035             else if(oligo[i] == 'D'){   reverse += 'H'; }
3036             else if(oligo[i] == 'H'){   reverse += 'D'; }
3037             
3038             else if(oligo[i] == '-'){   reverse += '-'; }
3039             else        {       reverse += 'N'; }
3040         }
3041         
3042         
3043         return reverse;
3044     }
3045     catch(exception& e) {
3046         m->errorOut(e, "TrimOligos", "reverseOligo");
3047         exit(1);
3048     }
3049 }
3050
3051 /********************************************************************/
3052
3053