]> git.donarmstrong.com Git - mothur.git/blob - trimoligos.cpp
changed random forest output filename
[mothur.git] / trimoligos.cpp
1 /*
2  * trimoligos.cpp
3  * Mothur
4  *
5  * Created by westcott on 9/1/11.
6  * Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
13
14
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
18     try {
19         m = MothurOut::getInstance();
20         paired = false;
21         
22         pdiffs = p;
23         bdiffs = b;
24         ldiffs = l;
25         sdiffs = s;
26         
27         barcodes = br;
28         primers = pr;
29         revPrimer = r;
30         linker = lk;
31         spacer = sp;
32         maxFBarcodeLength = 0;
33         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
34             if(it->first.length() > maxFBarcodeLength){
35                 maxFBarcodeLength = it->first.length();
36             }
37         }
38         maxFPrimerLength = 0;
39         map<string,int>::iterator it;
40         for(it=primers.begin();it!=primers.end();it++){
41             if(it->first.length() > maxFPrimerLength){
42                 maxFPrimerLength = it->first.length();
43             }
44         }
45         
46         maxLinkerLength = 0;
47         for(int i = 0; i < linker.size(); i++){
48             if(linker[i].length() > maxLinkerLength){
49                 maxLinkerLength = linker[i].length();
50             }
51         }
52         
53         maxSpacerLength = 0;
54         for(int i = 0; i < spacer.size(); i++){
55             if(spacer[i].length() > maxSpacerLength){
56                 maxSpacerLength = spacer[i].length();
57             }
58         }
59     }
60     catch(exception& e) {
61         m->errorOut(e, "TrimOligos", "TrimOligos");
62         exit(1);
63     }
64 }
65 /********************************************************************/
66 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
67 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
68     try {
69         m = MothurOut::getInstance();
70         
71         pdiffs = p;
72         bdiffs = b;
73         ldiffs = l;
74         sdiffs = s;
75         paired = true;
76         
77         maxFBarcodeLength = 0;
78         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
79             string forward = it->second.forward;
80             map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
81             
82             if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length(); }
83             
84             if (itForward == ifbarcodes.end()) {
85                 vector<int> temp; temp.push_back(it->first);
86                 ifbarcodes[forward] = temp;
87             }else { itForward->second.push_back(it->first); }
88         }
89         
90         maxFPrimerLength = 0;
91         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
92             string forward = it->second.forward;
93             map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
94             
95             if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length(); }
96             
97             if (itForward == ifprimers.end()) {
98                 vector<int> temp; temp.push_back(it->first);
99                 ifprimers[forward] = temp;
100             }else { itForward->second.push_back(it->first); }
101         }
102         
103         maxRBarcodeLength = 0;
104         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
105             string forward = it->second.reverse;
106             map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
107             
108             if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length(); }
109             
110             if (itForward == irbarcodes.end()) {
111                 vector<int> temp; temp.push_back(it->first);
112                 irbarcodes[forward] = temp;
113             }else { itForward->second.push_back(it->first); }
114         }
115         
116         maxRPrimerLength = 0;
117         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
118             string forward = it->second.reverse;
119             map<string, vector<int> >::iterator itForward = irprimers.find(forward);
120             
121             if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length(); }
122             
123             if (itForward == irprimers.end()) {
124                 vector<int> temp; temp.push_back(it->first);
125                 irprimers[forward] = temp;
126             }else { itForward->second.push_back(it->first); }
127         }
128         
129         ipbarcodes = br;
130         ipprimers = pr;
131     }
132     catch(exception& e) {
133         m->errorOut(e, "TrimOligos", "TrimOligos");
134         exit(1);
135     }
136 }
137 /********************************************************************/
138 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
139 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
140     try {
141         m = MothurOut::getInstance();
142         
143         pdiffs = p;
144         bdiffs = b;
145         
146         barcodes = br;
147         primers = pr;
148         revPrimer = r;
149         paired = false;
150         
151         maxFBarcodeLength = 0;
152         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
153             string oligo = it->first;
154             if(oligo.length() > maxFBarcodeLength){
155                 maxFBarcodeLength = oligo.length();
156             }
157         }
158         maxRBarcodeLength = maxFBarcodeLength;
159         
160         maxFPrimerLength = 0;
161         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
162             string oligo = it->first;
163             if(oligo.length() > maxFPrimerLength){
164                 maxFPrimerLength = oligo.length();
165             }
166         }
167         maxRPrimerLength = maxFPrimerLength;
168     }
169     catch(exception& e) {
170         m->errorOut(e, "TrimOligos", "TrimOligos");
171         exit(1);
172     }
173 }
174 /********************************************************************/
175 TrimOligos::~TrimOligos() {}
176 //********************************************************************/
177 bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
178     try {
179         
180         string rawSequence = seq.getUnaligned();
181         
182         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
183             string oligo = it->first;
184             
185             if(rawSequence.length() < oligo.length()) { break; }
186             
187             //search for primer
188             int olength = oligo.length();
189             for (int j = 0; j < rawSequence.length()-olength; j++){
190                 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
191                 string rawChunk = rawSequence.substr(j, olength);
192                 if(compareDNASeq(oligo, rawChunk)) {
193                     primerStart = j;
194                     primerEnd = primerStart + olength;
195                     return true;
196                 }
197                 
198             }
199         }
200         
201         primerStart = 0; primerEnd = 0;
202         //if you don't want to allow for diffs
203         if (pdiffs == 0) { return false; }
204         else { //try aligning and see if you can find it
205             
206             //can you find the barcode
207             int minDiff = 1e6;
208             int minCount = 1;
209             
210             Alignment* alignment;
211             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
212             else{ alignment = NULL; }
213             
214             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
215                 
216                 string prim = it->first;
217                 //search for primer
218                 int olength = prim.length();
219                 if (rawSequence.length() < olength) {} //ignore primers too long for this seq
220                 else{
221                     for (int j = 0; j < rawSequence.length()-olength; j++){
222                         
223                         string oligo = it->first;
224                         
225                         if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
226                         
227                         string rawChunk = rawSequence.substr(j, olength+pdiffs);
228                         
229                         //use needleman to align first primer.length()+numdiffs of sequence to each barcode
230                         alignment->alignPrimer(oligo, rawChunk);
231                         oligo = alignment->getSeqAAln();
232                         string temp = alignment->getSeqBAln();
233                         
234                         int alnLength = oligo.length();
235                         
236                         for(int i=oligo.length()-1;i>=0;i--){
237                             if(oligo[i] != '-'){        alnLength = i+1;        break;  }
238                         }
239                         oligo = oligo.substr(0,alnLength);
240                         temp = temp.substr(0,alnLength);
241                         
242                         int numDiff = countDiffs(oligo, temp);
243                         
244                         if(numDiff < minDiff){
245                             minDiff = numDiff;
246                             minCount = 1;
247                             primerStart = j;
248                             primerEnd = primerStart + alnLength;
249                         }else if(numDiff == minDiff){ minCount++; }
250                     }
251                 }
252             }
253             
254             if (alignment != NULL) { delete alignment; }
255             
256             if(minDiff > pdiffs)        {       primerStart = 0; primerEnd = 0; return false;   }       //no good matches
257             else if(minCount > 1)       {       primerStart = 0; primerEnd = 0; return false;   }       //can't tell the difference between multiple primers
258             else{ return true; }
259         }
260         
261         primerStart = 0; primerEnd = 0;
262         return false;
263         
264     }
265     catch(exception& e) {
266         m->errorOut(e, "TrimOligos", "stripForward");
267         exit(1);
268     }
269 }
270 //******************************************************************/
271 bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
272     try {
273         
274         string rawSequence = seq.getUnaligned();
275         
276         for(int i=0;i<revPrimer.size();i++){
277             string oligo = revPrimer[i];
278             if(rawSequence.length() < oligo.length()) { break; }
279             
280             //search for primer
281             int olength = oligo.length();
282             for (int j = rawSequence.length()-olength; j >= 0; j--){
283                 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
284                 string rawChunk = rawSequence.substr(j, olength);
285                 
286                 if(compareDNASeq(oligo, rawChunk)) {
287                     primerStart = j;
288                     primerEnd = primerStart + olength;
289                     return true;
290                 }
291                 
292             }
293         }
294         
295         primerStart = 0; primerEnd = 0;
296         return false;
297     }
298     catch(exception& e) {
299         m->errorOut(e, "PcrSeqsCommand", "findReverse");
300         exit(1);
301     }
302 }
303 //*******************************************************************/
304
305 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
306     try {
307         
308         if (paired) { int success = stripPairedBarcode(seq, qual, group); return success; }
309         
310         string rawSequence = seq.getUnaligned();
311         int success = bdiffs + 1;       //guilty until proven innocent
312         
313         //can you find the barcode
314         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
315             string oligo = it->first;
316             if(rawSequence.length() < oligo.length()){  //let's just assume that the barcodes are the same length
317                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
318                 break;
319             }
320             
321             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
322                 group = it->second;
323                 seq.setUnaligned(rawSequence.substr(oligo.length()));
324                 
325                 if(qual.getName() != ""){
326                     qual.trimQScores(oligo.length(), -1);
327                 }
328                 
329                 success = 0;
330                 break;
331             }
332         }
333         
334         //if you found the barcode or if you don't want to allow for diffs
335         if ((bdiffs == 0) || (success == 0)) { return success; }
336         
337         else { //try aligning and see if you can find it
338             Alignment* alignment;
339             if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
340             else{ alignment = NULL; }
341             
342             //can you find the barcode
343             int minDiff = 1e6;
344             int minCount = 1;
345             int minGroup = -1;
346             int minPos = 0;
347             
348             for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
349                 string oligo = it->first;
350                 // int length = oligo.length();
351                 
352                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
353                     success = bdiffs + 10;
354                     break;
355                 }
356                 
357                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
358                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
359                 oligo = alignment->getSeqAAln();
360                 string temp = alignment->getSeqBAln();
361                 
362                 int alnLength = oligo.length();
363                 
364                 for(int i=oligo.length()-1;i>=0;i--){
365                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
366                 }
367                 oligo = oligo.substr(0,alnLength);
368                 temp = temp.substr(0,alnLength);
369                 int numDiff = countDiffs(oligo, temp);
370                 
371                 if(numDiff < minDiff){
372                     minDiff = numDiff;
373                     minCount = 1;
374                     minGroup = it->second;
375                     minPos = 0;
376                     for(int i=0;i<alnLength;i++){
377                         if(temp[i] != '-'){
378                             minPos++;
379                         }
380                     }
381                 }
382                 else if(numDiff == minDiff){
383                     minCount++;
384                 }
385                 
386             }
387             
388             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
389             else if(minCount > 1)       {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
390             else{       //use the best match
391                 group = minGroup;
392                 seq.setUnaligned(rawSequence.substr(minPos));
393                 
394                 if(qual.getName() != ""){
395                     qual.trimQScores(minPos, -1);
396                 }
397                 success = minDiff;
398             }
399             
400             if (alignment != NULL) { delete alignment; }
401             
402         }
403         
404         return success;
405         
406     }
407     catch(exception& e) {
408         m->errorOut(e, "TrimOligos", "stripBarcode");
409         exit(1);
410     }
411 }
412 //*******************************************************************/
413 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
414     try {
415         //look for forward barcode
416         string rawFSequence = forwardSeq.getUnaligned();
417         string rawRSequence = reverseSeq.getUnaligned();
418         int success = bdiffs + 1;       //guilty until proven innocent
419         
420         //can you find the forward barcode
421         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
422             string foligo = it->second.forward;
423             string roligo = it->second.reverse;
424             
425             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
426                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
427                 break;
428             }
429             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
430                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
431                 break;
432             }
433             
434             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
435                 group = it->first;
436                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
437                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
438                 success = 0;
439                 break;
440             }
441         }
442         
443         //if you found the barcode or if you don't want to allow for diffs
444         if ((bdiffs == 0) || (success == 0)) { return success; }
445         else { //try aligning and see if you can find it
446             Alignment* alignment;
447             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
448             else{ alignment = NULL; }
449             
450             //can you find the barcode
451             int minDiff = 1e6;
452             int minCount = 1;
453             vector< vector<int> > minFGroup;
454             vector<int> minFPos;
455             
456             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
457             /*
458              1 Sarah Westcott
459              2 John Westcott
460              3 Anna Westcott
461              4 Sarah Schloss
462              5 Pat Schloss
463              6 Gail Brown
464              7 Pat Moore
465              only want to look for forward = Sarah, John, Anna, Pat, Gail
466              reverse = Westcott, Schloss, Brown, Moore
467              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
468              */
469             //cout << endl << forwardSeq.getName() << endl;
470             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
471                 string oligo = it->first;
472                 
473                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
474                     success = bdiffs + 10;
475                     break;
476                 }
477                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
478                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
479                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
480                 oligo = alignment->getSeqAAln();
481                 string temp = alignment->getSeqBAln();
482                 
483                 int alnLength = oligo.length();
484                 
485                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
486                 oligo = oligo.substr(0,alnLength);
487                 temp = temp.substr(0,alnLength);
488                 int numDiff = countDiffs(oligo, temp);
489                 
490                 if (alnLength == 0) { numDiff = bdiffs + 100; }
491                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
492                 
493                 if(numDiff < minDiff){
494                     minDiff = numDiff;
495                     minCount = 1;
496                     minFGroup.clear();
497                     minFGroup.push_back(it->second);
498                     int tempminFPos = 0;
499                     minFPos.clear();
500                     for(int i=0;i<alnLength;i++){
501                         if(temp[i] != '-'){
502                             tempminFPos++;
503                         }
504                     }
505                     minFPos.push_back(tempminFPos);
506                 }else if(numDiff == minDiff){
507                     minFGroup.push_back(it->second);
508                     int tempminFPos = 0;
509                     for(int i=0;i<alnLength;i++){
510                         if(temp[i] != '-'){
511                             tempminFPos++;
512                         }
513                     }
514                     minFPos.push_back(tempminFPos);
515                 }
516             }
517             
518             //cout << minDiff << '\t' << minCount << '\t' << endl;
519             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
520             else{
521                 //check for reverse match
522                 if (alignment != NULL) { delete alignment; }
523                 
524                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
525                 else{ alignment = NULL; }
526                 
527                 //can you find the barcode
528                 minDiff = 1e6;
529                 minCount = 1;
530                 vector< vector<int> > minRGroup;
531                 vector<int> minRPos;
532                 
533                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
534                     string oligo = it->first;
535                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
536                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
537                         success = bdiffs + 10;
538                         break;
539                     }
540                     
541                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
542                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
543                     oligo = alignment->getSeqAAln();
544                     string temp = alignment->getSeqBAln();
545                     
546                     int alnLength = oligo.length();
547                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
548                     oligo = oligo.substr(0,alnLength);
549                     temp = temp.substr(0,alnLength);
550                     int numDiff = countDiffs(oligo, temp);
551                     if (alnLength == 0) { numDiff = bdiffs + 100; }
552                     
553                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
554                     if(numDiff < minDiff){
555                         minDiff = numDiff;
556                         minCount = 1;
557                         minRGroup.clear();
558                         minRGroup.push_back(it->second);
559                         int tempminRPos = 0;
560                         minRPos.clear();
561                         for(int i=0;i<alnLength;i++){
562                             if(temp[i] != '-'){
563                                 tempminRPos++;
564                             }
565                         }
566                         minRPos.push_back(tempminRPos);
567                     }else if(numDiff == minDiff){
568                         int tempminRPos = 0;
569                         for(int i=0;i<alnLength;i++){
570                             if(temp[i] != '-'){
571                                 tempminRPos++;
572                             }
573                         }
574                         minRPos.push_back(tempminRPos);
575                         minRGroup.push_back(it->second);
576                     }
577                     
578                 }
579                 
580                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
581                 else {
582                     bool foundMatch = false;
583                     vector<int> matches;
584                     for (int i = 0; i < minFGroup.size(); i++) {
585                         for (int j = 0; j < minFGroup[i].size(); j++) {
586                             for (int k = 0; k < minRGroup.size(); k++) {
587                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
588                             }
589                         }
590                     }
591                     
592                     int fStart = 0;
593                     int rStart = 0;
594                     if (matches.size() == 1) {
595                         foundMatch = true;
596                         group = matches[0];
597                         fStart = minFPos[0];
598                         rStart = minRPos[0];
599                     }
600                     
601                     //we have an acceptable match for the forward and reverse, but do they match?
602                     if (foundMatch) {
603                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
604                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
605                         success = minDiff;
606                     }else { success = bdiffs + 100;     }
607                 }
608             }
609             
610             if (alignment != NULL) { delete alignment; }
611         }
612         
613         return success;
614         
615     }
616     catch(exception& e) {
617         m->errorOut(e, "TrimOligos", "stripIBarcode");
618         exit(1);
619     }
620     
621 }
622 //*******************************************************************/
623 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
624     try {
625         //look for forward barcode
626         string rawFSequence = forwardSeq.getUnaligned();
627         string rawRSequence = reverseSeq.getUnaligned();
628         int success = bdiffs + 1;       //guilty until proven innocent
629         
630         //can you find the forward barcode
631         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
632             string foligo = it->second.forward;
633             string roligo = it->second.reverse;
634             
635             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
636                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
637                 break;
638             }
639             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
640                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
641                 break;
642             }
643             
644             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
645                 group = it->first;
646                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
647                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
648                 forwardQual.trimQScores(foligo.length(), -1);
649                 reverseQual.trimQScores(roligo.length(), -1);
650                 success = 0;
651                 break;
652             }
653         }
654         
655         //if you found the barcode or if you don't want to allow for diffs
656         if ((bdiffs == 0) || (success == 0)) { return success; }
657         else { //try aligning and see if you can find it
658             Alignment* alignment;
659             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
660             else{ alignment = NULL; }
661             
662             //can you find the barcode
663             int minDiff = 1e6;
664             int minCount = 1;
665             vector< vector<int> > minFGroup;
666             vector<int> minFPos;
667             
668             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
669             /*
670              1 Sarah Westcott
671              2 John Westcott
672              3 Anna Westcott
673              4 Sarah Schloss
674              5 Pat Schloss
675              6 Gail Brown
676              7 Pat Moore
677              only want to look for forward = Sarah, John, Anna, Pat, Gail
678              reverse = Westcott, Schloss, Brown, Moore
679              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
680              */
681             //cout << endl << forwardSeq.getName() << endl;
682             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
683                 string oligo = it->first;
684                 
685                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
686                     success = bdiffs + 10;
687                     break;
688                 }
689                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
690                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
691                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
692                 oligo = alignment->getSeqAAln();
693                 string temp = alignment->getSeqBAln();
694                 
695                 int alnLength = oligo.length();
696                 
697                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
698                 oligo = oligo.substr(0,alnLength);
699                 temp = temp.substr(0,alnLength);
700                 int numDiff = countDiffs(oligo, temp);
701                 
702                 if (alnLength == 0) { numDiff = bdiffs + 100; }
703                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
704                 
705                 if(numDiff < minDiff){
706                     minDiff = numDiff;
707                     minCount = 1;
708                     minFGroup.clear();
709                     minFGroup.push_back(it->second);
710                     int tempminFPos = 0;
711                     minFPos.clear();
712                     for(int i=0;i<alnLength;i++){
713                         if(temp[i] != '-'){
714                             tempminFPos++;
715                         }
716                     }
717                     minFPos.push_back(tempminFPos);
718                 }else if(numDiff == minDiff){
719                     minFGroup.push_back(it->second);
720                     int tempminFPos = 0;
721                     for(int i=0;i<alnLength;i++){
722                         if(temp[i] != '-'){
723                             tempminFPos++;
724                         }
725                     }
726                     minFPos.push_back(tempminFPos);
727                 }
728             }
729             
730             //cout << minDiff << '\t' << minCount << '\t' << endl;
731             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
732             else{
733                 //check for reverse match
734                 if (alignment != NULL) { delete alignment; }
735                 
736                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
737                 else{ alignment = NULL; }
738                 
739                 //can you find the barcode
740                 minDiff = 1e6;
741                 minCount = 1;
742                 vector< vector<int> > minRGroup;
743                 vector<int> minRPos;
744                 
745                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
746                     string oligo = it->first;
747                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
748                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
749                         success = bdiffs + 10;
750                         break;
751                     }
752                     
753                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
754                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
755                     oligo = alignment->getSeqAAln();
756                     string temp = alignment->getSeqBAln();
757                     
758                     int alnLength = oligo.length();
759                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
760                     oligo = oligo.substr(0,alnLength);
761                     temp = temp.substr(0,alnLength);
762                     int numDiff = countDiffs(oligo, temp);
763                     if (alnLength == 0) { numDiff = bdiffs + 100; }
764                     
765                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
766                     if(numDiff < minDiff){
767                         minDiff = numDiff;
768                         minCount = 1;
769                         minRGroup.clear();
770                         minRGroup.push_back(it->second);
771                         int tempminRPos = 0;
772                         minRPos.clear();
773                         for(int i=0;i<alnLength;i++){
774                             if(temp[i] != '-'){
775                                 tempminRPos++;
776                             }
777                         }
778                         minRPos.push_back(tempminRPos);
779                     }else if(numDiff == minDiff){
780                         int tempminRPos = 0;
781                         for(int i=0;i<alnLength;i++){
782                             if(temp[i] != '-'){
783                                 tempminRPos++;
784                             }
785                         }
786                         minRPos.push_back(tempminRPos);
787                         minRGroup.push_back(it->second);
788                     }
789                     
790                 }
791                 
792                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
793                 else {
794                     bool foundMatch = false;
795                     vector<int> matches;
796                     for (int i = 0; i < minFGroup.size(); i++) {
797                         for (int j = 0; j < minFGroup[i].size(); j++) {
798                             for (int k = 0; k < minRGroup.size(); k++) {
799                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
800                             }
801                         }
802                     }
803                     
804                     int fStart = 0;
805                     int rStart = 0;
806                     if (matches.size() == 1) {
807                         foundMatch = true;
808                         group = matches[0];
809                         fStart = minFPos[0];
810                         rStart = minRPos[0];
811                     }
812                     
813                     //we have an acceptable match for the forward and reverse, but do they match?
814                     if (foundMatch) {
815                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
816                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
817                         forwardQual.trimQScores(fStart, -1);
818                         reverseQual.trimQScores(rStart, -1);
819                         success = minDiff;
820                     }else { success = bdiffs + 100;     }
821                 }
822             }
823             
824             if (alignment != NULL) { delete alignment; }
825         }
826         
827         return success;
828         
829     }
830     catch(exception& e) {
831         m->errorOut(e, "TrimOligos", "stripIBarcode");
832         exit(1);
833     }
834     
835 }
836 //*******************************************************************/
837 int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
838     try {
839         //look for forward barcode
840         string rawSeq = seq.getUnaligned();
841         int success = bdiffs + 1;       //guilty until proven innocent
842         //cout << seq.getName() << endl;
843         //can you find the forward barcode
844         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
845             string foligo = it->second.forward;
846             string roligo = it->second.reverse;
847             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
848             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
849             if(rawSeq.length() < foligo.length()){      //let's just assume that the barcodes are the same length
850                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
851                 break;
852             }
853             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
854                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
855                 break;
856             }
857             
858             if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
859             
860             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
861                 group = it->first;
862                 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
863                 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
864                 if(qual.getName() != ""){
865                     qual.trimQScores(-1, rawSeq.length()-roligo.length());
866                     qual.trimQScores(foligo.length(), -1);
867                 }
868                 success = 0;
869                 break;
870             }
871         }
872         //cout << "success=" << success << endl;
873         //if you found the barcode or if you don't want to allow for diffs
874         if ((bdiffs == 0) || (success == 0)) { return success; }
875         else { //try aligning and see if you can find it
876             Alignment* alignment;
877             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
878             else{ alignment = NULL; }
879             
880             //can you find the barcode
881             int minDiff = 1e6;
882             int minCount = 1;
883             vector< vector<int> > minFGroup;
884             vector<int> minFPos;
885             
886             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
887             /*
888              1 Sarah Westcott
889              2 John Westcott
890              3 Anna Westcott
891              4 Sarah Schloss
892              5 Pat Schloss
893              6 Gail Brown
894              7 Pat Moore
895              only want to look for forward = Sarah, John, Anna, Pat, Gail
896              reverse = Westcott, Schloss, Brown, Moore
897              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
898              */
899             //cout << endl << forwardSeq.getName() << endl;
900             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
901                 string oligo = it->first;
902                 
903                 if(rawSeq.length() < maxFBarcodeLength){        //let's just assume that the barcodes are the same length
904                     success = bdiffs + 10;
905                     break;
906                 }
907                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
908                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
909                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
910                 oligo = alignment->getSeqAAln();
911                 string temp = alignment->getSeqBAln();
912                 
913                 int alnLength = oligo.length();
914                 
915                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
916                 oligo = oligo.substr(0,alnLength);
917                 temp = temp.substr(0,alnLength);
918                 int numDiff = countDiffs(oligo, temp);
919                 
920                 if (alnLength == 0) { numDiff = bdiffs + 100; }
921                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
922                 
923                 if(numDiff < minDiff){
924                     minDiff = numDiff;
925                     minCount = 1;
926                     minFGroup.clear();
927                     minFGroup.push_back(it->second);
928                     int tempminFPos = 0;
929                     minFPos.clear();
930                     for(int i=0;i<alnLength;i++){
931                         if(temp[i] != '-'){
932                             tempminFPos++;
933                         }
934                     }
935                     minFPos.push_back(tempminFPos);
936                 }else if(numDiff == minDiff){
937                     minFGroup.push_back(it->second);
938                     int tempminFPos = 0;
939                     for(int i=0;i<alnLength;i++){
940                         if(temp[i] != '-'){
941                             tempminFPos++;
942                         }
943                     }
944                     minFPos.push_back(tempminFPos);
945                 }
946             }
947             
948             //cout << minDiff << '\t' << minCount << '\t' << endl;
949             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
950             else{
951                 //check for reverse match
952                 if (alignment != NULL) { delete alignment; }
953                 
954                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
955                 else{ alignment = NULL; }
956                 
957                 //can you find the barcode
958                 minDiff = 1e6;
959                 minCount = 1;
960                 vector< vector<int> > minRGroup;
961                 vector<int> minRPos;
962                 
963                 string rawRSequence = reverseOligo(seq.getUnaligned());
964                 //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl;
965                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
966                     string oligo = reverseOligo(it->first);
967                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
968                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
969                         success = bdiffs + 10;
970                         break;
971                     }
972                     
973                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
974                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
975                     oligo = alignment->getSeqAAln();
976                     string temp = alignment->getSeqBAln();
977                     
978                     int alnLength = oligo.length();
979                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
980                     oligo = oligo.substr(0,alnLength);
981                     temp = temp.substr(0,alnLength);
982                     int numDiff = countDiffs(oligo, temp);
983                     if (alnLength == 0) { numDiff = bdiffs + 100; }
984                     
985                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
986                     if(numDiff < minDiff){
987                         minDiff = numDiff;
988                         minCount = 1;
989                         minRGroup.clear();
990                         minRGroup.push_back(it->second);
991                         int tempminRPos = 0;
992                         minRPos.clear();
993                         for(int i=0;i<alnLength;i++){
994                             if(temp[i] != '-'){
995                                 tempminRPos++;
996                             }
997                         }
998                         minRPos.push_back(tempminRPos);
999                     }else if(numDiff == minDiff){
1000                         int tempminRPos = 0;
1001                         for(int i=0;i<alnLength;i++){
1002                             if(temp[i] != '-'){
1003                                 tempminRPos++;
1004                             }
1005                         }
1006                         minRPos.push_back(tempminRPos);
1007                         minRGroup.push_back(it->second);
1008                     }
1009                     
1010                 }
1011                 
1012                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
1013                 else {
1014                     bool foundMatch = false;
1015                     vector<int> matches;
1016                     for (int i = 0; i < minFGroup.size(); i++) {
1017                         for (int j = 0; j < minFGroup[i].size(); j++) {
1018                             for (int k = 0; k < minRGroup.size(); k++) {
1019                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1020                             }
1021                         }
1022                     }
1023                     
1024                     int fStart = 0;
1025                     int rStart = 0;
1026                     if (matches.size() == 1) {
1027                         foundMatch = true;
1028                         group = matches[0];
1029                         fStart = minFPos[0];
1030                         rStart = rawSeq.length() - minRPos[0];
1031                         if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence
1032                     }
1033                     
1034                     //we have an acceptable match for the forward and reverse, but do they match?
1035                     if (foundMatch) {
1036                         string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1037                         seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1038                         if(qual.getName() != ""){
1039                             qual.trimQScores(-1, rStart);
1040                             qual.trimQScores(fStart, -1);
1041                         }
1042                         success = minDiff;
1043                         //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
1044                     }else { success = bdiffs + 100;     }
1045                 }
1046             }
1047             
1048             if (alignment != NULL) { delete alignment; }
1049         }
1050         
1051         return success;
1052         
1053     }
1054     catch(exception& e) {
1055         m->errorOut(e, "TrimOligos", "stripPairedBarcode");
1056         exit(1);
1057     }
1058     
1059 }
1060 //*******************************************************************/
1061 int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1062     try {
1063         //look for forward
1064         string rawSeq = seq.getUnaligned();
1065         int success = pdiffs + 1;       //guilty until proven innocent
1066         //cout << seq.getName() << endl;
1067         //can you find the forward
1068         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1069             string foligo = it->second.forward;
1070             string roligo = it->second.reverse;
1071             
1072             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
1073             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
1074             if(rawSeq.length() < foligo.length()){      //let's just assume that the barcodes are the same length
1075                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1076                 break;
1077             }
1078             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
1079                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1080                 break;
1081             }
1082             
1083             if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
1084             
1085             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
1086                 group = it->first;
1087                 if (!keepForward) {
1088                     string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
1089                     seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
1090                     if(qual.getName() != ""){
1091                         qual.trimQScores(-1, rawSeq.length()-roligo.length());
1092                         qual.trimQScores(foligo.length(), -1);
1093                     }
1094                 }
1095                 success = 0;
1096                 break;
1097             }
1098         }
1099         //cout << "success=" << success << endl;
1100         //if you found the barcode or if you don't want to allow for diffs
1101         if ((pdiffs == 0) || (success == 0)) { return success; }
1102         else { //try aligning and see if you can find it
1103             Alignment* alignment;
1104             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1105             else{ alignment = NULL; }
1106             
1107             //can you find the barcode
1108             int minDiff = 1e6;
1109             int minCount = 1;
1110             vector< vector<int> > minFGroup;
1111             vector<int> minFPos;
1112             
1113             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1114             /*
1115              1 Sarah Westcott
1116              2 John Westcott
1117              3 Anna Westcott
1118              4 Sarah Schloss
1119              5 Pat Schloss
1120              6 Gail Brown
1121              7 Pat Moore
1122              only want to look for forward = Sarah, John, Anna, Pat, Gail
1123              reverse = Westcott, Schloss, Brown, Moore
1124              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1125              */
1126             //cout << endl << forwardSeq.getName() << endl;
1127             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1128                 string oligo = it->first;
1129                 
1130                 if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1131                     success = pdiffs + 10;
1132                     break;
1133                 }
1134                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl;
1135                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1136                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
1137                 oligo = alignment->getSeqAAln();
1138                 string temp = alignment->getSeqBAln();
1139                 
1140                 int alnLength = oligo.length();
1141                 
1142                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1143                 oligo = oligo.substr(0,alnLength);
1144                 temp = temp.substr(0,alnLength);
1145                 int numDiff = countDiffs(oligo, temp);
1146                 
1147                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1148                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1149                 
1150                 if(numDiff < minDiff){
1151                     minDiff = numDiff;
1152                     minCount = 1;
1153                     minFGroup.clear();
1154                     minFGroup.push_back(it->second);
1155                     int tempminFPos = 0;
1156                     minFPos.clear();
1157                     for(int i=0;i<alnLength;i++){
1158                         if(temp[i] != '-'){
1159                             tempminFPos++;
1160                         }
1161                     }
1162                     minFPos.push_back(tempminFPos);
1163                 }else if(numDiff == minDiff){
1164                     minFGroup.push_back(it->second);
1165                     int tempminFPos = 0;
1166                     for(int i=0;i<alnLength;i++){
1167                         if(temp[i] != '-'){
1168                             tempminFPos++;
1169                         }
1170                     }
1171                     minFPos.push_back(tempminFPos);
1172                 }
1173             }
1174             
1175             //cout << minDiff << '\t' << minCount << '\t' << endl;
1176             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1177             else{
1178                 //check for reverse match
1179                 if (alignment != NULL) { delete alignment; }
1180                 
1181                 if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1182                 else{ alignment = NULL; }
1183                 
1184                 //can you find the barcode
1185                 minDiff = 1e6;
1186                 minCount = 1;
1187                 vector< vector<int> > minRGroup;
1188                 vector<int> minRPos;
1189                 
1190                 string rawRSequence = reverseOligo(seq.getUnaligned());
1191                 
1192                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1193                     string oligo = reverseOligo(it->first);
1194                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
1195                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1196                         success = pdiffs + 10;
1197                         break;
1198                     }
1199                     
1200                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1201                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1202                     oligo = alignment->getSeqAAln();
1203                     string temp = alignment->getSeqBAln();
1204                     
1205                     int alnLength = oligo.length();
1206                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1207                     oligo = oligo.substr(0,alnLength);
1208                     temp = temp.substr(0,alnLength);
1209                     int numDiff = countDiffs(oligo, temp);
1210                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1211                     
1212                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1213                     if(numDiff < minDiff){
1214                         minDiff = numDiff;
1215                         minCount = 1;
1216                         minRGroup.clear();
1217                         minRGroup.push_back(it->second);
1218                         int tempminRPos = 0;
1219                         minRPos.clear();
1220                         for(int i=0;i<alnLength;i++){
1221                             if(temp[i] != '-'){
1222                                 tempminRPos++;
1223                             }
1224                         }
1225                         minRPos.push_back(tempminRPos);
1226                     }else if(numDiff == minDiff){
1227                         int tempminRPos = 0;
1228                         for(int i=0;i<alnLength;i++){
1229                             if(temp[i] != '-'){
1230                                 tempminRPos++;
1231                             }
1232                         }
1233                         minRPos.push_back(tempminRPos);
1234                         minRGroup.push_back(it->second);
1235                     }
1236                     
1237                 }
1238                 
1239                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1240                 else {
1241                     bool foundMatch = false;
1242                     vector<int> matches;
1243                     for (int i = 0; i < minFGroup.size(); i++) {
1244                         for (int j = 0; j < minFGroup[i].size(); j++) {
1245                             for (int k = 0; k < minRGroup.size(); k++) {
1246                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1247                             }
1248                         }
1249                     }
1250                     
1251                     int fStart = 0;
1252                     int rStart = 0;
1253                     if (matches.size() == 1) {
1254                         foundMatch = true;
1255                         group = matches[0];
1256                         fStart = minFPos[0];
1257                         rStart = rawSeq.length() - minRPos[0];
1258                         if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence
1259                     }
1260                     
1261                     //we have an acceptable match for the forward and reverse, but do they match?
1262                     if (foundMatch) {
1263                         if (!keepForward) {
1264                             string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1265                             seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1266                             if(qual.getName() != ""){
1267                                 qual.trimQScores(-1, rStart);
1268                                 qual.trimQScores(fStart, -1);
1269                             }
1270                         }
1271                         success = minDiff;
1272                         //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
1273                     }else { success = pdiffs + 100;     }
1274                 }
1275             }
1276             
1277             if (alignment != NULL) { delete alignment; }
1278         }
1279         
1280         return success;
1281         
1282     }
1283     catch(exception& e) {
1284         m->errorOut(e, "TrimOligos", "stripPairedPrimers");
1285         exit(1);
1286     }
1287     
1288 }
1289
1290 //*******************************************************************/
1291 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
1292     try {
1293         //look for forward barcode
1294         string rawFSequence = forwardSeq.getUnaligned();
1295         string rawRSequence = reverseSeq.getUnaligned();
1296         int success = pdiffs + 1;       //guilty until proven innocent
1297         
1298         //can you find the forward barcode
1299         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1300             string foligo = it->second.forward;
1301             string roligo = it->second.reverse;
1302             
1303             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
1304                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1305                 break;
1306             }
1307             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
1308                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1309                 break;
1310             }
1311             
1312             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
1313                 group = it->first;
1314                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1315                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1316                 forwardQual.trimQScores(foligo.length(), -1);
1317                 reverseQual.trimQScores(roligo.length(), -1);
1318                 success = 0;
1319                 break;
1320             }
1321         }
1322         
1323         //if you found the barcode or if you don't want to allow for diffs
1324         if ((pdiffs == 0) || (success == 0)) { return success; }
1325         else { //try aligning and see if you can find it
1326             Alignment* alignment;
1327             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1328             else{ alignment = NULL; }
1329             
1330             //can you find the barcode
1331             int minDiff = 1e6;
1332             int minCount = 1;
1333             vector< vector<int> > minFGroup;
1334             vector<int> minFPos;
1335             
1336             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1337             /*
1338              1 Sarah Westcott
1339              2 John Westcott
1340              3 Anna Westcott
1341              4 Sarah Schloss
1342              5 Pat Schloss
1343              6 Gail Brown
1344              7 Pat Moore
1345              only want to look for forward = Sarah, John, Anna, Pat, Gail
1346              reverse = Westcott, Schloss, Brown, Moore
1347              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1348              */
1349             //cout << endl << forwardSeq.getName() << endl;
1350             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1351                 string oligo = it->first;
1352                 
1353                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
1354                     success = pdiffs + 10;
1355                     break;
1356                 }
1357                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1358                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1359                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1360                 oligo = alignment->getSeqAAln();
1361                 string temp = alignment->getSeqBAln();
1362                 
1363                 int alnLength = oligo.length();
1364                 
1365                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1366                 oligo = oligo.substr(0,alnLength);
1367                 temp = temp.substr(0,alnLength);
1368                 int numDiff = countDiffs(oligo, temp);
1369                 
1370                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1371                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1372                 
1373                 if(numDiff < minDiff){
1374                     minDiff = numDiff;
1375                     minCount = 1;
1376                     minFGroup.clear();
1377                     minFGroup.push_back(it->second);
1378                     int tempminFPos = 0;
1379                     minFPos.clear();
1380                     for(int i=0;i<alnLength;i++){
1381                         if(temp[i] != '-'){
1382                             tempminFPos++;
1383                         }
1384                     }
1385                     minFPos.push_back(tempminFPos);
1386                 }else if(numDiff == minDiff){
1387                     minFGroup.push_back(it->second);
1388                     int tempminFPos = 0;
1389                     for(int i=0;i<alnLength;i++){
1390                         if(temp[i] != '-'){
1391                             tempminFPos++;
1392                         }
1393                     }
1394                     minFPos.push_back(tempminFPos);
1395                 }
1396             }
1397             
1398             //cout << minDiff << '\t' << minCount << '\t' << endl;
1399             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1400             else{
1401                 //check for reverse match
1402                 if (alignment != NULL) { delete alignment; }
1403                 
1404                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1405                 else{ alignment = NULL; }
1406                 
1407                 //can you find the barcode
1408                 minDiff = 1e6;
1409                 minCount = 1;
1410                 vector< vector<int> > minRGroup;
1411                 vector<int> minRPos;
1412                 
1413                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1414                     string oligo = it->first;
1415                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1416                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1417                         success = pdiffs + 10;
1418                         break;
1419                     }
1420                     
1421                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1422                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1423                     oligo = alignment->getSeqAAln();
1424                     string temp = alignment->getSeqBAln();
1425                     
1426                     int alnLength = oligo.length();
1427                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1428                     oligo = oligo.substr(0,alnLength);
1429                     temp = temp.substr(0,alnLength);
1430                     int numDiff = countDiffs(oligo, temp);
1431                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1432                     
1433                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1434                     if(numDiff < minDiff){
1435                         minDiff = numDiff;
1436                         minCount = 1;
1437                         minRGroup.clear();
1438                         minRGroup.push_back(it->second);
1439                         int tempminRPos = 0;
1440                         minRPos.clear();
1441                         for(int i=0;i<alnLength;i++){
1442                             if(temp[i] != '-'){
1443                                 tempminRPos++;
1444                             }
1445                         }
1446                         minRPos.push_back(tempminRPos);
1447                     }else if(numDiff == minDiff){
1448                         int tempminRPos = 0;
1449                         for(int i=0;i<alnLength;i++){
1450                             if(temp[i] != '-'){
1451                                 tempminRPos++;
1452                             }
1453                         }
1454                         minRPos.push_back(tempminRPos);
1455                         minRGroup.push_back(it->second);
1456                     }
1457                     
1458                 }
1459                 
1460                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1461                 else {
1462                     bool foundMatch = false;
1463                     vector<int> matches;
1464                     for (int i = 0; i < minFGroup.size(); i++) {
1465                         for (int j = 0; j < minFGroup[i].size(); j++) {
1466                             for (int k = 0; k < minRGroup.size(); k++) {
1467                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1468                             }
1469                         }
1470                     }
1471                     
1472                     int fStart = 0;
1473                     int rStart = 0;
1474                     if (matches.size() == 1) {
1475                         foundMatch = true;
1476                         group = matches[0];
1477                         fStart = minFPos[0];
1478                         rStart = minRPos[0];
1479                     }
1480                     
1481                     //we have an acceptable match for the forward and reverse, but do they match?
1482                     if (foundMatch) {
1483                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1484                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1485                         forwardQual.trimQScores(fStart, -1);
1486                         reverseQual.trimQScores(rStart, -1);
1487                         success = minDiff;
1488                     }else { success = pdiffs + 100;     }
1489                 }
1490             }
1491             
1492             if (alignment != NULL) { delete alignment; }
1493         }
1494         
1495         return success;
1496         
1497     }
1498     catch(exception& e) {
1499         m->errorOut(e, "TrimOligos", "stripIForward");
1500         exit(1);
1501     }
1502     
1503 }
1504 //*******************************************************************/
1505 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
1506     try {
1507         //look for forward barcode
1508         string rawFSequence = forwardSeq.getUnaligned();
1509         string rawRSequence = reverseSeq.getUnaligned();
1510         int success = pdiffs + 1;       //guilty until proven innocent
1511         
1512         //can you find the forward barcode
1513         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1514             string foligo = it->second.forward;
1515             string roligo = it->second.reverse;
1516             
1517             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
1518                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1519                 break;
1520             }
1521             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
1522                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1523                 break;
1524             }
1525             
1526             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1527                 group = it->first;
1528                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1529                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1530                 success = 0;
1531                 break;
1532             }
1533         }
1534         
1535         //if you found the barcode or if you don't want to allow for diffs
1536         if ((pdiffs == 0) || (success == 0)) { return success; }
1537         else { //try aligning and see if you can find it
1538             Alignment* alignment;
1539             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1540             else{ alignment = NULL; }
1541             
1542             //can you find the barcode
1543             int minDiff = 1e6;
1544             int minCount = 1;
1545             vector< vector<int> > minFGroup;
1546             vector<int> minFPos;
1547             
1548             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1549             /*
1550              1 Sarah Westcott
1551              2 John Westcott
1552              3 Anna Westcott
1553              4 Sarah Schloss
1554              5 Pat Schloss
1555              6 Gail Brown
1556              7 Pat Moore
1557              only want to look for forward = Sarah, John, Anna, Pat, Gail
1558              reverse = Westcott, Schloss, Brown, Moore
1559              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1560              */
1561             //cout << endl << forwardSeq.getName() << endl;
1562             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1563                 string oligo = it->first;
1564                 
1565                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
1566                     success = pdiffs + 10;
1567                     break;
1568                 }
1569                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1570                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1571                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1572                 oligo = alignment->getSeqAAln();
1573                 string temp = alignment->getSeqBAln();
1574                 
1575                 int alnLength = oligo.length();
1576                 
1577                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1578                 oligo = oligo.substr(0,alnLength);
1579                 temp = temp.substr(0,alnLength);
1580                 int numDiff = countDiffs(oligo, temp);
1581                 
1582                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1583                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1584                 
1585                 if(numDiff < minDiff){
1586                     minDiff = numDiff;
1587                     minCount = 1;
1588                     minFGroup.clear();
1589                     minFGroup.push_back(it->second);
1590                     int tempminFPos = 0;
1591                     minFPos.clear();
1592                     for(int i=0;i<alnLength;i++){
1593                         if(temp[i] != '-'){
1594                             tempminFPos++;
1595                         }
1596                     }
1597                     minFPos.push_back(tempminFPos);
1598                 }else if(numDiff == minDiff){
1599                     minFGroup.push_back(it->second);
1600                     int tempminFPos = 0;
1601                     for(int i=0;i<alnLength;i++){
1602                         if(temp[i] != '-'){
1603                             tempminFPos++;
1604                         }
1605                     }
1606                     minFPos.push_back(tempminFPos);
1607                 }
1608             }
1609             
1610             //cout << minDiff << '\t' << minCount << '\t' << endl;
1611             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1612             else{
1613                 //check for reverse match
1614                 if (alignment != NULL) { delete alignment; }
1615                 
1616                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1617                 else{ alignment = NULL; }
1618                 
1619                 //can you find the barcode
1620                 minDiff = 1e6;
1621                 minCount = 1;
1622                 vector< vector<int> > minRGroup;
1623                 vector<int> minRPos;
1624                 
1625                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1626                     string oligo = it->first;
1627                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1628                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1629                         success = pdiffs + 10;
1630                         break;
1631                     }
1632                     
1633                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1634                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1635                     oligo = alignment->getSeqAAln();
1636                     string temp = alignment->getSeqBAln();
1637                     
1638                     int alnLength = oligo.length();
1639                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1640                     oligo = oligo.substr(0,alnLength);
1641                     temp = temp.substr(0,alnLength);
1642                     int numDiff = countDiffs(oligo, temp);
1643                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1644                     
1645                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1646                     if(numDiff < minDiff){
1647                         minDiff = numDiff;
1648                         minCount = 1;
1649                         minRGroup.clear();
1650                         minRGroup.push_back(it->second);
1651                         int tempminRPos = 0;
1652                         minRPos.clear();
1653                         for(int i=0;i<alnLength;i++){
1654                             if(temp[i] != '-'){
1655                                 tempminRPos++;
1656                             }
1657                         }
1658                         minRPos.push_back(tempminRPos);
1659                     }else if(numDiff == minDiff){
1660                         int tempminRPos = 0;
1661                         for(int i=0;i<alnLength;i++){
1662                             if(temp[i] != '-'){
1663                                 tempminRPos++;
1664                             }
1665                         }
1666                         minRPos.push_back(tempminRPos);
1667                         minRGroup.push_back(it->second);
1668                     }
1669                     
1670                 }
1671                 
1672                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1673                 else {
1674                     bool foundMatch = false;
1675                     vector<int> matches;
1676                     for (int i = 0; i < minFGroup.size(); i++) {
1677                         for (int j = 0; j < minFGroup[i].size(); j++) {
1678                             for (int k = 0; k < minRGroup.size(); k++) {
1679                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1680                             }
1681                         }
1682                     }
1683                     
1684                     int fStart = 0;
1685                     int rStart = 0;
1686                     if (matches.size() == 1) {
1687                         foundMatch = true;
1688                         group = matches[0];
1689                         fStart = minFPos[0];
1690                         rStart = minRPos[0];
1691                     }
1692                     
1693                     //we have an acceptable match for the forward and reverse, but do they match?
1694                     if (foundMatch) {
1695                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1696                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1697                         success = minDiff;
1698                     }else { success = pdiffs + 100;     }
1699                 }
1700             }
1701             
1702             if (alignment != NULL) { delete alignment; }
1703         }
1704         
1705         return success;
1706         
1707     }
1708     catch(exception& e) {
1709         m->errorOut(e, "TrimOligos", "stripIForward");
1710         exit(1);
1711     }
1712     
1713 }
1714 //*******************************************************************/
1715 int TrimOligos::stripBarcode(Sequence& seq, int& group){
1716     try {
1717         
1718         string rawSequence = seq.getUnaligned();
1719         int success = bdiffs + 1;       //guilty until proven innocent
1720         
1721         //can you find the barcode
1722         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1723             string oligo = it->first;
1724             if(rawSequence.length() < oligo.length()){  //let's just assume that the barcodes are the same length
1725                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1726                 break;
1727             }
1728             
1729             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1730                 group = it->second;
1731                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1732                 
1733                 success = 0;
1734                 break;
1735             }
1736         }
1737         
1738         //if you found the barcode or if you don't want to allow for diffs
1739         if ((bdiffs == 0) || (success == 0)) { return success; }
1740         
1741         else { //try aligning and see if you can find it
1742             Alignment* alignment;
1743             if (barcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
1744             else{ alignment = NULL; }
1745             
1746             //can you find the barcode
1747             int minDiff = 1e6;
1748             int minCount = 1;
1749             int minGroup = -1;
1750             int minPos = 0;
1751             
1752             for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1753                 string oligo = it->first;
1754                 // int length = oligo.length();
1755                 
1756                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
1757                     success = bdiffs + 10;
1758                     break;
1759                 }
1760                 
1761                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1762                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1763                 oligo = alignment->getSeqAAln();
1764                 string temp = alignment->getSeqBAln();
1765                 
1766                 int alnLength = oligo.length();
1767                 
1768                 for(int i=oligo.length()-1;i>=0;i--){
1769                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
1770                 }
1771                 oligo = oligo.substr(0,alnLength);
1772                 temp = temp.substr(0,alnLength);
1773                 
1774                 int numDiff = countDiffs(oligo, temp);
1775                 
1776                 if(numDiff < minDiff){
1777                     minDiff = numDiff;
1778                     minCount = 1;
1779                     minGroup = it->second;
1780                     minPos = 0;
1781                     for(int i=0;i<alnLength;i++){
1782                         if(temp[i] != '-'){
1783                             minPos++;
1784                         }
1785                     }
1786                 }
1787                 else if(numDiff == minDiff){
1788                     minCount++;
1789                 }
1790                 
1791             }
1792             
1793             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
1794             else if(minCount > 1)       {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1795             else{       //use the best match
1796                 group = minGroup;
1797                 seq.setUnaligned(rawSequence.substr(minPos));
1798                 success = minDiff;
1799             }
1800             
1801             if (alignment != NULL) { delete alignment; }
1802             
1803         }
1804         
1805         return success;
1806         
1807     }
1808     catch(exception& e) {
1809         m->errorOut(e, "TrimOligos", "stripBarcode");
1810         exit(1);
1811     }
1812     
1813 }
1814
1815 /********************************************************************/
1816 int TrimOligos::stripForward(Sequence& seq, int& group){
1817     try {
1818         
1819         string rawSequence = seq.getUnaligned();
1820         int success = pdiffs + 1;       //guilty until proven innocent
1821         
1822         //can you find the primer
1823         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1824             string oligo = it->first;
1825             if(rawSequence.length() < oligo.length()){  //let's just assume that the primers are the same length
1826                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1827                 break;
1828             }
1829             
1830             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1831                 group = it->second;
1832                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1833                 success = 0;
1834                 break;
1835             }
1836         }
1837         
1838         //if you found the barcode or if you don't want to allow for diffs
1839         if ((pdiffs == 0) || (success == 0)) {  return success; }
1840         
1841         else { //try aligning and see if you can find it
1842             Alignment* alignment;
1843             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1844             else{ alignment = NULL; }
1845             
1846             //can you find the barcode
1847             int minDiff = 1e6;
1848             int minCount = 1;
1849             int minGroup = -1;
1850             int minPos = 0;
1851             
1852             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1853                 string oligo = it->first;
1854                 // int length = oligo.length();
1855                 
1856                 if(rawSequence.length() < maxFPrimerLength){
1857                     success = pdiffs + 100;
1858                     break;
1859                 }
1860                 
1861                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1862                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1863                 oligo = alignment->getSeqAAln();
1864                 string temp = alignment->getSeqBAln();
1865                 
1866                 int alnLength = oligo.length();
1867                 
1868                 for(int i=oligo.length()-1;i>=0;i--){
1869                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
1870                 }
1871                 oligo = oligo.substr(0,alnLength);
1872                 temp = temp.substr(0,alnLength);
1873                 
1874                 int numDiff = countDiffs(oligo, temp);
1875                 
1876                 if(numDiff < minDiff){
1877                     minDiff = numDiff;
1878                     minCount = 1;
1879                     minGroup = it->second;
1880                     minPos = 0;
1881                     for(int i=0;i<alnLength;i++){
1882                         if(temp[i] != '-'){
1883                             minPos++;
1884                         }
1885                     }
1886                 }
1887                 else if(numDiff == minDiff){
1888                     minCount++;
1889                 }
1890                 
1891             }
1892             
1893             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1894             else if(minCount > 1)       {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1895             else{       //use the best match
1896                 group = minGroup;
1897                 seq.setUnaligned(rawSequence.substr(minPos));
1898                 success = minDiff;
1899             }
1900             
1901             if (alignment != NULL) { delete alignment; }
1902             
1903         }
1904         
1905         return success;
1906         
1907     }
1908     catch(exception& e) {
1909         m->errorOut(e, "TrimOligos", "stripForward");
1910         exit(1);
1911     }
1912 }
1913 //*******************************************************************/
1914 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1915     try {
1916         
1917         if (paired) { int success = stripPairedPrimers(seq, qual, group, keepForward); return success; }
1918         
1919         string rawSequence = seq.getUnaligned();
1920         int success = pdiffs + 1;       //guilty until proven innocent
1921         
1922         //can you find the primer
1923         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1924             string oligo = it->first;
1925             if(rawSequence.length() < oligo.length()){  //let's just assume that the primers are the same length
1926                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1927                 break;
1928             }
1929             
1930             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1931                 group = it->second;
1932                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
1933                 if(qual.getName() != ""){
1934                     if (!keepForward) { qual.trimQScores(oligo.length(), -1); }
1935                 }
1936                 success = 0;
1937                 break;
1938             }
1939         }
1940         
1941         //if you found the barcode or if you don't want to allow for diffs
1942         if ((pdiffs == 0) || (success == 0)) { return success; }
1943         
1944         else { //try aligning and see if you can find it
1945             Alignment* alignment;
1946             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1947             else{ alignment = NULL; }
1948             
1949             //can you find the barcode
1950             int minDiff = 1e6;
1951             int minCount = 1;
1952             int minGroup = -1;
1953             int minPos = 0;
1954             
1955             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1956                 string oligo = it->first;
1957                 // int length = oligo.length();
1958                 
1959                 if(rawSequence.length() < maxFPrimerLength){
1960                     success = pdiffs + 100;
1961                     break;
1962                 }
1963                 
1964                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1965                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1966                 oligo = alignment->getSeqAAln();
1967                 string temp = alignment->getSeqBAln();
1968                 
1969                 int alnLength = oligo.length();
1970                 
1971                 for(int i=oligo.length()-1;i>=0;i--){
1972                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
1973                 }
1974                 oligo = oligo.substr(0,alnLength);
1975                 temp = temp.substr(0,alnLength);
1976                 
1977                 int numDiff = countDiffs(oligo, temp);
1978                 
1979                 if(numDiff < minDiff){
1980                     minDiff = numDiff;
1981                     minCount = 1;
1982                     minGroup = it->second;
1983                     minPos = 0;
1984                     for(int i=0;i<alnLength;i++){
1985                         if(temp[i] != '-'){
1986                             minPos++;
1987                         }
1988                     }
1989                 }
1990                 else if(numDiff == minDiff){
1991                     minCount++;
1992                 }
1993                 
1994             }
1995             
1996             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1997             else if(minCount > 1)       {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1998             else{       //use the best match
1999                 group = minGroup;
2000                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
2001                 if(qual.getName() != ""){
2002                     if (!keepForward) { qual.trimQScores(minPos, -1); }
2003                 }
2004                 success = minDiff;
2005             }
2006             
2007             if (alignment != NULL) { delete alignment; }
2008             
2009         }
2010         
2011         return success;
2012         
2013     }
2014     catch(exception& e) {
2015         m->errorOut(e, "TrimOligos", "stripForward");
2016         exit(1);
2017     }
2018 }
2019 //******************************************************************/
2020 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
2021     try {
2022         string rawSequence = seq.getUnaligned();
2023         bool success = 0;       //guilty until proven innocent
2024         
2025         for(int i=0;i<revPrimer.size();i++){
2026             string oligo = revPrimer[i];
2027             
2028             if(rawSequence.length() < oligo.length()){
2029                 success = 0;
2030                 break;
2031             }
2032             
2033             if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2034                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2035                 if(qual.getName() != ""){
2036                     qual.trimQScores(-1, rawSequence.length()-oligo.length());
2037                 }
2038                 success = 1;
2039                 break;
2040             }
2041         }       
2042         return success;
2043         
2044     }
2045     catch(exception& e) {
2046         m->errorOut(e, "TrimOligos", "stripReverse");
2047         exit(1);
2048     }
2049 }
2050 //******************************************************************/
2051 bool TrimOligos::stripReverse(Sequence& seq){
2052     try {
2053         
2054         string rawSequence = seq.getUnaligned();
2055         bool success = 0;       //guilty until proven innocent
2056         
2057         for(int i=0;i<revPrimer.size();i++){
2058             string oligo = revPrimer[i];
2059             
2060             if(rawSequence.length() < oligo.length()){
2061                 success = 0;
2062                 break;
2063             }
2064             
2065             if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2066                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2067                 success = 1;
2068                 break;
2069             }
2070         }       
2071         
2072         return success;
2073         
2074     }
2075     catch(exception& e) {
2076         m->errorOut(e, "TrimOligos", "stripReverse");
2077         exit(1);
2078     }
2079 }
2080 //******************************************************************/
2081 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
2082     try {
2083         string rawSequence = seq.getUnaligned();
2084         bool success = ldiffs + 1;      //guilty until proven innocent
2085         
2086         for(int i=0;i<linker.size();i++){
2087             string oligo = linker[i];
2088             
2089             if(rawSequence.length() < oligo.length()){
2090                 success = ldiffs + 10;
2091                 break;
2092             }
2093             
2094             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2095                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2096                 if(qual.getName() != ""){
2097                     qual.trimQScores(oligo.length(), -1);
2098                 }
2099                 success = 0;
2100                 break;
2101             }
2102         }
2103         
2104         //if you found the linker or if you don't want to allow for diffs
2105         if ((ldiffs == 0) || (success == 0)) { return success; }
2106         
2107         else { //try aligning and see if you can find it
2108             Alignment* alignment;
2109             if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }   
2110             else{ alignment = NULL; }
2111             
2112             //can you find the barcode
2113             int minDiff = 1e6;
2114             int minCount = 1;
2115             int minPos = 0;
2116             
2117             for(int i = 0; i < linker.size(); i++){
2118                 string oligo = linker[i];
2119                 // int length = oligo.length();
2120                 
2121                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
2122                     success = ldiffs + 10;
2123                     break;
2124                 }
2125                 
2126                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2127                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2128                 oligo = alignment->getSeqAAln();
2129                 string temp = alignment->getSeqBAln();
2130                 
2131                 int alnLength = oligo.length();
2132                 
2133                 for(int i=oligo.length()-1;i>=0;i--){
2134                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2135                 }
2136                 oligo = oligo.substr(0,alnLength);
2137                 temp = temp.substr(0,alnLength);
2138                 
2139                 int numDiff = countDiffs(oligo, temp);
2140                 
2141                 if(numDiff < minDiff){
2142                     minDiff = numDiff;
2143                     minCount = 1;
2144                     minPos = 0;
2145                     for(int i=0;i<alnLength;i++){
2146                         if(temp[i] != '-'){
2147                             minPos++;
2148                         }
2149                     }
2150                 }
2151                 else if(numDiff == minDiff){
2152                     minCount++;
2153                 }
2154                 
2155             }
2156             
2157             if(minDiff > ldiffs)        {       success = minDiff;      }       //no good matches
2158             else if(minCount > 1)       {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
2159             else{       //use the best match
2160                 seq.setUnaligned(rawSequence.substr(minPos));
2161                 
2162                 if(qual.getName() != ""){
2163                     qual.trimQScores(minPos, -1);
2164                 }
2165                 success = minDiff;
2166             }
2167             
2168             if (alignment != NULL) { delete alignment; }
2169             
2170         }
2171         
2172         
2173         return success;
2174         
2175     }
2176     catch(exception& e) {
2177         m->errorOut(e, "TrimOligos", "stripLinker");
2178         exit(1);
2179     }
2180 }
2181 //******************************************************************/
2182 bool TrimOligos::stripLinker(Sequence& seq){
2183     try {
2184         
2185         string rawSequence = seq.getUnaligned();
2186         bool success = ldiffs +1;       //guilty until proven innocent
2187         
2188         for(int i=0;i<linker.size();i++){
2189             string oligo = linker[i];
2190             
2191             if(rawSequence.length() < oligo.length()){
2192                 success = ldiffs +10;
2193                 break;
2194             }
2195             
2196             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2197                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2198                 success = 0;
2199                 break;
2200             }
2201         }       
2202         
2203         //if you found the linker or if you don't want to allow for diffs
2204         if ((ldiffs == 0) || (success == 0)) { return success; }
2205         
2206         else { //try aligning and see if you can find it
2207             Alignment* alignment;
2208             if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2209             else{ alignment = NULL; }
2210             
2211             //can you find the barcode
2212             int minDiff = 1e6;
2213             int minCount = 1;
2214             int minPos = 0;
2215             
2216             for(int i = 0; i < linker.size(); i++){
2217                 string oligo = linker[i];
2218                 // int length = oligo.length();
2219                 
2220                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
2221                     success = ldiffs + 10;
2222                     break;
2223                 }
2224                 
2225                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2226                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2227                 oligo = alignment->getSeqAAln();
2228                 string temp = alignment->getSeqBAln();
2229                 
2230                 int alnLength = oligo.length();
2231                 
2232                 for(int i=oligo.length()-1;i>=0;i--){
2233                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2234                 }
2235                 oligo = oligo.substr(0,alnLength);
2236                 temp = temp.substr(0,alnLength);
2237                 
2238                 int numDiff = countDiffs(oligo, temp);
2239                 
2240                 if(numDiff < minDiff){
2241                     minDiff = numDiff;
2242                     minCount = 1;
2243                     minPos = 0;
2244                     for(int i=0;i<alnLength;i++){
2245                         if(temp[i] != '-'){
2246                             minPos++;
2247                         }
2248                     }
2249                 }
2250                 else if(numDiff == minDiff){
2251                     minCount++;
2252                 }
2253                 
2254             }
2255             
2256             if(minDiff > ldiffs)        {       success = minDiff;      }       //no good matches
2257             else if(minCount > 1)       {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
2258             else{       //use the best match
2259                 seq.setUnaligned(rawSequence.substr(minPos));
2260                 success = minDiff;
2261             }
2262             
2263             if (alignment != NULL) { delete alignment; }
2264             
2265         }
2266         
2267         return success;
2268         
2269     }
2270     catch(exception& e) {
2271         m->errorOut(e, "TrimOligos", "stripLinker");
2272         exit(1);
2273     }
2274 }
2275
2276 //******************************************************************/
2277 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
2278     try {
2279         string rawSequence = seq.getUnaligned();
2280         bool success = sdiffs+1;        //guilty until proven innocent
2281         
2282         for(int i=0;i<spacer.size();i++){
2283             string oligo = spacer[i];
2284             
2285             if(rawSequence.length() < oligo.length()){
2286                 success = sdiffs+10;
2287                 break;
2288             }
2289             
2290             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2291                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2292                 if(qual.getName() != ""){
2293                     qual.trimQScores(oligo.length(), -1);
2294                 }
2295                 success = 0;
2296                 break;
2297             }
2298         }
2299         
2300         //if you found the spacer or if you don't want to allow for diffs
2301         if ((sdiffs == 0) || (success == 0)) { return success; }
2302         
2303         else { //try aligning and see if you can find it
2304             Alignment* alignment;
2305             if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2306             else{ alignment = NULL; }
2307             
2308             //can you find the barcode
2309             int minDiff = 1e6;
2310             int minCount = 1;
2311             int minPos = 0;
2312             
2313             for(int i = 0; i < spacer.size(); i++){
2314                 string oligo = spacer[i];
2315                 // int length = oligo.length();
2316                 
2317                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
2318                     success = sdiffs + 10;
2319                     break;
2320                 }
2321                 
2322                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2323                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2324                 oligo = alignment->getSeqAAln();
2325                 string temp = alignment->getSeqBAln();
2326                 
2327                 int alnLength = oligo.length();
2328                 
2329                 for(int i=oligo.length()-1;i>=0;i--){
2330                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2331                 }
2332                 oligo = oligo.substr(0,alnLength);
2333                 temp = temp.substr(0,alnLength);
2334                 
2335                 int numDiff = countDiffs(oligo, temp);
2336                 
2337                 if(numDiff < minDiff){
2338                     minDiff = numDiff;
2339                     minCount = 1;
2340                     minPos = 0;
2341                     for(int i=0;i<alnLength;i++){
2342                         if(temp[i] != '-'){
2343                             minPos++;
2344                         }
2345                     }
2346                 }
2347                 else if(numDiff == minDiff){
2348                     minCount++;
2349                 }
2350                 
2351             }
2352             
2353             if(minDiff > sdiffs)        {       success = minDiff;      }       //no good matches
2354             else if(minCount > 1)       {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
2355             else{       //use the best match
2356                 seq.setUnaligned(rawSequence.substr(minPos));
2357                 
2358                 if(qual.getName() != ""){
2359                     qual.trimQScores(minPos, -1);
2360                 }
2361                 success = minDiff;
2362             }
2363             
2364             if (alignment != NULL) { delete alignment; }
2365             
2366         }
2367         
2368         
2369         return success;
2370         
2371     }
2372     catch(exception& e) {
2373         m->errorOut(e, "TrimOligos", "stripSpacer");
2374         exit(1);
2375     }
2376 }
2377 //******************************************************************/
2378 bool TrimOligos::stripSpacer(Sequence& seq){
2379     try {
2380         
2381         string rawSequence = seq.getUnaligned();
2382         bool success = sdiffs+1;        //guilty until proven innocent
2383         
2384         for(int i=0;i<spacer.size();i++){
2385             string oligo = spacer[i];
2386             
2387             if(rawSequence.length() < oligo.length()){
2388                 success = sdiffs+10;
2389                 break;
2390             }
2391             
2392             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2393                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2394                 success = 0;
2395                 break;
2396             }
2397         }       
2398         
2399         //if you found the spacer or if you don't want to allow for diffs
2400         if ((sdiffs == 0) || (success == 0)) { return success; }
2401         
2402         else { //try aligning and see if you can find it
2403             Alignment* alignment;
2404             if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2405             else{ alignment = NULL; }
2406             
2407             //can you find the barcode
2408             int minDiff = 1e6;
2409             int minCount = 1;
2410             int minPos = 0;
2411             
2412             for(int i = 0; i < spacer.size(); i++){
2413                 string oligo = spacer[i];
2414                 // int length = oligo.length();
2415                 
2416                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
2417                     success = sdiffs + 10;
2418                     break;
2419                 }
2420                 
2421                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2422                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2423                 oligo = alignment->getSeqAAln();
2424                 string temp = alignment->getSeqBAln();
2425                 
2426                 int alnLength = oligo.length();
2427                 
2428                 for(int i=oligo.length()-1;i>=0;i--){
2429                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2430                 }
2431                 oligo = oligo.substr(0,alnLength);
2432                 temp = temp.substr(0,alnLength);
2433                 
2434                 int numDiff = countDiffs(oligo, temp);
2435                 
2436                 if(numDiff < minDiff){
2437                     minDiff = numDiff;
2438                     minCount = 1;
2439                     minPos = 0;
2440                     for(int i=0;i<alnLength;i++){
2441                         if(temp[i] != '-'){
2442                             minPos++;
2443                         }
2444                     }
2445                 }
2446                 else if(numDiff == minDiff){
2447                     minCount++;
2448                 }
2449                 
2450             }
2451             
2452             if(minDiff > sdiffs)        {       success = minDiff;      }       //no good matches
2453             else if(minCount > 1)       {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
2454             else{       //use the best match
2455                 seq.setUnaligned(rawSequence.substr(minPos));
2456                 success = minDiff;
2457             }
2458             
2459             if (alignment != NULL) { delete alignment; }
2460             
2461         }
2462         
2463         return success;
2464         
2465     }
2466     catch(exception& e) {
2467         m->errorOut(e, "TrimOligos", "stripSpacer");
2468         exit(1);
2469     }
2470 }
2471
2472 //******************************************************************/
2473 bool TrimOligos::compareDNASeq(string oligo, string seq){
2474     try {
2475         bool success = 1;
2476         int length = oligo.length();
2477         
2478         for(int i=0;i<length;i++){
2479             
2480             if(oligo[i] != seq[i]){
2481                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0; }
2482                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))        {       success = 0;    }
2483                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))    {       success = 0;    }
2484                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))    {       success = 0;    }
2485                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))    {       success = 0;    }
2486                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))    {       success = 0;    }
2487                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))    {       success = 0;    }
2488                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))    {       success = 0;    }
2489                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2490                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2491                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
2492                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }       
2493                 
2494                 if(success == 0)        {       break;  }
2495             }
2496             else{
2497                 success = 1;
2498             }
2499         }
2500         
2501         return success;
2502     }
2503     catch(exception& e) {
2504         m->errorOut(e, "TrimOligos", "compareDNASeq");
2505         exit(1);
2506     }
2507     
2508 }
2509 //********************************************************************/
2510 int TrimOligos::countDiffs(string oligo, string seq){
2511     try {
2512         
2513         int length = oligo.length();
2514         int countDiffs = 0;
2515         
2516         for(int i=0;i<length;i++){
2517             
2518             if(oligo[i] != seq[i]){
2519                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++; }
2520                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))        {       countDiffs++;   }
2521                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))    {       countDiffs++;   }
2522                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))    {       countDiffs++;   }
2523                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))    {       countDiffs++;   }
2524                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))    {       countDiffs++;   }
2525                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))    {       countDiffs++;   }
2526                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))    {       countDiffs++;   }
2527                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2528                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2529                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
2530                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
2531             }
2532             
2533         }
2534         
2535         return countDiffs;
2536     }
2537     catch(exception& e) {
2538         m->errorOut(e, "TrimOligos", "countDiffs");
2539         exit(1);
2540     }
2541 }
2542 //********************************************************************/
2543 string TrimOligos::reverseOligo(string oligo){
2544     try {
2545         string reverse = "";
2546         
2547         for(int i=oligo.length()-1;i>=0;i--){
2548             
2549             if(oligo[i] == 'A') {       reverse += 'T'; }
2550             else if(oligo[i] == 'T'){   reverse += 'A'; }
2551             else if(oligo[i] == 'U'){   reverse += 'A'; }
2552             
2553             else if(oligo[i] == 'G'){   reverse += 'C'; }
2554             else if(oligo[i] == 'C'){   reverse += 'G'; }
2555             
2556             else if(oligo[i] == 'R'){   reverse += 'Y'; }
2557             else if(oligo[i] == 'Y'){   reverse += 'R'; }
2558             
2559             else if(oligo[i] == 'M'){   reverse += 'K'; }
2560             else if(oligo[i] == 'K'){   reverse += 'M'; }
2561             
2562             else if(oligo[i] == 'W'){   reverse += 'W'; }
2563             else if(oligo[i] == 'S'){   reverse += 'S'; }
2564             
2565             else if(oligo[i] == 'B'){   reverse += 'V'; }
2566             else if(oligo[i] == 'V'){   reverse += 'B'; }
2567             
2568             else if(oligo[i] == 'D'){   reverse += 'H'; }
2569             else if(oligo[i] == 'H'){   reverse += 'D'; }
2570             
2571             else if(oligo[i] == '-'){   reverse += '-'; }
2572             else        {       reverse += 'N'; }
2573         }
2574         
2575         
2576         return reverse;
2577     }
2578     catch(exception& e) {
2579         m->errorOut(e, "TrimOligos", "reverseOligo");
2580         exit(1);
2581     }
2582 }
2583
2584 /********************************************************************/
2585
2586