]> git.donarmstrong.com Git - mothur.git/blob - trimoligos.cpp
added make.lefse command. fixed bug in make.contigs with trimming reverse barcodes...
[mothur.git] / trimoligos.cpp
1 /*
2  * trimoligos.cpp
3  * Mothur
4  *
5  * Created by westcott on 9/1/11.
6  * Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
13
14
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
18     try {
19         m = MothurOut::getInstance();
20         paired = false;
21         
22         pdiffs = p;
23         bdiffs = b;
24         ldiffs = l;
25         sdiffs = s;
26         
27         barcodes = br;
28         primers = pr;
29         revPrimer = r;
30         linker = lk;
31         spacer = sp;
32         maxFBarcodeLength = 0;
33         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
34             if(it->first.length() > maxFBarcodeLength){
35                 maxFBarcodeLength = it->first.length();
36             }
37         }
38         maxFPrimerLength = 0;
39         map<string,int>::iterator it;
40         for(it=primers.begin();it!=primers.end();it++){
41             if(it->first.length() > maxFPrimerLength){
42                 maxFPrimerLength = it->first.length();
43             }
44         }
45         
46         maxLinkerLength = 0;
47         for(int i = 0; i < linker.size(); i++){
48             if(linker[i].length() > maxLinkerLength){
49                 maxLinkerLength = linker[i].length();
50             }
51         }
52         
53         maxSpacerLength = 0;
54         for(int i = 0; i < spacer.size(); i++){
55             if(spacer[i].length() > maxSpacerLength){
56                 maxSpacerLength = spacer[i].length();
57             }
58         }
59     }
60     catch(exception& e) {
61         m->errorOut(e, "TrimOligos", "TrimOligos");
62         exit(1);
63     }
64 }
65 /********************************************************************/
66 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
67 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
68     try {
69         m = MothurOut::getInstance();
70         
71         pdiffs = p;
72         bdiffs = b;
73         ldiffs = l;
74         sdiffs = s;
75         paired = true;
76         
77         maxFBarcodeLength = 0;
78         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
79             string forward = it->second.forward;
80             map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
81             
82             if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length(); }
83             
84             if (itForward == ifbarcodes.end()) {
85                 vector<int> temp; temp.push_back(it->first);
86                 ifbarcodes[forward] = temp;
87             }else { itForward->second.push_back(it->first); }
88         }
89         
90         maxFPrimerLength = 0;
91         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
92             string forward = it->second.forward;
93             map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
94             
95             if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length(); }
96             
97             if (itForward == ifprimers.end()) {
98                 vector<int> temp; temp.push_back(it->first);
99                 ifprimers[forward] = temp;
100             }else { itForward->second.push_back(it->first); }
101         }
102         
103         maxRBarcodeLength = 0;
104         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
105             string forward = it->second.reverse;
106             map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
107             
108             if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length(); }
109             
110             if (itForward == irbarcodes.end()) {
111                 vector<int> temp; temp.push_back(it->first);
112                 irbarcodes[forward] = temp;
113             }else { itForward->second.push_back(it->first); }
114         }
115         
116         maxRPrimerLength = 0;
117         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
118             string forward = it->second.reverse;
119             map<string, vector<int> >::iterator itForward = irprimers.find(forward);
120             
121             if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length(); }
122             
123             if (itForward == irprimers.end()) {
124                 vector<int> temp; temp.push_back(it->first);
125                 irprimers[forward] = temp;
126             }else { itForward->second.push_back(it->first); }
127         }
128         
129         ipbarcodes = br;
130         ipprimers = pr;
131     }
132     catch(exception& e) {
133         m->errorOut(e, "TrimOligos", "TrimOligos");
134         exit(1);
135     }
136 }
137 /********************************************************************/
138 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
139 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
140     try {
141         m = MothurOut::getInstance();
142         
143         pdiffs = p;
144         bdiffs = b;
145         
146         barcodes = br;
147         primers = pr;
148         revPrimer = r;
149         paired = false;
150         
151         maxFBarcodeLength = 0;
152         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
153             string oligo = it->first;
154             if(oligo.length() > maxFBarcodeLength){
155                 maxFBarcodeLength = oligo.length();
156             }
157         }
158         maxRBarcodeLength = maxFBarcodeLength;
159         
160         maxFPrimerLength = 0;
161         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
162             string oligo = it->first;
163             if(oligo.length() > maxFPrimerLength){
164                 maxFPrimerLength = oligo.length();
165             }
166         }
167         maxRPrimerLength = maxFPrimerLength;
168     }
169     catch(exception& e) {
170         m->errorOut(e, "TrimOligos", "TrimOligos");
171         exit(1);
172     }
173 }
174 /********************************************************************/
175 TrimOligos::~TrimOligos() {}
176 //********************************************************************/
177 bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
178     try {
179         
180         string rawSequence = seq.getUnaligned();
181         
182         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
183             string oligo = it->first;
184             
185             if(rawSequence.length() < oligo.length()) { break; }
186             
187             //search for primer
188             int olength = oligo.length();
189             for (int j = 0; j < rawSequence.length()-olength; j++){
190                 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
191                 string rawChunk = rawSequence.substr(j, olength);
192                 if(compareDNASeq(oligo, rawChunk)) {
193                     primerStart = j;
194                     primerEnd = primerStart + olength;
195                     return true;
196                 }
197                 
198             }
199         }
200         
201         primerStart = 0; primerEnd = 0;
202         //if you don't want to allow for diffs
203         if (pdiffs == 0) { return false; }
204         else { //try aligning and see if you can find it
205             
206             //can you find the barcode
207             int minDiff = 1e6;
208             int minCount = 1;
209             
210             Alignment* alignment;
211             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
212             else{ alignment = NULL; }
213             
214             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
215                 
216                 string prim = it->first;
217                 //search for primer
218                 int olength = prim.length();
219                 if (rawSequence.length() < olength) {} //ignore primers too long for this seq
220                 else{
221                     for (int j = 0; j < rawSequence.length()-olength; j++){
222                         
223                         string oligo = it->first;
224                         
225                         if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
226                         
227                         string rawChunk = rawSequence.substr(j, olength+pdiffs);
228                         
229                         //use needleman to align first primer.length()+numdiffs of sequence to each barcode
230                         alignment->alignPrimer(oligo, rawChunk);
231                         oligo = alignment->getSeqAAln();
232                         string temp = alignment->getSeqBAln();
233                         
234                         int alnLength = oligo.length();
235                         
236                         for(int i=oligo.length()-1;i>=0;i--){
237                             if(oligo[i] != '-'){        alnLength = i+1;        break;  }
238                         }
239                         oligo = oligo.substr(0,alnLength);
240                         temp = temp.substr(0,alnLength);
241                         
242                         int numDiff = countDiffs(oligo, temp);
243                         
244                         if(numDiff < minDiff){
245                             minDiff = numDiff;
246                             minCount = 1;
247                             primerStart = j;
248                             primerEnd = primerStart + alnLength;
249                         }else if(numDiff == minDiff){ minCount++; }
250                     }
251                 }
252             }
253             
254             if (alignment != NULL) { delete alignment; }
255             
256             if(minDiff > pdiffs)        {       primerStart = 0; primerEnd = 0; return false;   }       //no good matches
257             else if(minCount > 1)       {       primerStart = 0; primerEnd = 0; return false;   }       //can't tell the difference between multiple primers
258             else{ return true; }
259         }
260         
261         primerStart = 0; primerEnd = 0;
262         return false;
263         
264     }
265     catch(exception& e) {
266         m->errorOut(e, "TrimOligos", "stripForward");
267         exit(1);
268     }
269 }
270 //******************************************************************/
271 bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
272     try {
273         
274         string rawSequence = seq.getUnaligned();
275         
276         for(int i=0;i<revPrimer.size();i++){
277             string oligo = revPrimer[i];
278             if(rawSequence.length() < oligo.length()) { break; }
279             
280             //search for primer
281             int olength = oligo.length();
282             for (int j = rawSequence.length()-olength; j >= 0; j--){
283                 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
284                 string rawChunk = rawSequence.substr(j, olength);
285                 
286                 if(compareDNASeq(oligo, rawChunk)) {
287                     primerStart = j;
288                     primerEnd = primerStart + olength;
289                     return true;
290                 }
291                 
292             }
293         }
294         
295         primerStart = 0; primerEnd = 0;
296         return false;
297     }
298     catch(exception& e) {
299         m->errorOut(e, "PcrSeqsCommand", "findReverse");
300         exit(1);
301     }
302 }
303 //*******************************************************************/
304
305 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
306     try {
307         
308         if (paired) { int success = stripPairedBarcode(seq, qual, group); return success; }
309         
310         string rawSequence = seq.getUnaligned();
311         int success = bdiffs + 1;       //guilty until proven innocent
312         
313         //can you find the barcode
314         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
315             string oligo = it->first;
316             if(rawSequence.length() < oligo.length()){  //let's just assume that the barcodes are the same length
317                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
318                 break;
319             }
320             
321             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
322                 group = it->second;
323                 seq.setUnaligned(rawSequence.substr(oligo.length()));
324                 
325                 if(qual.getName() != ""){
326                     qual.trimQScores(oligo.length(), -1);
327                 }
328                 
329                 success = 0;
330                 break;
331             }
332         }
333         
334         //if you found the barcode or if you don't want to allow for diffs
335         if ((bdiffs == 0) || (success == 0)) { return success; }
336         
337         else { //try aligning and see if you can find it
338             Alignment* alignment;
339             if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
340             else{ alignment = NULL; }
341             
342             //can you find the barcode
343             int minDiff = 1e6;
344             int minCount = 1;
345             int minGroup = -1;
346             int minPos = 0;
347             
348             for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
349                 string oligo = it->first;
350                 // int length = oligo.length();
351                 
352                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
353                     success = bdiffs + 10;
354                     break;
355                 }
356                 
357                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
358                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
359                 oligo = alignment->getSeqAAln();
360                 string temp = alignment->getSeqBAln();
361                 
362                 int alnLength = oligo.length();
363                 
364                 for(int i=oligo.length()-1;i>=0;i--){
365                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
366                 }
367                 oligo = oligo.substr(0,alnLength);
368                 temp = temp.substr(0,alnLength);
369                 int numDiff = countDiffs(oligo, temp);
370                 
371                 if(numDiff < minDiff){
372                     minDiff = numDiff;
373                     minCount = 1;
374                     minGroup = it->second;
375                     minPos = 0;
376                     for(int i=0;i<alnLength;i++){
377                         if(temp[i] != '-'){
378                             minPos++;
379                         }
380                     }
381                 }
382                 else if(numDiff == minDiff){
383                     minCount++;
384                 }
385                 
386             }
387             
388             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
389             else if(minCount > 1)       {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
390             else{       //use the best match
391                 group = minGroup;
392                 seq.setUnaligned(rawSequence.substr(minPos));
393                 
394                 if(qual.getName() != ""){
395                     qual.trimQScores(minPos, -1);
396                 }
397                 success = minDiff;
398             }
399             
400             if (alignment != NULL) { delete alignment; }
401             
402         }
403         
404         return success;
405         
406     }
407     catch(exception& e) {
408         m->errorOut(e, "TrimOligos", "stripBarcode");
409         exit(1);
410     }
411 }
412 //*******************************************************************/
413 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
414     try {
415         //look for forward barcode
416         string rawFSequence = forwardSeq.getUnaligned();
417         string rawRSequence = reverseSeq.getUnaligned();
418         int success = bdiffs + 1;       //guilty until proven innocent
419         
420         //can you find the forward barcode
421         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
422             string foligo = it->second.forward;
423             string roligo = it->second.reverse;
424             
425             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
426                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
427                 break;
428             }
429             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
430                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
431                 break;
432             }
433             
434             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
435                 group = it->first;
436                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
437                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
438                 success = 0;
439                 break;
440             }
441         }
442         
443         //if you found the barcode or if you don't want to allow for diffs
444         if ((bdiffs == 0) || (success == 0)) { return success; }
445         else { //try aligning and see if you can find it
446             Alignment* alignment;
447             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
448             else{ alignment = NULL; }
449             
450             //can you find the barcode
451             int minDiff = 1e6;
452             int minCount = 1;
453             vector< vector<int> > minFGroup;
454             vector<int> minFPos;
455             
456             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
457             /*
458              1 Sarah Westcott
459              2 John Westcott
460              3 Anna Westcott
461              4 Sarah Schloss
462              5 Pat Schloss
463              6 Gail Brown
464              7 Pat Moore
465              only want to look for forward = Sarah, John, Anna, Pat, Gail
466              reverse = Westcott, Schloss, Brown, Moore
467              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
468              */
469             //cout << endl << forwardSeq.getName() << endl;
470             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
471                 string oligo = it->first;
472                 
473                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
474                     success = bdiffs + 10;
475                     break;
476                 }
477                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
478                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
479                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
480                 oligo = alignment->getSeqAAln();
481                 string temp = alignment->getSeqBAln();
482                 
483                 int alnLength = oligo.length();
484                 
485                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
486                 oligo = oligo.substr(0,alnLength);
487                 temp = temp.substr(0,alnLength);
488                 int numDiff = countDiffs(oligo, temp);
489                 
490                 if (alnLength == 0) { numDiff = bdiffs + 100; }
491                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
492                 
493                 if(numDiff < minDiff){
494                     minDiff = numDiff;
495                     minCount = 1;
496                     minFGroup.clear();
497                     minFGroup.push_back(it->second);
498                     int tempminFPos = 0;
499                     minFPos.clear();
500                     for(int i=0;i<alnLength;i++){
501                         if(temp[i] != '-'){
502                             tempminFPos++;
503                         }
504                     }
505                     minFPos.push_back(tempminFPos);
506                 }else if(numDiff == minDiff){
507                     minFGroup.push_back(it->second);
508                     int tempminFPos = 0;
509                     for(int i=0;i<alnLength;i++){
510                         if(temp[i] != '-'){
511                             tempminFPos++;
512                         }
513                     }
514                     minFPos.push_back(tempminFPos);
515                 }
516             }
517             
518             //cout << minDiff << '\t' << minCount << '\t' << endl;
519             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
520             else{
521                 //check for reverse match
522                 if (alignment != NULL) { delete alignment; }
523                 
524                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
525                 else{ alignment = NULL; }
526                 
527                 //can you find the barcode
528                 minDiff = 1e6;
529                 minCount = 1;
530                 vector< vector<int> > minRGroup;
531                 vector<int> minRPos;
532                 
533                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
534                     string oligo = it->first;
535                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
536                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
537                         success = bdiffs + 10;
538                         break;
539                     }
540                     
541                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
542                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
543                     oligo = alignment->getSeqAAln();
544                     string temp = alignment->getSeqBAln();
545                     
546                     int alnLength = oligo.length();
547                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
548                     oligo = oligo.substr(0,alnLength);
549                     temp = temp.substr(0,alnLength);
550                     int numDiff = countDiffs(oligo, temp);
551                     if (alnLength == 0) { numDiff = bdiffs + 100; }
552                     
553                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
554                     if(numDiff < minDiff){
555                         minDiff = numDiff;
556                         minCount = 1;
557                         minRGroup.clear();
558                         minRGroup.push_back(it->second);
559                         int tempminRPos = 0;
560                         minRPos.clear();
561                         for(int i=0;i<alnLength;i++){
562                             if(temp[i] != '-'){
563                                 tempminRPos++;
564                             }
565                         }
566                         minRPos.push_back(tempminRPos);
567                     }else if(numDiff == minDiff){
568                         int tempminRPos = 0;
569                         for(int i=0;i<alnLength;i++){
570                             if(temp[i] != '-'){
571                                 tempminRPos++;
572                             }
573                         }
574                         minRPos.push_back(tempminRPos);
575                         minRGroup.push_back(it->second);
576                     }
577                     
578                 }
579                 
580                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
581                 else {
582                     bool foundMatch = false;
583                     vector<int> matches;
584                     for (int i = 0; i < minFGroup.size(); i++) {
585                         for (int j = 0; j < minFGroup[i].size(); j++) {
586                             for (int k = 0; k < minRGroup.size(); k++) {
587                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
588                             }
589                         }
590                     }
591                     
592                     int fStart = 0;
593                     int rStart = 0;
594                     if (matches.size() == 1) {
595                         foundMatch = true;
596                         group = matches[0];
597                         fStart = minFPos[0];
598                         rStart = minRPos[0];
599                     }
600                     
601                     //we have an acceptable match for the forward and reverse, but do they match?
602                     if (foundMatch) {
603                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
604                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
605                         success = minDiff;
606                     }else { success = bdiffs + 100;     }
607                 }
608             }
609             
610             if (alignment != NULL) { delete alignment; }
611         }
612         
613         return success;
614         
615     }
616     catch(exception& e) {
617         m->errorOut(e, "TrimOligos", "stripIBarcode");
618         exit(1);
619     }
620     
621 }
622 //*******************************************************************/
623 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
624     try {
625         //look for forward barcode
626         string rawFSequence = forwardSeq.getUnaligned();
627         string rawRSequence = reverseSeq.getUnaligned();
628         int success = bdiffs + 1;       //guilty until proven innocent
629         
630         //can you find the forward barcode
631         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
632             string foligo = it->second.forward;
633             string roligo = it->second.reverse;
634             
635             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
636                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
637                 break;
638             }
639             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
640                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
641                 break;
642             }
643             
644             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
645                 group = it->first;
646                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
647                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
648                 forwardQual.trimQScores(foligo.length(), -1);
649                 reverseQual.trimQScores(roligo.length(), -1);
650                 success = 0;
651                 break;
652             }
653         }
654         
655         //if you found the barcode or if you don't want to allow for diffs
656         if ((bdiffs == 0) || (success == 0)) { return success; }
657         else { //try aligning and see if you can find it
658             Alignment* alignment;
659             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
660             else{ alignment = NULL; }
661             
662             //can you find the barcode
663             int minDiff = 1e6;
664             int minCount = 1;
665             vector< vector<int> > minFGroup;
666             vector<int> minFPos;
667             
668             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
669             /*
670              1 Sarah Westcott
671              2 John Westcott
672              3 Anna Westcott
673              4 Sarah Schloss
674              5 Pat Schloss
675              6 Gail Brown
676              7 Pat Moore
677              only want to look for forward = Sarah, John, Anna, Pat, Gail
678              reverse = Westcott, Schloss, Brown, Moore
679              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
680              */
681             //cout << endl << forwardSeq.getName() << endl;
682             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
683                 string oligo = it->first;
684                 
685                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
686                     success = bdiffs + 10;
687                     break;
688                 }
689                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
690                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
691                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
692                 oligo = alignment->getSeqAAln();
693                 string temp = alignment->getSeqBAln();
694                 
695                 int alnLength = oligo.length();
696                 
697                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
698                 oligo = oligo.substr(0,alnLength);
699                 temp = temp.substr(0,alnLength);
700                 int numDiff = countDiffs(oligo, temp);
701                 
702                 if (alnLength == 0) { numDiff = bdiffs + 100; }
703                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
704                 
705                 if(numDiff < minDiff){
706                     minDiff = numDiff;
707                     minCount = 1;
708                     minFGroup.clear();
709                     minFGroup.push_back(it->second);
710                     int tempminFPos = 0;
711                     minFPos.clear();
712                     for(int i=0;i<alnLength;i++){
713                         if(temp[i] != '-'){
714                             tempminFPos++;
715                         }
716                     }
717                     minFPos.push_back(tempminFPos);
718                 }else if(numDiff == minDiff){
719                     minFGroup.push_back(it->second);
720                     int tempminFPos = 0;
721                     for(int i=0;i<alnLength;i++){
722                         if(temp[i] != '-'){
723                             tempminFPos++;
724                         }
725                     }
726                     minFPos.push_back(tempminFPos);
727                 }
728             }
729             
730             //cout << minDiff << '\t' << minCount << '\t' << endl;
731             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
732             else{
733                 //check for reverse match
734                 if (alignment != NULL) { delete alignment; }
735                 
736                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
737                 else{ alignment = NULL; }
738                 
739                 //can you find the barcode
740                 minDiff = 1e6;
741                 minCount = 1;
742                 vector< vector<int> > minRGroup;
743                 vector<int> minRPos;
744                 
745                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
746                     string oligo = it->first;
747                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
748                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
749                         success = bdiffs + 10;
750                         break;
751                     }
752                     
753                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
754                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
755                     oligo = alignment->getSeqAAln();
756                     string temp = alignment->getSeqBAln();
757                     
758                     int alnLength = oligo.length();
759                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
760                     oligo = oligo.substr(0,alnLength);
761                     temp = temp.substr(0,alnLength);
762                     int numDiff = countDiffs(oligo, temp);
763                     if (alnLength == 0) { numDiff = bdiffs + 100; }
764                     
765                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
766                     if(numDiff < minDiff){
767                         minDiff = numDiff;
768                         minCount = 1;
769                         minRGroup.clear();
770                         minRGroup.push_back(it->second);
771                         int tempminRPos = 0;
772                         minRPos.clear();
773                         for(int i=0;i<alnLength;i++){
774                             if(temp[i] != '-'){
775                                 tempminRPos++;
776                             }
777                         }
778                         minRPos.push_back(tempminRPos);
779                     }else if(numDiff == minDiff){
780                         int tempminRPos = 0;
781                         for(int i=0;i<alnLength;i++){
782                             if(temp[i] != '-'){
783                                 tempminRPos++;
784                             }
785                         }
786                         minRPos.push_back(tempminRPos);
787                         minRGroup.push_back(it->second);
788                     }
789                     
790                 }
791                 
792                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
793                 else {
794                     bool foundMatch = false;
795                     vector<int> matches;
796                     for (int i = 0; i < minFGroup.size(); i++) {
797                         for (int j = 0; j < minFGroup[i].size(); j++) {
798                             for (int k = 0; k < minRGroup.size(); k++) {
799                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
800                             }
801                         }
802                     }
803                     
804                     int fStart = 0;
805                     int rStart = 0;
806                     if (matches.size() == 1) {
807                         foundMatch = true;
808                         group = matches[0];
809                         fStart = minFPos[0];
810                         rStart = minRPos[0];
811                     }
812                     
813                     //we have an acceptable match for the forward and reverse, but do they match?
814                     if (foundMatch) {
815                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
816                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
817                         forwardQual.trimQScores(fStart, -1);
818                         reverseQual.trimQScores(rStart, -1);
819                         success = minDiff;
820                     }else { success = bdiffs + 100;     }
821                 }
822             }
823             
824             if (alignment != NULL) { delete alignment; }
825         }
826         
827         return success;
828         
829     }
830     catch(exception& e) {
831         m->errorOut(e, "TrimOligos", "stripIBarcode");
832         exit(1);
833     }
834     
835 }
836 //*******************************************************************/
837 int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
838     try {
839         //look for forward barcode
840         string rawSeq = seq.getUnaligned();
841         int success = bdiffs + 1;       //guilty until proven innocent
842         //cout << seq.getName() << endl;
843         //can you find the forward barcode
844         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
845             string foligo = it->second.forward;
846             string roligo = it->second.reverse;
847             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
848             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
849             if(rawSeq.length() < foligo.length()){      //let's just assume that the barcodes are the same length
850                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
851                 break;
852             }
853             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
854                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
855                 break;
856             }
857             
858             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
859                 group = it->first;
860                 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
861                 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
862                 if(qual.getName() != ""){
863                     qual.trimQScores(-1, rawSeq.length()-roligo.length());
864                     qual.trimQScores(foligo.length(), -1);
865                 }
866                 success = 0;
867                 break;
868             }
869         }
870         //cout << "success=" << success << endl;
871         //if you found the barcode or if you don't want to allow for diffs
872         if ((bdiffs == 0) || (success == 0)) { return success; }
873         else { //try aligning and see if you can find it
874             Alignment* alignment;
875             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
876             else{ alignment = NULL; }
877             
878             //can you find the barcode
879             int minDiff = 1e6;
880             int minCount = 1;
881             vector< vector<int> > minFGroup;
882             vector<int> minFPos;
883             
884             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
885             /*
886              1 Sarah Westcott
887              2 John Westcott
888              3 Anna Westcott
889              4 Sarah Schloss
890              5 Pat Schloss
891              6 Gail Brown
892              7 Pat Moore
893              only want to look for forward = Sarah, John, Anna, Pat, Gail
894              reverse = Westcott, Schloss, Brown, Moore
895              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
896              */
897             //cout << endl << forwardSeq.getName() << endl;
898             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
899                 string oligo = it->first;
900                 
901                 if(rawSeq.length() < maxFBarcodeLength){        //let's just assume that the barcodes are the same length
902                     success = bdiffs + 10;
903                     break;
904                 }
905                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
906                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
907                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
908                 oligo = alignment->getSeqAAln();
909                 string temp = alignment->getSeqBAln();
910                 
911                 int alnLength = oligo.length();
912                 
913                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
914                 oligo = oligo.substr(0,alnLength);
915                 temp = temp.substr(0,alnLength);
916                 int numDiff = countDiffs(oligo, temp);
917                 
918                 if (alnLength == 0) { numDiff = bdiffs + 100; }
919                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
920                 
921                 if(numDiff < minDiff){
922                     minDiff = numDiff;
923                     minCount = 1;
924                     minFGroup.clear();
925                     minFGroup.push_back(it->second);
926                     int tempminFPos = 0;
927                     minFPos.clear();
928                     for(int i=0;i<alnLength;i++){
929                         if(temp[i] != '-'){
930                             tempminFPos++;
931                         }
932                     }
933                     minFPos.push_back(tempminFPos);
934                 }else if(numDiff == minDiff){
935                     minFGroup.push_back(it->second);
936                     int tempminFPos = 0;
937                     for(int i=0;i<alnLength;i++){
938                         if(temp[i] != '-'){
939                             tempminFPos++;
940                         }
941                     }
942                     minFPos.push_back(tempminFPos);
943                 }
944             }
945             
946             //cout << minDiff << '\t' << minCount << '\t' << endl;
947             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
948             else{
949                 //check for reverse match
950                 if (alignment != NULL) { delete alignment; }
951                 
952                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
953                 else{ alignment = NULL; }
954                 
955                 //can you find the barcode
956                 minDiff = 1e6;
957                 minCount = 1;
958                 vector< vector<int> > minRGroup;
959                 vector<int> minRPos;
960                 
961                 string rawRSequence = reverseOligo(seq.getUnaligned());
962                 //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl;
963                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
964                     string oligo = reverseOligo(it->first);
965                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
966                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
967                         success = bdiffs + 10;
968                         break;
969                     }
970                     
971                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
972                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
973                     oligo = alignment->getSeqAAln();
974                     string temp = alignment->getSeqBAln();
975                     
976                     int alnLength = oligo.length();
977                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
978                     oligo = oligo.substr(0,alnLength);
979                     temp = temp.substr(0,alnLength);
980                     int numDiff = countDiffs(oligo, temp);
981                     if (alnLength == 0) { numDiff = bdiffs + 100; }
982                     
983                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
984                     if(numDiff < minDiff){
985                         minDiff = numDiff;
986                         minCount = 1;
987                         minRGroup.clear();
988                         minRGroup.push_back(it->second);
989                         int tempminRPos = 0;
990                         minRPos.clear();
991                         for(int i=0;i<alnLength;i++){
992                             if(temp[i] != '-'){
993                                 tempminRPos++;
994                             }
995                         }
996                         minRPos.push_back(tempminRPos);
997                     }else if(numDiff == minDiff){
998                         int tempminRPos = 0;
999                         for(int i=0;i<alnLength;i++){
1000                             if(temp[i] != '-'){
1001                                 tempminRPos++;
1002                             }
1003                         }
1004                         minRPos.push_back(tempminRPos);
1005                         minRGroup.push_back(it->second);
1006                     }
1007                     
1008                 }
1009                 
1010                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
1011                 else {
1012                     bool foundMatch = false;
1013                     vector<int> matches;
1014                     for (int i = 0; i < minFGroup.size(); i++) {
1015                         for (int j = 0; j < minFGroup[i].size(); j++) {
1016                             for (int k = 0; k < minRGroup.size(); k++) {
1017                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1018                             }
1019                         }
1020                     }
1021                     
1022                     int fStart = 0;
1023                     int rStart = 0;
1024                     if (matches.size() == 1) {
1025                         foundMatch = true;
1026                         group = matches[0];
1027                         fStart = minFPos[0];
1028                         rStart = rawSeq.length() - minRPos[0];
1029                         if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence
1030                     }
1031                     
1032                     //we have an acceptable match for the forward and reverse, but do they match?
1033                     if (foundMatch) {
1034                         string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1035                         seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1036                         if(qual.getName() != ""){
1037                             qual.trimQScores(-1, rStart);
1038                             qual.trimQScores(fStart, -1);
1039                         }
1040                         success = minDiff;
1041                         //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
1042                     }else { success = bdiffs + 100;     }
1043                 }
1044             }
1045             
1046             if (alignment != NULL) { delete alignment; }
1047         }
1048         
1049         return success;
1050         
1051     }
1052     catch(exception& e) {
1053         m->errorOut(e, "TrimOligos", "stripPairedBarcode");
1054         exit(1);
1055     }
1056     
1057 }
1058 //*******************************************************************/
1059 int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1060     try {
1061         //look for forward
1062         string rawSeq = seq.getUnaligned();
1063         int success = pdiffs + 1;       //guilty until proven innocent
1064         //cout << seq.getName() << endl;
1065         //can you find the forward
1066         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1067             string foligo = it->second.forward;
1068             string roligo = it->second.reverse;
1069             
1070             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
1071             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
1072             if(rawSeq.length() < foligo.length()){      //let's just assume that the barcodes are the same length
1073                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1074                 break;
1075             }
1076             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
1077                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1078                 break;
1079             }
1080             
1081             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
1082                 group = it->first;
1083                 if (!keepForward) {
1084                     string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
1085                     seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
1086                     if(qual.getName() != ""){
1087                         qual.trimQScores(-1, rawSeq.length()-roligo.length());
1088                         qual.trimQScores(foligo.length(), -1);
1089                     }
1090                 }
1091                 success = 0;
1092                 break;
1093             }
1094         }
1095         //cout << "success=" << success << endl;
1096         //if you found the barcode or if you don't want to allow for diffs
1097         if ((pdiffs == 0) || (success == 0)) { return success; }
1098         else { //try aligning and see if you can find it
1099             Alignment* alignment;
1100             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1101             else{ alignment = NULL; }
1102             
1103             //can you find the barcode
1104             int minDiff = 1e6;
1105             int minCount = 1;
1106             vector< vector<int> > minFGroup;
1107             vector<int> minFPos;
1108             
1109             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1110             /*
1111              1 Sarah Westcott
1112              2 John Westcott
1113              3 Anna Westcott
1114              4 Sarah Schloss
1115              5 Pat Schloss
1116              6 Gail Brown
1117              7 Pat Moore
1118              only want to look for forward = Sarah, John, Anna, Pat, Gail
1119              reverse = Westcott, Schloss, Brown, Moore
1120              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1121              */
1122             //cout << endl << forwardSeq.getName() << endl;
1123             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1124                 string oligo = it->first;
1125                 
1126                 if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1127                     success = pdiffs + 10;
1128                     break;
1129                 }
1130                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl;
1131                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1132                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
1133                 oligo = alignment->getSeqAAln();
1134                 string temp = alignment->getSeqBAln();
1135                 
1136                 int alnLength = oligo.length();
1137                 
1138                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1139                 oligo = oligo.substr(0,alnLength);
1140                 temp = temp.substr(0,alnLength);
1141                 int numDiff = countDiffs(oligo, temp);
1142                 
1143                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1144                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1145                 
1146                 if(numDiff < minDiff){
1147                     minDiff = numDiff;
1148                     minCount = 1;
1149                     minFGroup.clear();
1150                     minFGroup.push_back(it->second);
1151                     int tempminFPos = 0;
1152                     minFPos.clear();
1153                     for(int i=0;i<alnLength;i++){
1154                         if(temp[i] != '-'){
1155                             tempminFPos++;
1156                         }
1157                     }
1158                     minFPos.push_back(tempminFPos);
1159                 }else if(numDiff == minDiff){
1160                     minFGroup.push_back(it->second);
1161                     int tempminFPos = 0;
1162                     for(int i=0;i<alnLength;i++){
1163                         if(temp[i] != '-'){
1164                             tempminFPos++;
1165                         }
1166                     }
1167                     minFPos.push_back(tempminFPos);
1168                 }
1169             }
1170             
1171             //cout << minDiff << '\t' << minCount << '\t' << endl;
1172             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1173             else{
1174                 //check for reverse match
1175                 if (alignment != NULL) { delete alignment; }
1176                 
1177                 if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1178                 else{ alignment = NULL; }
1179                 
1180                 //can you find the barcode
1181                 minDiff = 1e6;
1182                 minCount = 1;
1183                 vector< vector<int> > minRGroup;
1184                 vector<int> minRPos;
1185                 
1186                 string rawRSequence = reverseOligo(seq.getUnaligned());
1187                 
1188                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1189                     string oligo = reverseOligo(it->first);
1190                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
1191                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1192                         success = pdiffs + 10;
1193                         break;
1194                     }
1195                     
1196                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1197                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1198                     oligo = alignment->getSeqAAln();
1199                     string temp = alignment->getSeqBAln();
1200                     
1201                     int alnLength = oligo.length();
1202                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1203                     oligo = oligo.substr(0,alnLength);
1204                     temp = temp.substr(0,alnLength);
1205                     int numDiff = countDiffs(oligo, temp);
1206                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1207                     
1208                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1209                     if(numDiff < minDiff){
1210                         minDiff = numDiff;
1211                         minCount = 1;
1212                         minRGroup.clear();
1213                         minRGroup.push_back(it->second);
1214                         int tempminRPos = 0;
1215                         minRPos.clear();
1216                         for(int i=0;i<alnLength;i++){
1217                             if(temp[i] != '-'){
1218                                 tempminRPos++;
1219                             }
1220                         }
1221                         minRPos.push_back(tempminRPos);
1222                     }else if(numDiff == minDiff){
1223                         int tempminRPos = 0;
1224                         for(int i=0;i<alnLength;i++){
1225                             if(temp[i] != '-'){
1226                                 tempminRPos++;
1227                             }
1228                         }
1229                         minRPos.push_back(tempminRPos);
1230                         minRGroup.push_back(it->second);
1231                     }
1232                     
1233                 }
1234                 
1235                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1236                 else {
1237                     bool foundMatch = false;
1238                     vector<int> matches;
1239                     for (int i = 0; i < minFGroup.size(); i++) {
1240                         for (int j = 0; j < minFGroup[i].size(); j++) {
1241                             for (int k = 0; k < minRGroup.size(); k++) {
1242                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1243                             }
1244                         }
1245                     }
1246                     
1247                     int fStart = 0;
1248                     int rStart = 0;
1249                     if (matches.size() == 1) {
1250                         foundMatch = true;
1251                         group = matches[0];
1252                         fStart = minFPos[0];
1253                         rStart = rawSeq.length() - minRPos[0];
1254                         if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence
1255                     }
1256                     
1257                     //we have an acceptable match for the forward and reverse, but do they match?
1258                     if (foundMatch) {
1259                         if (!keepForward) {
1260                             string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1261                             seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1262                             if(qual.getName() != ""){
1263                                 qual.trimQScores(-1, rStart);
1264                                 qual.trimQScores(fStart, -1);
1265                             }
1266                         }
1267                         success = minDiff;
1268                         //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
1269                     }else { success = pdiffs + 100;     }
1270                 }
1271             }
1272             
1273             if (alignment != NULL) { delete alignment; }
1274         }
1275         
1276         return success;
1277         
1278     }
1279     catch(exception& e) {
1280         m->errorOut(e, "TrimOligos", "stripPairedPrimers");
1281         exit(1);
1282     }
1283     
1284 }
1285
1286 //*******************************************************************/
1287 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
1288     try {
1289         //look for forward barcode
1290         string rawFSequence = forwardSeq.getUnaligned();
1291         string rawRSequence = reverseSeq.getUnaligned();
1292         int success = pdiffs + 1;       //guilty until proven innocent
1293         
1294         //can you find the forward barcode
1295         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1296             string foligo = it->second.forward;
1297             string roligo = it->second.reverse;
1298             
1299             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
1300                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1301                 break;
1302             }
1303             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
1304                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1305                 break;
1306             }
1307             
1308             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
1309                 group = it->first;
1310                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1311                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1312                 forwardQual.trimQScores(foligo.length(), -1);
1313                 reverseQual.trimQScores(roligo.length(), -1);
1314                 success = 0;
1315                 break;
1316             }
1317         }
1318         
1319         //if you found the barcode or if you don't want to allow for diffs
1320         if ((pdiffs == 0) || (success == 0)) { return success; }
1321         else { //try aligning and see if you can find it
1322             Alignment* alignment;
1323             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1324             else{ alignment = NULL; }
1325             
1326             //can you find the barcode
1327             int minDiff = 1e6;
1328             int minCount = 1;
1329             vector< vector<int> > minFGroup;
1330             vector<int> minFPos;
1331             
1332             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1333             /*
1334              1 Sarah Westcott
1335              2 John Westcott
1336              3 Anna Westcott
1337              4 Sarah Schloss
1338              5 Pat Schloss
1339              6 Gail Brown
1340              7 Pat Moore
1341              only want to look for forward = Sarah, John, Anna, Pat, Gail
1342              reverse = Westcott, Schloss, Brown, Moore
1343              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1344              */
1345             //cout << endl << forwardSeq.getName() << endl;
1346             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1347                 string oligo = it->first;
1348                 
1349                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
1350                     success = pdiffs + 10;
1351                     break;
1352                 }
1353                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1354                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1355                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1356                 oligo = alignment->getSeqAAln();
1357                 string temp = alignment->getSeqBAln();
1358                 
1359                 int alnLength = oligo.length();
1360                 
1361                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1362                 oligo = oligo.substr(0,alnLength);
1363                 temp = temp.substr(0,alnLength);
1364                 int numDiff = countDiffs(oligo, temp);
1365                 
1366                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1367                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1368                 
1369                 if(numDiff < minDiff){
1370                     minDiff = numDiff;
1371                     minCount = 1;
1372                     minFGroup.clear();
1373                     minFGroup.push_back(it->second);
1374                     int tempminFPos = 0;
1375                     minFPos.clear();
1376                     for(int i=0;i<alnLength;i++){
1377                         if(temp[i] != '-'){
1378                             tempminFPos++;
1379                         }
1380                     }
1381                     minFPos.push_back(tempminFPos);
1382                 }else if(numDiff == minDiff){
1383                     minFGroup.push_back(it->second);
1384                     int tempminFPos = 0;
1385                     for(int i=0;i<alnLength;i++){
1386                         if(temp[i] != '-'){
1387                             tempminFPos++;
1388                         }
1389                     }
1390                     minFPos.push_back(tempminFPos);
1391                 }
1392             }
1393             
1394             //cout << minDiff << '\t' << minCount << '\t' << endl;
1395             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1396             else{
1397                 //check for reverse match
1398                 if (alignment != NULL) { delete alignment; }
1399                 
1400                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1401                 else{ alignment = NULL; }
1402                 
1403                 //can you find the barcode
1404                 minDiff = 1e6;
1405                 minCount = 1;
1406                 vector< vector<int> > minRGroup;
1407                 vector<int> minRPos;
1408                 
1409                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1410                     string oligo = it->first;
1411                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1412                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1413                         success = pdiffs + 10;
1414                         break;
1415                     }
1416                     
1417                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1418                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1419                     oligo = alignment->getSeqAAln();
1420                     string temp = alignment->getSeqBAln();
1421                     
1422                     int alnLength = oligo.length();
1423                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1424                     oligo = oligo.substr(0,alnLength);
1425                     temp = temp.substr(0,alnLength);
1426                     int numDiff = countDiffs(oligo, temp);
1427                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1428                     
1429                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1430                     if(numDiff < minDiff){
1431                         minDiff = numDiff;
1432                         minCount = 1;
1433                         minRGroup.clear();
1434                         minRGroup.push_back(it->second);
1435                         int tempminRPos = 0;
1436                         minRPos.clear();
1437                         for(int i=0;i<alnLength;i++){
1438                             if(temp[i] != '-'){
1439                                 tempminRPos++;
1440                             }
1441                         }
1442                         minRPos.push_back(tempminRPos);
1443                     }else if(numDiff == minDiff){
1444                         int tempminRPos = 0;
1445                         for(int i=0;i<alnLength;i++){
1446                             if(temp[i] != '-'){
1447                                 tempminRPos++;
1448                             }
1449                         }
1450                         minRPos.push_back(tempminRPos);
1451                         minRGroup.push_back(it->second);
1452                     }
1453                     
1454                 }
1455                 
1456                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1457                 else {
1458                     bool foundMatch = false;
1459                     vector<int> matches;
1460                     for (int i = 0; i < minFGroup.size(); i++) {
1461                         for (int j = 0; j < minFGroup[i].size(); j++) {
1462                             for (int k = 0; k < minRGroup.size(); k++) {
1463                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1464                             }
1465                         }
1466                     }
1467                     
1468                     int fStart = 0;
1469                     int rStart = 0;
1470                     if (matches.size() == 1) {
1471                         foundMatch = true;
1472                         group = matches[0];
1473                         fStart = minFPos[0];
1474                         rStart = minRPos[0];
1475                     }
1476                     
1477                     //we have an acceptable match for the forward and reverse, but do they match?
1478                     if (foundMatch) {
1479                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1480                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1481                         forwardQual.trimQScores(fStart, -1);
1482                         reverseQual.trimQScores(rStart, -1);
1483                         success = minDiff;
1484                     }else { success = pdiffs + 100;     }
1485                 }
1486             }
1487             
1488             if (alignment != NULL) { delete alignment; }
1489         }
1490         
1491         return success;
1492         
1493     }
1494     catch(exception& e) {
1495         m->errorOut(e, "TrimOligos", "stripIForward");
1496         exit(1);
1497     }
1498     
1499 }
1500 //*******************************************************************/
1501 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
1502     try {
1503         //look for forward barcode
1504         string rawFSequence = forwardSeq.getUnaligned();
1505         string rawRSequence = reverseSeq.getUnaligned();
1506         int success = pdiffs + 1;       //guilty until proven innocent
1507         
1508         //can you find the forward barcode
1509         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1510             string foligo = it->second.forward;
1511             string roligo = it->second.reverse;
1512             
1513             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
1514                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1515                 break;
1516             }
1517             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
1518                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1519                 break;
1520             }
1521             
1522             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1523                 group = it->first;
1524                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1525                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1526                 success = 0;
1527                 break;
1528             }
1529         }
1530         
1531         //if you found the barcode or if you don't want to allow for diffs
1532         if ((pdiffs == 0) || (success == 0)) { return success; }
1533         else { //try aligning and see if you can find it
1534             Alignment* alignment;
1535             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1536             else{ alignment = NULL; }
1537             
1538             //can you find the barcode
1539             int minDiff = 1e6;
1540             int minCount = 1;
1541             vector< vector<int> > minFGroup;
1542             vector<int> minFPos;
1543             
1544             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1545             /*
1546              1 Sarah Westcott
1547              2 John Westcott
1548              3 Anna Westcott
1549              4 Sarah Schloss
1550              5 Pat Schloss
1551              6 Gail Brown
1552              7 Pat Moore
1553              only want to look for forward = Sarah, John, Anna, Pat, Gail
1554              reverse = Westcott, Schloss, Brown, Moore
1555              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1556              */
1557             //cout << endl << forwardSeq.getName() << endl;
1558             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1559                 string oligo = it->first;
1560                 
1561                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
1562                     success = pdiffs + 10;
1563                     break;
1564                 }
1565                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1566                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1567                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1568                 oligo = alignment->getSeqAAln();
1569                 string temp = alignment->getSeqBAln();
1570                 
1571                 int alnLength = oligo.length();
1572                 
1573                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1574                 oligo = oligo.substr(0,alnLength);
1575                 temp = temp.substr(0,alnLength);
1576                 int numDiff = countDiffs(oligo, temp);
1577                 
1578                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1579                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1580                 
1581                 if(numDiff < minDiff){
1582                     minDiff = numDiff;
1583                     minCount = 1;
1584                     minFGroup.clear();
1585                     minFGroup.push_back(it->second);
1586                     int tempminFPos = 0;
1587                     minFPos.clear();
1588                     for(int i=0;i<alnLength;i++){
1589                         if(temp[i] != '-'){
1590                             tempminFPos++;
1591                         }
1592                     }
1593                     minFPos.push_back(tempminFPos);
1594                 }else if(numDiff == minDiff){
1595                     minFGroup.push_back(it->second);
1596                     int tempminFPos = 0;
1597                     for(int i=0;i<alnLength;i++){
1598                         if(temp[i] != '-'){
1599                             tempminFPos++;
1600                         }
1601                     }
1602                     minFPos.push_back(tempminFPos);
1603                 }
1604             }
1605             
1606             //cout << minDiff << '\t' << minCount << '\t' << endl;
1607             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1608             else{
1609                 //check for reverse match
1610                 if (alignment != NULL) { delete alignment; }
1611                 
1612                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1613                 else{ alignment = NULL; }
1614                 
1615                 //can you find the barcode
1616                 minDiff = 1e6;
1617                 minCount = 1;
1618                 vector< vector<int> > minRGroup;
1619                 vector<int> minRPos;
1620                 
1621                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1622                     string oligo = it->first;
1623                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1624                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1625                         success = pdiffs + 10;
1626                         break;
1627                     }
1628                     
1629                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1630                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1631                     oligo = alignment->getSeqAAln();
1632                     string temp = alignment->getSeqBAln();
1633                     
1634                     int alnLength = oligo.length();
1635                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1636                     oligo = oligo.substr(0,alnLength);
1637                     temp = temp.substr(0,alnLength);
1638                     int numDiff = countDiffs(oligo, temp);
1639                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1640                     
1641                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1642                     if(numDiff < minDiff){
1643                         minDiff = numDiff;
1644                         minCount = 1;
1645                         minRGroup.clear();
1646                         minRGroup.push_back(it->second);
1647                         int tempminRPos = 0;
1648                         minRPos.clear();
1649                         for(int i=0;i<alnLength;i++){
1650                             if(temp[i] != '-'){
1651                                 tempminRPos++;
1652                             }
1653                         }
1654                         minRPos.push_back(tempminRPos);
1655                     }else if(numDiff == minDiff){
1656                         int tempminRPos = 0;
1657                         for(int i=0;i<alnLength;i++){
1658                             if(temp[i] != '-'){
1659                                 tempminRPos++;
1660                             }
1661                         }
1662                         minRPos.push_back(tempminRPos);
1663                         minRGroup.push_back(it->second);
1664                     }
1665                     
1666                 }
1667                 
1668                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1669                 else {
1670                     bool foundMatch = false;
1671                     vector<int> matches;
1672                     for (int i = 0; i < minFGroup.size(); i++) {
1673                         for (int j = 0; j < minFGroup[i].size(); j++) {
1674                             for (int k = 0; k < minRGroup.size(); k++) {
1675                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1676                             }
1677                         }
1678                     }
1679                     
1680                     int fStart = 0;
1681                     int rStart = 0;
1682                     if (matches.size() == 1) {
1683                         foundMatch = true;
1684                         group = matches[0];
1685                         fStart = minFPos[0];
1686                         rStart = minRPos[0];
1687                     }
1688                     
1689                     //we have an acceptable match for the forward and reverse, but do they match?
1690                     if (foundMatch) {
1691                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1692                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1693                         success = minDiff;
1694                     }else { success = pdiffs + 100;     }
1695                 }
1696             }
1697             
1698             if (alignment != NULL) { delete alignment; }
1699         }
1700         
1701         return success;
1702         
1703     }
1704     catch(exception& e) {
1705         m->errorOut(e, "TrimOligos", "stripIForward");
1706         exit(1);
1707     }
1708     
1709 }
1710 //*******************************************************************/
1711 int TrimOligos::stripBarcode(Sequence& seq, int& group){
1712     try {
1713         
1714         string rawSequence = seq.getUnaligned();
1715         int success = bdiffs + 1;       //guilty until proven innocent
1716         
1717         //can you find the barcode
1718         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1719             string oligo = it->first;
1720             if(rawSequence.length() < oligo.length()){  //let's just assume that the barcodes are the same length
1721                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1722                 break;
1723             }
1724             
1725             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1726                 group = it->second;
1727                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1728                 
1729                 success = 0;
1730                 break;
1731             }
1732         }
1733         
1734         //if you found the barcode or if you don't want to allow for diffs
1735         if ((bdiffs == 0) || (success == 0)) { return success; }
1736         
1737         else { //try aligning and see if you can find it
1738             Alignment* alignment;
1739             if (barcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
1740             else{ alignment = NULL; }
1741             
1742             //can you find the barcode
1743             int minDiff = 1e6;
1744             int minCount = 1;
1745             int minGroup = -1;
1746             int minPos = 0;
1747             
1748             for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1749                 string oligo = it->first;
1750                 // int length = oligo.length();
1751                 
1752                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
1753                     success = bdiffs + 10;
1754                     break;
1755                 }
1756                 
1757                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1758                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1759                 oligo = alignment->getSeqAAln();
1760                 string temp = alignment->getSeqBAln();
1761                 
1762                 int alnLength = oligo.length();
1763                 
1764                 for(int i=oligo.length()-1;i>=0;i--){
1765                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
1766                 }
1767                 oligo = oligo.substr(0,alnLength);
1768                 temp = temp.substr(0,alnLength);
1769                 
1770                 int numDiff = countDiffs(oligo, temp);
1771                 
1772                 if(numDiff < minDiff){
1773                     minDiff = numDiff;
1774                     minCount = 1;
1775                     minGroup = it->second;
1776                     minPos = 0;
1777                     for(int i=0;i<alnLength;i++){
1778                         if(temp[i] != '-'){
1779                             minPos++;
1780                         }
1781                     }
1782                 }
1783                 else if(numDiff == minDiff){
1784                     minCount++;
1785                 }
1786                 
1787             }
1788             
1789             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
1790             else if(minCount > 1)       {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1791             else{       //use the best match
1792                 group = minGroup;
1793                 seq.setUnaligned(rawSequence.substr(minPos));
1794                 success = minDiff;
1795             }
1796             
1797             if (alignment != NULL) { delete alignment; }
1798             
1799         }
1800         
1801         return success;
1802         
1803     }
1804     catch(exception& e) {
1805         m->errorOut(e, "TrimOligos", "stripBarcode");
1806         exit(1);
1807     }
1808     
1809 }
1810
1811 /********************************************************************/
1812 int TrimOligos::stripForward(Sequence& seq, int& group){
1813     try {
1814         
1815         string rawSequence = seq.getUnaligned();
1816         int success = pdiffs + 1;       //guilty until proven innocent
1817         
1818         //can you find the primer
1819         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1820             string oligo = it->first;
1821             if(rawSequence.length() < oligo.length()){  //let's just assume that the primers are the same length
1822                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1823                 break;
1824             }
1825             
1826             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1827                 group = it->second;
1828                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1829                 success = 0;
1830                 break;
1831             }
1832         }
1833         
1834         //if you found the barcode or if you don't want to allow for diffs
1835         if ((pdiffs == 0) || (success == 0)) {  return success; }
1836         
1837         else { //try aligning and see if you can find it
1838             Alignment* alignment;
1839             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1840             else{ alignment = NULL; }
1841             
1842             //can you find the barcode
1843             int minDiff = 1e6;
1844             int minCount = 1;
1845             int minGroup = -1;
1846             int minPos = 0;
1847             
1848             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1849                 string oligo = it->first;
1850                 // int length = oligo.length();
1851                 
1852                 if(rawSequence.length() < maxFPrimerLength){
1853                     success = pdiffs + 100;
1854                     break;
1855                 }
1856                 
1857                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1858                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1859                 oligo = alignment->getSeqAAln();
1860                 string temp = alignment->getSeqBAln();
1861                 
1862                 int alnLength = oligo.length();
1863                 
1864                 for(int i=oligo.length()-1;i>=0;i--){
1865                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
1866                 }
1867                 oligo = oligo.substr(0,alnLength);
1868                 temp = temp.substr(0,alnLength);
1869                 
1870                 int numDiff = countDiffs(oligo, temp);
1871                 
1872                 if(numDiff < minDiff){
1873                     minDiff = numDiff;
1874                     minCount = 1;
1875                     minGroup = it->second;
1876                     minPos = 0;
1877                     for(int i=0;i<alnLength;i++){
1878                         if(temp[i] != '-'){
1879                             minPos++;
1880                         }
1881                     }
1882                 }
1883                 else if(numDiff == minDiff){
1884                     minCount++;
1885                 }
1886                 
1887             }
1888             
1889             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1890             else if(minCount > 1)       {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1891             else{       //use the best match
1892                 group = minGroup;
1893                 seq.setUnaligned(rawSequence.substr(minPos));
1894                 success = minDiff;
1895             }
1896             
1897             if (alignment != NULL) { delete alignment; }
1898             
1899         }
1900         
1901         return success;
1902         
1903     }
1904     catch(exception& e) {
1905         m->errorOut(e, "TrimOligos", "stripForward");
1906         exit(1);
1907     }
1908 }
1909 //*******************************************************************/
1910 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1911     try {
1912         
1913         if (paired) { int success = stripPairedPrimers(seq, qual, group, keepForward); return success; }
1914         
1915         string rawSequence = seq.getUnaligned();
1916         int success = pdiffs + 1;       //guilty until proven innocent
1917         
1918         //can you find the primer
1919         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1920             string oligo = it->first;
1921             if(rawSequence.length() < oligo.length()){  //let's just assume that the primers are the same length
1922                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1923                 break;
1924             }
1925             
1926             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1927                 group = it->second;
1928                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
1929                 if(qual.getName() != ""){
1930                     if (!keepForward) { qual.trimQScores(oligo.length(), -1); }
1931                 }
1932                 success = 0;
1933                 break;
1934             }
1935         }
1936         
1937         //if you found the barcode or if you don't want to allow for diffs
1938         if ((pdiffs == 0) || (success == 0)) { return success; }
1939         
1940         else { //try aligning and see if you can find it
1941             Alignment* alignment;
1942             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1943             else{ alignment = NULL; }
1944             
1945             //can you find the barcode
1946             int minDiff = 1e6;
1947             int minCount = 1;
1948             int minGroup = -1;
1949             int minPos = 0;
1950             
1951             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1952                 string oligo = it->first;
1953                 // int length = oligo.length();
1954                 
1955                 if(rawSequence.length() < maxFPrimerLength){
1956                     success = pdiffs + 100;
1957                     break;
1958                 }
1959                 
1960                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1961                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1962                 oligo = alignment->getSeqAAln();
1963                 string temp = alignment->getSeqBAln();
1964                 
1965                 int alnLength = oligo.length();
1966                 
1967                 for(int i=oligo.length()-1;i>=0;i--){
1968                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
1969                 }
1970                 oligo = oligo.substr(0,alnLength);
1971                 temp = temp.substr(0,alnLength);
1972                 
1973                 int numDiff = countDiffs(oligo, temp);
1974                 
1975                 if(numDiff < minDiff){
1976                     minDiff = numDiff;
1977                     minCount = 1;
1978                     minGroup = it->second;
1979                     minPos = 0;
1980                     for(int i=0;i<alnLength;i++){
1981                         if(temp[i] != '-'){
1982                             minPos++;
1983                         }
1984                     }
1985                 }
1986                 else if(numDiff == minDiff){
1987                     minCount++;
1988                 }
1989                 
1990             }
1991             
1992             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1993             else if(minCount > 1)       {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1994             else{       //use the best match
1995                 group = minGroup;
1996                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
1997                 if(qual.getName() != ""){
1998                     if (!keepForward) { qual.trimQScores(minPos, -1); }
1999                 }
2000                 success = minDiff;
2001             }
2002             
2003             if (alignment != NULL) { delete alignment; }
2004             
2005         }
2006         
2007         return success;
2008         
2009     }
2010     catch(exception& e) {
2011         m->errorOut(e, "TrimOligos", "stripForward");
2012         exit(1);
2013     }
2014 }
2015 //******************************************************************/
2016 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
2017     try {
2018         string rawSequence = seq.getUnaligned();
2019         bool success = 0;       //guilty until proven innocent
2020         
2021         for(int i=0;i<revPrimer.size();i++){
2022             string oligo = revPrimer[i];
2023             
2024             if(rawSequence.length() < oligo.length()){
2025                 success = 0;
2026                 break;
2027             }
2028             
2029             if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2030                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2031                 if(qual.getName() != ""){
2032                     qual.trimQScores(-1, rawSequence.length()-oligo.length());
2033                 }
2034                 success = 1;
2035                 break;
2036             }
2037         }       
2038         return success;
2039         
2040     }
2041     catch(exception& e) {
2042         m->errorOut(e, "TrimOligos", "stripReverse");
2043         exit(1);
2044     }
2045 }
2046 //******************************************************************/
2047 bool TrimOligos::stripReverse(Sequence& seq){
2048     try {
2049         
2050         string rawSequence = seq.getUnaligned();
2051         bool success = 0;       //guilty until proven innocent
2052         
2053         for(int i=0;i<revPrimer.size();i++){
2054             string oligo = revPrimer[i];
2055             
2056             if(rawSequence.length() < oligo.length()){
2057                 success = 0;
2058                 break;
2059             }
2060             
2061             if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2062                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2063                 success = 1;
2064                 break;
2065             }
2066         }       
2067         
2068         return success;
2069         
2070     }
2071     catch(exception& e) {
2072         m->errorOut(e, "TrimOligos", "stripReverse");
2073         exit(1);
2074     }
2075 }
2076 //******************************************************************/
2077 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
2078     try {
2079         string rawSequence = seq.getUnaligned();
2080         bool success = ldiffs + 1;      //guilty until proven innocent
2081         
2082         for(int i=0;i<linker.size();i++){
2083             string oligo = linker[i];
2084             
2085             if(rawSequence.length() < oligo.length()){
2086                 success = ldiffs + 10;
2087                 break;
2088             }
2089             
2090             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2091                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2092                 if(qual.getName() != ""){
2093                     qual.trimQScores(oligo.length(), -1);
2094                 }
2095                 success = 0;
2096                 break;
2097             }
2098         }
2099         
2100         //if you found the linker or if you don't want to allow for diffs
2101         if ((ldiffs == 0) || (success == 0)) { return success; }
2102         
2103         else { //try aligning and see if you can find it
2104             Alignment* alignment;
2105             if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }   
2106             else{ alignment = NULL; }
2107             
2108             //can you find the barcode
2109             int minDiff = 1e6;
2110             int minCount = 1;
2111             int minPos = 0;
2112             
2113             for(int i = 0; i < linker.size(); i++){
2114                 string oligo = linker[i];
2115                 // int length = oligo.length();
2116                 
2117                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
2118                     success = ldiffs + 10;
2119                     break;
2120                 }
2121                 
2122                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2123                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2124                 oligo = alignment->getSeqAAln();
2125                 string temp = alignment->getSeqBAln();
2126                 
2127                 int alnLength = oligo.length();
2128                 
2129                 for(int i=oligo.length()-1;i>=0;i--){
2130                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2131                 }
2132                 oligo = oligo.substr(0,alnLength);
2133                 temp = temp.substr(0,alnLength);
2134                 
2135                 int numDiff = countDiffs(oligo, temp);
2136                 
2137                 if(numDiff < minDiff){
2138                     minDiff = numDiff;
2139                     minCount = 1;
2140                     minPos = 0;
2141                     for(int i=0;i<alnLength;i++){
2142                         if(temp[i] != '-'){
2143                             minPos++;
2144                         }
2145                     }
2146                 }
2147                 else if(numDiff == minDiff){
2148                     minCount++;
2149                 }
2150                 
2151             }
2152             
2153             if(minDiff > ldiffs)        {       success = minDiff;      }       //no good matches
2154             else if(minCount > 1)       {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
2155             else{       //use the best match
2156                 seq.setUnaligned(rawSequence.substr(minPos));
2157                 
2158                 if(qual.getName() != ""){
2159                     qual.trimQScores(minPos, -1);
2160                 }
2161                 success = minDiff;
2162             }
2163             
2164             if (alignment != NULL) { delete alignment; }
2165             
2166         }
2167         
2168         
2169         return success;
2170         
2171     }
2172     catch(exception& e) {
2173         m->errorOut(e, "TrimOligos", "stripLinker");
2174         exit(1);
2175     }
2176 }
2177 //******************************************************************/
2178 bool TrimOligos::stripLinker(Sequence& seq){
2179     try {
2180         
2181         string rawSequence = seq.getUnaligned();
2182         bool success = ldiffs +1;       //guilty until proven innocent
2183         
2184         for(int i=0;i<linker.size();i++){
2185             string oligo = linker[i];
2186             
2187             if(rawSequence.length() < oligo.length()){
2188                 success = ldiffs +10;
2189                 break;
2190             }
2191             
2192             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2193                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2194                 success = 0;
2195                 break;
2196             }
2197         }       
2198         
2199         //if you found the linker or if you don't want to allow for diffs
2200         if ((ldiffs == 0) || (success == 0)) { return success; }
2201         
2202         else { //try aligning and see if you can find it
2203             Alignment* alignment;
2204             if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2205             else{ alignment = NULL; }
2206             
2207             //can you find the barcode
2208             int minDiff = 1e6;
2209             int minCount = 1;
2210             int minPos = 0;
2211             
2212             for(int i = 0; i < linker.size(); i++){
2213                 string oligo = linker[i];
2214                 // int length = oligo.length();
2215                 
2216                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
2217                     success = ldiffs + 10;
2218                     break;
2219                 }
2220                 
2221                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2222                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2223                 oligo = alignment->getSeqAAln();
2224                 string temp = alignment->getSeqBAln();
2225                 
2226                 int alnLength = oligo.length();
2227                 
2228                 for(int i=oligo.length()-1;i>=0;i--){
2229                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2230                 }
2231                 oligo = oligo.substr(0,alnLength);
2232                 temp = temp.substr(0,alnLength);
2233                 
2234                 int numDiff = countDiffs(oligo, temp);
2235                 
2236                 if(numDiff < minDiff){
2237                     minDiff = numDiff;
2238                     minCount = 1;
2239                     minPos = 0;
2240                     for(int i=0;i<alnLength;i++){
2241                         if(temp[i] != '-'){
2242                             minPos++;
2243                         }
2244                     }
2245                 }
2246                 else if(numDiff == minDiff){
2247                     minCount++;
2248                 }
2249                 
2250             }
2251             
2252             if(minDiff > ldiffs)        {       success = minDiff;      }       //no good matches
2253             else if(minCount > 1)       {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
2254             else{       //use the best match
2255                 seq.setUnaligned(rawSequence.substr(minPos));
2256                 success = minDiff;
2257             }
2258             
2259             if (alignment != NULL) { delete alignment; }
2260             
2261         }
2262         
2263         return success;
2264         
2265     }
2266     catch(exception& e) {
2267         m->errorOut(e, "TrimOligos", "stripLinker");
2268         exit(1);
2269     }
2270 }
2271
2272 //******************************************************************/
2273 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
2274     try {
2275         string rawSequence = seq.getUnaligned();
2276         bool success = sdiffs+1;        //guilty until proven innocent
2277         
2278         for(int i=0;i<spacer.size();i++){
2279             string oligo = spacer[i];
2280             
2281             if(rawSequence.length() < oligo.length()){
2282                 success = sdiffs+10;
2283                 break;
2284             }
2285             
2286             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2287                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2288                 if(qual.getName() != ""){
2289                     qual.trimQScores(oligo.length(), -1);
2290                 }
2291                 success = 0;
2292                 break;
2293             }
2294         }
2295         
2296         //if you found the spacer or if you don't want to allow for diffs
2297         if ((sdiffs == 0) || (success == 0)) { return success; }
2298         
2299         else { //try aligning and see if you can find it
2300             Alignment* alignment;
2301             if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2302             else{ alignment = NULL; }
2303             
2304             //can you find the barcode
2305             int minDiff = 1e6;
2306             int minCount = 1;
2307             int minPos = 0;
2308             
2309             for(int i = 0; i < spacer.size(); i++){
2310                 string oligo = spacer[i];
2311                 // int length = oligo.length();
2312                 
2313                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
2314                     success = sdiffs + 10;
2315                     break;
2316                 }
2317                 
2318                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2319                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2320                 oligo = alignment->getSeqAAln();
2321                 string temp = alignment->getSeqBAln();
2322                 
2323                 int alnLength = oligo.length();
2324                 
2325                 for(int i=oligo.length()-1;i>=0;i--){
2326                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2327                 }
2328                 oligo = oligo.substr(0,alnLength);
2329                 temp = temp.substr(0,alnLength);
2330                 
2331                 int numDiff = countDiffs(oligo, temp);
2332                 
2333                 if(numDiff < minDiff){
2334                     minDiff = numDiff;
2335                     minCount = 1;
2336                     minPos = 0;
2337                     for(int i=0;i<alnLength;i++){
2338                         if(temp[i] != '-'){
2339                             minPos++;
2340                         }
2341                     }
2342                 }
2343                 else if(numDiff == minDiff){
2344                     minCount++;
2345                 }
2346                 
2347             }
2348             
2349             if(minDiff > sdiffs)        {       success = minDiff;      }       //no good matches
2350             else if(minCount > 1)       {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
2351             else{       //use the best match
2352                 seq.setUnaligned(rawSequence.substr(minPos));
2353                 
2354                 if(qual.getName() != ""){
2355                     qual.trimQScores(minPos, -1);
2356                 }
2357                 success = minDiff;
2358             }
2359             
2360             if (alignment != NULL) { delete alignment; }
2361             
2362         }
2363         
2364         
2365         return success;
2366         
2367     }
2368     catch(exception& e) {
2369         m->errorOut(e, "TrimOligos", "stripSpacer");
2370         exit(1);
2371     }
2372 }
2373 //******************************************************************/
2374 bool TrimOligos::stripSpacer(Sequence& seq){
2375     try {
2376         
2377         string rawSequence = seq.getUnaligned();
2378         bool success = sdiffs+1;        //guilty until proven innocent
2379         
2380         for(int i=0;i<spacer.size();i++){
2381             string oligo = spacer[i];
2382             
2383             if(rawSequence.length() < oligo.length()){
2384                 success = sdiffs+10;
2385                 break;
2386             }
2387             
2388             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2389                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2390                 success = 0;
2391                 break;
2392             }
2393         }       
2394         
2395         //if you found the spacer or if you don't want to allow for diffs
2396         if ((sdiffs == 0) || (success == 0)) { return success; }
2397         
2398         else { //try aligning and see if you can find it
2399             Alignment* alignment;
2400             if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2401             else{ alignment = NULL; }
2402             
2403             //can you find the barcode
2404             int minDiff = 1e6;
2405             int minCount = 1;
2406             int minPos = 0;
2407             
2408             for(int i = 0; i < spacer.size(); i++){
2409                 string oligo = spacer[i];
2410                 // int length = oligo.length();
2411                 
2412                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
2413                     success = sdiffs + 10;
2414                     break;
2415                 }
2416                 
2417                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2418                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2419                 oligo = alignment->getSeqAAln();
2420                 string temp = alignment->getSeqBAln();
2421                 
2422                 int alnLength = oligo.length();
2423                 
2424                 for(int i=oligo.length()-1;i>=0;i--){
2425                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2426                 }
2427                 oligo = oligo.substr(0,alnLength);
2428                 temp = temp.substr(0,alnLength);
2429                 
2430                 int numDiff = countDiffs(oligo, temp);
2431                 
2432                 if(numDiff < minDiff){
2433                     minDiff = numDiff;
2434                     minCount = 1;
2435                     minPos = 0;
2436                     for(int i=0;i<alnLength;i++){
2437                         if(temp[i] != '-'){
2438                             minPos++;
2439                         }
2440                     }
2441                 }
2442                 else if(numDiff == minDiff){
2443                     minCount++;
2444                 }
2445                 
2446             }
2447             
2448             if(minDiff > sdiffs)        {       success = minDiff;      }       //no good matches
2449             else if(minCount > 1)       {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
2450             else{       //use the best match
2451                 seq.setUnaligned(rawSequence.substr(minPos));
2452                 success = minDiff;
2453             }
2454             
2455             if (alignment != NULL) { delete alignment; }
2456             
2457         }
2458         
2459         return success;
2460         
2461     }
2462     catch(exception& e) {
2463         m->errorOut(e, "TrimOligos", "stripSpacer");
2464         exit(1);
2465     }
2466 }
2467
2468 //******************************************************************/
2469 bool TrimOligos::compareDNASeq(string oligo, string seq){
2470     try {
2471         bool success = 1;
2472         int length = oligo.length();
2473         
2474         for(int i=0;i<length;i++){
2475             
2476             if(oligo[i] != seq[i]){
2477                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0; }
2478                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))        {       success = 0;    }
2479                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))    {       success = 0;    }
2480                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))    {       success = 0;    }
2481                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))    {       success = 0;    }
2482                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))    {       success = 0;    }
2483                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))    {       success = 0;    }
2484                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))    {       success = 0;    }
2485                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2486                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2487                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
2488                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }       
2489                 
2490                 if(success == 0)        {       break;  }
2491             }
2492             else{
2493                 success = 1;
2494             }
2495         }
2496         
2497         return success;
2498     }
2499     catch(exception& e) {
2500         m->errorOut(e, "TrimOligos", "compareDNASeq");
2501         exit(1);
2502     }
2503     
2504 }
2505 //********************************************************************/
2506 int TrimOligos::countDiffs(string oligo, string seq){
2507     try {
2508         
2509         int length = oligo.length();
2510         int countDiffs = 0;
2511         
2512         for(int i=0;i<length;i++){
2513             
2514             if(oligo[i] != seq[i]){
2515                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++; }
2516                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))        {       countDiffs++;   }
2517                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))    {       countDiffs++;   }
2518                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))    {       countDiffs++;   }
2519                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))    {       countDiffs++;   }
2520                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))    {       countDiffs++;   }
2521                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))    {       countDiffs++;   }
2522                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))    {       countDiffs++;   }
2523                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2524                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2525                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
2526                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
2527             }
2528             
2529         }
2530         
2531         return countDiffs;
2532     }
2533     catch(exception& e) {
2534         m->errorOut(e, "TrimOligos", "countDiffs");
2535         exit(1);
2536     }
2537 }
2538 //********************************************************************/
2539 string TrimOligos::reverseOligo(string oligo){
2540     try {
2541         string reverse = "";
2542         
2543         for(int i=oligo.length()-1;i>=0;i--){
2544             
2545             if(oligo[i] == 'A') {       reverse += 'T'; }
2546             else if(oligo[i] == 'T'){   reverse += 'A'; }
2547             else if(oligo[i] == 'U'){   reverse += 'A'; }
2548             
2549             else if(oligo[i] == 'G'){   reverse += 'C'; }
2550             else if(oligo[i] == 'C'){   reverse += 'G'; }
2551             
2552             else if(oligo[i] == 'R'){   reverse += 'Y'; }
2553             else if(oligo[i] == 'Y'){   reverse += 'R'; }
2554             
2555             else if(oligo[i] == 'M'){   reverse += 'K'; }
2556             else if(oligo[i] == 'K'){   reverse += 'M'; }
2557             
2558             else if(oligo[i] == 'W'){   reverse += 'W'; }
2559             else if(oligo[i] == 'S'){   reverse += 'S'; }
2560             
2561             else if(oligo[i] == 'B'){   reverse += 'V'; }
2562             else if(oligo[i] == 'V'){   reverse += 'B'; }
2563             
2564             else if(oligo[i] == 'D'){   reverse += 'H'; }
2565             else if(oligo[i] == 'H'){   reverse += 'D'; }
2566             
2567             else if(oligo[i] == '-'){   reverse += '-'; }
2568             else        {       reverse += 'N'; }
2569         }
2570         
2571         
2572         return reverse;
2573     }
2574     catch(exception& e) {
2575         m->errorOut(e, "TrimOligos", "reverseOligo");
2576         exit(1);
2577     }
2578 }
2579
2580 /********************************************************************/
2581
2582