]> git.donarmstrong.com Git - mothur.git/blob - trimoligos.cpp
modifications to commands to add headers to list file.
[mothur.git] / trimoligos.cpp
1 /*
2  * trimoligos.cpp
3  * Mothur
4  *
5  * Created by westcott on 9/1/11.
6  * Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
13
14
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
18     try {
19         m = MothurOut::getInstance();
20         paired = false;
21         
22         pdiffs = p;
23         bdiffs = b;
24         ldiffs = l;
25         sdiffs = s;
26         
27         barcodes = br;
28         primers = pr;
29         revPrimer = r;
30         linker = lk;
31         spacer = sp;
32         maxFBarcodeLength = 0;
33         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
34             if(it->first.length() > maxFBarcodeLength){
35                 maxFBarcodeLength = it->first.length();
36             }
37         }
38         maxFPrimerLength = 0;
39         map<string,int>::iterator it;
40         for(it=primers.begin();it!=primers.end();it++){
41             if(it->first.length() > maxFPrimerLength){
42                 maxFPrimerLength = it->first.length();
43             }
44         }
45         
46         maxLinkerLength = 0;
47         for(int i = 0; i < linker.size(); i++){
48             if(linker[i].length() > maxLinkerLength){
49                 maxLinkerLength = linker[i].length();
50             }
51         }
52         
53         maxSpacerLength = 0;
54         for(int i = 0; i < spacer.size(); i++){
55             if(spacer[i].length() > maxSpacerLength){
56                 maxSpacerLength = spacer[i].length();
57             }
58         }
59     }
60     catch(exception& e) {
61         m->errorOut(e, "TrimOligos", "TrimOligos");
62         exit(1);
63     }
64 }
65 /********************************************************************/
66 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
67 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
68     try {
69         m = MothurOut::getInstance();
70         
71         pdiffs = p;
72         bdiffs = b;
73         ldiffs = l;
74         sdiffs = s;
75         paired = true;
76         
77         maxFBarcodeLength = 0;
78         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
79             string forward = it->second.forward;
80             map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
81             
82             if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length(); }
83             
84             if (itForward == ifbarcodes.end()) {
85                 vector<int> temp; temp.push_back(it->first);
86                 ifbarcodes[forward] = temp;
87             }else { itForward->second.push_back(it->first); }
88         }
89         
90         maxFPrimerLength = 0;
91         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
92             string forward = it->second.forward;
93             map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
94             
95             if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length(); }
96             
97             if (itForward == ifprimers.end()) {
98                 vector<int> temp; temp.push_back(it->first);
99                 ifprimers[forward] = temp;
100             }else { itForward->second.push_back(it->first); }
101         }
102         
103         maxRBarcodeLength = 0;
104         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
105             string forward = it->second.reverse;
106             map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
107             
108             if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length(); }
109             
110             if (itForward == irbarcodes.end()) {
111                 vector<int> temp; temp.push_back(it->first);
112                 irbarcodes[forward] = temp;
113             }else { itForward->second.push_back(it->first); }
114         }
115         
116         maxRPrimerLength = 0;
117         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
118             string forward = it->second.reverse;
119             map<string, vector<int> >::iterator itForward = irprimers.find(forward);
120             
121             if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length(); }
122             
123             if (itForward == irprimers.end()) {
124                 vector<int> temp; temp.push_back(it->first);
125                 irprimers[forward] = temp;
126             }else { itForward->second.push_back(it->first); }
127         }
128         
129         ipbarcodes = br;
130         ipprimers = pr;
131     }
132     catch(exception& e) {
133         m->errorOut(e, "TrimOligos", "TrimOligos");
134         exit(1);
135     }
136 }
137 /********************************************************************/
138 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
139 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
140     try {
141         m = MothurOut::getInstance();
142         
143         pdiffs = p;
144         bdiffs = b;
145         
146         barcodes = br;
147         primers = pr;
148         revPrimer = r;
149         paired = false;
150         
151         maxFBarcodeLength = 0;
152         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
153             string oligo = it->first;
154             if(oligo.length() > maxFBarcodeLength){
155                 maxFBarcodeLength = oligo.length();
156             }
157         }
158         maxRBarcodeLength = maxFBarcodeLength;
159         
160         maxFPrimerLength = 0;
161         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
162             string oligo = it->first;
163             if(oligo.length() > maxFPrimerLength){
164                 maxFPrimerLength = oligo.length();
165             }
166         }
167         maxRPrimerLength = maxFPrimerLength;
168     }
169     catch(exception& e) {
170         m->errorOut(e, "TrimOligos", "TrimOligos");
171         exit(1);
172     }
173 }
174 /********************************************************************/
175 TrimOligos::~TrimOligos() {}
176 //********************************************************************/
177 bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
178     try {
179         
180         string rawSequence = seq.getUnaligned();
181         
182         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
183             string oligo = it->first;
184             
185             if(rawSequence.length() < oligo.length()) { break; }
186             
187             //search for primer
188             int olength = oligo.length();
189             for (int j = 0; j < rawSequence.length()-olength; j++){
190                 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
191                 string rawChunk = rawSequence.substr(j, olength);
192                 if(compareDNASeq(oligo, rawChunk)) {
193                     primerStart = j;
194                     primerEnd = primerStart + olength;
195                     return true;
196                 }
197                 
198             }
199         }
200         
201         primerStart = 0; primerEnd = 0;
202         //if you don't want to allow for diffs
203         if (pdiffs == 0) { return false; }
204         else { //try aligning and see if you can find it
205             
206             //can you find the barcode
207             int minDiff = 1e6;
208             int minCount = 1;
209             
210             Alignment* alignment;
211             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
212             else{ alignment = NULL; }
213             
214             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
215                 
216                 string prim = it->first;
217                 //search for primer
218                 int olength = prim.length();
219                 if (rawSequence.length() < olength) {} //ignore primers too long for this seq
220                 else{
221                     for (int j = 0; j < rawSequence.length()-olength; j++){
222                         
223                         string oligo = it->first;
224                         
225                         if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
226                         
227                         string rawChunk = rawSequence.substr(j, olength+pdiffs);
228                         
229                         //use needleman to align first primer.length()+numdiffs of sequence to each barcode
230                         alignment->alignPrimer(oligo, rawChunk);
231                         oligo = alignment->getSeqAAln();
232                         string temp = alignment->getSeqBAln();
233                         
234                         int alnLength = oligo.length();
235                         
236                         for(int i=oligo.length()-1;i>=0;i--){
237                             if(oligo[i] != '-'){        alnLength = i+1;        break;  }
238                         }
239                         oligo = oligo.substr(0,alnLength);
240                         temp = temp.substr(0,alnLength);
241                         
242                         int numDiff = countDiffs(oligo, temp);
243                         
244                         if(numDiff < minDiff){
245                             minDiff = numDiff;
246                             minCount = 1;
247                             primerStart = j;
248                             primerEnd = primerStart + alnLength;
249                         }else if(numDiff == minDiff){ minCount++; }
250                     }
251                 }
252             }
253             
254             if (alignment != NULL) { delete alignment; }
255             
256             if(minDiff > pdiffs)        {       primerStart = 0; primerEnd = 0; return false;   }       //no good matches
257             else if(minCount > 1)       {       primerStart = 0; primerEnd = 0; return false;   }       //can't tell the difference between multiple primers
258             else{ return true; }
259         }
260         
261         primerStart = 0; primerEnd = 0;
262         return false;
263         
264     }
265     catch(exception& e) {
266         m->errorOut(e, "TrimOligos", "stripForward");
267         exit(1);
268     }
269 }
270 //******************************************************************/
271 bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
272     try {
273         
274         string rawSequence = seq.getUnaligned();
275         
276         for(int i=0;i<revPrimer.size();i++){
277             string oligo = revPrimer[i];
278             if(rawSequence.length() < oligo.length()) { break; }
279             
280             //search for primer
281             int olength = oligo.length();
282             for (int j = rawSequence.length()-olength; j >= 0; j--){
283                 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
284                 string rawChunk = rawSequence.substr(j, olength);
285                 
286                 if(compareDNASeq(oligo, rawChunk)) {
287                     primerStart = j;
288                     primerEnd = primerStart + olength;
289                     return true;
290                 }
291                 
292             }
293         }
294         
295         primerStart = 0; primerEnd = 0;
296         return false;
297     }
298     catch(exception& e) {
299         m->errorOut(e, "PcrSeqsCommand", "findReverse");
300         exit(1);
301     }
302 }
303 //*******************************************************************/
304
305 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
306     try {
307         
308         if (paired) { int success = stripPairedBarcode(seq, qual, group); return success; }
309         
310         string rawSequence = seq.getUnaligned();
311         int success = bdiffs + 1;       //guilty until proven innocent
312         
313         //can you find the barcode
314         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
315             string oligo = it->first;
316             if(rawSequence.length() < oligo.length()){  //let's just assume that the barcodes are the same length
317                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
318                 break;
319             }
320             
321             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
322                 group = it->second;
323                 seq.setUnaligned(rawSequence.substr(oligo.length()));
324                 
325                 if(qual.getName() != ""){
326                     qual.trimQScores(oligo.length(), -1);
327                 }
328                 
329                 success = 0;
330                 break;
331             }
332         }
333         
334         //if you found the barcode or if you don't want to allow for diffs
335         if ((bdiffs == 0) || (success == 0)) { return success; }
336         
337         else { //try aligning and see if you can find it
338             Alignment* alignment;
339             if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
340             else{ alignment = NULL; }
341             
342             //can you find the barcode
343             int minDiff = 1e6;
344             int minCount = 1;
345             int minGroup = -1;
346             int minPos = 0;
347             
348             for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
349                 string oligo = it->first;
350                 // int length = oligo.length();
351                 
352                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
353                     success = bdiffs + 10;
354                     break;
355                 }
356                 
357                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
358                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
359                 oligo = alignment->getSeqAAln();
360                 string temp = alignment->getSeqBAln();
361                 
362                 int alnLength = oligo.length();
363                 
364                 for(int i=oligo.length()-1;i>=0;i--){
365                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
366                 }
367                 oligo = oligo.substr(0,alnLength);
368                 temp = temp.substr(0,alnLength);
369                 int numDiff = countDiffs(oligo, temp);
370                 
371                 if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " aligned fragment =" + temp + ", barcode =" + oligo + ", numDiffs = " + toString(numDiff) + "\n"); }
372                 
373                 if(numDiff < minDiff){
374                     minDiff = numDiff;
375                     minCount = 1;
376                     minGroup = it->second;
377                     minPos = 0;
378                     for(int i=0;i<alnLength;i++){
379                         if(temp[i] != '-'){
380                             minPos++;
381                         }
382                     }
383                 }
384                 else if(numDiff == minDiff){
385                     minCount++;
386                 }
387                 
388             }
389             
390             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
391             else if(minCount > 1)       {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
392             else{       //use the best match
393                 group = minGroup;
394                 seq.setUnaligned(rawSequence.substr(minPos));
395                 
396                 if(qual.getName() != ""){
397                     qual.trimQScores(minPos, -1);
398                 }
399                 success = minDiff;
400             }
401             
402             if (alignment != NULL) { delete alignment; }
403             
404         }
405         
406         return success;
407         
408     }
409     catch(exception& e) {
410         m->errorOut(e, "TrimOligos", "stripBarcode");
411         exit(1);
412     }
413 }
414 //*******************************************************************/
415 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
416     try {
417         //look for forward barcode
418         string rawFSequence = forwardSeq.getUnaligned();
419         string rawRSequence = reverseSeq.getUnaligned();
420         int success = bdiffs + 1;       //guilty until proven innocent
421         
422         //can you find the forward barcode
423         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
424             string foligo = it->second.forward;
425             string roligo = it->second.reverse;
426             
427             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
428                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
429                 break;
430             }
431             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
432                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
433                 break;
434             }
435             
436             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
437                 group = it->first;
438                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
439                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
440                 success = 0;
441                 break;
442             }
443         }
444         
445         //if you found the barcode or if you don't want to allow for diffs
446         if ((bdiffs == 0) || (success == 0)) { return success; }
447         else { //try aligning and see if you can find it
448             Alignment* alignment;
449             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
450             else{ alignment = NULL; }
451             
452             //can you find the barcode
453             int minDiff = 1e6;
454             int minCount = 1;
455             vector< vector<int> > minFGroup;
456             vector<int> minFPos;
457             
458             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
459             /*
460              1 Sarah Westcott
461              2 John Westcott
462              3 Anna Westcott
463              4 Sarah Schloss
464              5 Pat Schloss
465              6 Gail Brown
466              7 Pat Moore
467              only want to look for forward = Sarah, John, Anna, Pat, Gail
468              reverse = Westcott, Schloss, Brown, Moore
469              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
470              */
471             //cout << endl << forwardSeq.getName() << endl;
472             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
473                 string oligo = it->first;
474                 
475                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
476                     success = bdiffs + 10;
477                     break;
478                 }
479                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
480                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
481                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
482                 oligo = alignment->getSeqAAln();
483                 string temp = alignment->getSeqBAln();
484                 
485                 int alnLength = oligo.length();
486                 
487                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
488                 oligo = oligo.substr(0,alnLength);
489                 temp = temp.substr(0,alnLength);
490                 int numDiff = countDiffs(oligo, temp);
491                 
492                 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
493                 
494                 if (alnLength == 0) { numDiff = bdiffs + 100; }
495                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
496                 
497                 if(numDiff < minDiff){
498                     minDiff = numDiff;
499                     minCount = 1;
500                     minFGroup.clear();
501                     minFGroup.push_back(it->second);
502                     int tempminFPos = 0;
503                     minFPos.clear();
504                     for(int i=0;i<alnLength;i++){
505                         if(temp[i] != '-'){
506                             tempminFPos++;
507                         }
508                     }
509                     minFPos.push_back(tempminFPos);
510                 }else if(numDiff == minDiff){
511                     minFGroup.push_back(it->second);
512                     int tempminFPos = 0;
513                     for(int i=0;i<alnLength;i++){
514                         if(temp[i] != '-'){
515                             tempminFPos++;
516                         }
517                     }
518                     minFPos.push_back(tempminFPos);
519                 }
520             }
521             
522             //cout << minDiff << '\t' << minCount << '\t' << endl;
523             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
524             else{
525                 //check for reverse match
526                 if (alignment != NULL) { delete alignment; }
527                 
528                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
529                 else{ alignment = NULL; }
530                 
531                 //can you find the barcode
532                 minDiff = 1e6;
533                 minCount = 1;
534                 vector< vector<int> > minRGroup;
535                 vector<int> minRPos;
536                 
537                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
538                     string oligo = it->first;
539                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
540                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
541                         success = bdiffs + 10;
542                         break;
543                     }
544                     
545                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
546                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
547                     oligo = alignment->getSeqAAln();
548                     string temp = alignment->getSeqBAln();
549                     
550                     int alnLength = oligo.length();
551                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
552                     oligo = oligo.substr(0,alnLength);
553                     temp = temp.substr(0,alnLength);
554                     int numDiff = countDiffs(oligo, temp);
555                     
556                     if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
557                     
558                     if (alnLength == 0) { numDiff = bdiffs + 100; }
559                     
560                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
561                     if(numDiff < minDiff){
562                         minDiff = numDiff;
563                         minCount = 1;
564                         minRGroup.clear();
565                         minRGroup.push_back(it->second);
566                         int tempminRPos = 0;
567                         minRPos.clear();
568                         for(int i=0;i<alnLength;i++){
569                             if(temp[i] != '-'){
570                                 tempminRPos++;
571                             }
572                         }
573                         minRPos.push_back(tempminRPos);
574                     }else if(numDiff == minDiff){
575                         int tempminRPos = 0;
576                         for(int i=0;i<alnLength;i++){
577                             if(temp[i] != '-'){
578                                 tempminRPos++;
579                             }
580                         }
581                         minRPos.push_back(tempminRPos);
582                         minRGroup.push_back(it->second);
583                     }
584                     
585                 }
586                 
587                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
588                 else {
589                     bool foundMatch = false;
590                     vector<int> matches;
591                     for (int i = 0; i < minFGroup.size(); i++) {
592                         for (int j = 0; j < minFGroup[i].size(); j++) {
593                             for (int k = 0; k < minRGroup.size(); k++) {
594                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
595                             }
596                         }
597                     }
598                     
599                     int fStart = 0;
600                     int rStart = 0;
601                     if (matches.size() == 1) {
602                         foundMatch = true;
603                         group = matches[0];
604                         fStart = minFPos[0];
605                         rStart = minRPos[0];
606                     }
607                     
608                     //we have an acceptable match for the forward and reverse, but do they match?
609                     if (foundMatch) {
610                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
611                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
612                         success = minDiff;
613                     }else { success = bdiffs + 100;     }
614                 }
615             }
616             
617             if (alignment != NULL) { delete alignment; }
618         }
619         
620         return success;
621         
622     }
623     catch(exception& e) {
624         m->errorOut(e, "TrimOligos", "stripIBarcode");
625         exit(1);
626     }
627     
628 }
629 //*******************************************************************/
630 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
631     try {
632         //look for forward barcode
633         string rawFSequence = forwardSeq.getUnaligned();
634         string rawRSequence = reverseSeq.getUnaligned();
635         int success = bdiffs + 1;       //guilty until proven innocent
636         
637         //can you find the forward barcode
638         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
639             string foligo = it->second.forward;
640             string roligo = it->second.reverse;
641             
642             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
643                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
644                 break;
645             }
646             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
647                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
648                 break;
649             }
650             
651             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
652                 group = it->first;
653                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
654                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
655                 forwardQual.trimQScores(foligo.length(), -1);
656                 reverseQual.trimQScores(roligo.length(), -1);
657                 success = 0;
658                 break;
659             }
660         }
661         
662         //if you found the barcode or if you don't want to allow for diffs
663         if ((bdiffs == 0) || (success == 0)) { return success; }
664         else { //try aligning and see if you can find it
665             Alignment* alignment;
666             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
667             else{ alignment = NULL; }
668             
669             //can you find the barcode
670             int minDiff = 1e6;
671             int minCount = 1;
672             vector< vector<int> > minFGroup;
673             vector<int> minFPos;
674             
675             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
676             /*
677              1 Sarah Westcott
678              2 John Westcott
679              3 Anna Westcott
680              4 Sarah Schloss
681              5 Pat Schloss
682              6 Gail Brown
683              7 Pat Moore
684              only want to look for forward = Sarah, John, Anna, Pat, Gail
685              reverse = Westcott, Schloss, Brown, Moore
686              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
687              */
688             //cout << endl << forwardSeq.getName() << endl;
689             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
690                 string oligo = it->first;
691                 
692                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
693                     success = bdiffs + 10;
694                     break;
695                 }
696                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
697                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
698                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
699                 oligo = alignment->getSeqAAln();
700                 string temp = alignment->getSeqBAln();
701                 
702                 int alnLength = oligo.length();
703                 
704                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
705                 oligo = oligo.substr(0,alnLength);
706                 temp = temp.substr(0,alnLength);
707                 int numDiff = countDiffs(oligo, temp);
708                 
709                 if (alnLength == 0) { numDiff = bdiffs + 100; }
710                 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
711                 
712                 if(numDiff < minDiff){
713                     minDiff = numDiff;
714                     minCount = 1;
715                     minFGroup.clear();
716                     minFGroup.push_back(it->second);
717                     int tempminFPos = 0;
718                     minFPos.clear();
719                     for(int i=0;i<alnLength;i++){
720                         if(temp[i] != '-'){
721                             tempminFPos++;
722                         }
723                     }
724                     minFPos.push_back(tempminFPos);
725                 }else if(numDiff == minDiff){
726                     minFGroup.push_back(it->second);
727                     int tempminFPos = 0;
728                     for(int i=0;i<alnLength;i++){
729                         if(temp[i] != '-'){
730                             tempminFPos++;
731                         }
732                     }
733                     minFPos.push_back(tempminFPos);
734                 }
735             }
736             
737             //cout << minDiff << '\t' << minCount << '\t' << endl;
738             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
739             else{
740                 //check for reverse match
741                 if (alignment != NULL) { delete alignment; }
742                 
743                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
744                 else{ alignment = NULL; }
745                 
746                 //can you find the barcode
747                 minDiff = 1e6;
748                 minCount = 1;
749                 vector< vector<int> > minRGroup;
750                 vector<int> minRPos;
751                 
752                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
753                     string oligo = it->first;
754                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
755                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
756                         success = bdiffs + 10;
757                         break;
758                     }
759                     
760                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
761                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
762                     oligo = alignment->getSeqAAln();
763                     string temp = alignment->getSeqBAln();
764                     
765                     int alnLength = oligo.length();
766                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
767                     oligo = oligo.substr(0,alnLength);
768                     temp = temp.substr(0,alnLength);
769                     int numDiff = countDiffs(oligo, temp);
770                     if (alnLength == 0) { numDiff = bdiffs + 100; }
771                     
772                     if (m->debug) { m->mothurOut("[DEBUG]: reverse " + reverseSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
773                     
774                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
775                     if(numDiff < minDiff){
776                         minDiff = numDiff;
777                         minCount = 1;
778                         minRGroup.clear();
779                         minRGroup.push_back(it->second);
780                         int tempminRPos = 0;
781                         minRPos.clear();
782                         for(int i=0;i<alnLength;i++){
783                             if(temp[i] != '-'){
784                                 tempminRPos++;
785                             }
786                         }
787                         minRPos.push_back(tempminRPos);
788                     }else if(numDiff == minDiff){
789                         int tempminRPos = 0;
790                         for(int i=0;i<alnLength;i++){
791                             if(temp[i] != '-'){
792                                 tempminRPos++;
793                             }
794                         }
795                         minRPos.push_back(tempminRPos);
796                         minRGroup.push_back(it->second);
797                     }
798                     
799                 }
800                 
801                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
802                 else {
803                     bool foundMatch = false;
804                     vector<int> matches;
805                     for (int i = 0; i < minFGroup.size(); i++) {
806                         for (int j = 0; j < minFGroup[i].size(); j++) {
807                             for (int k = 0; k < minRGroup.size(); k++) {
808                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
809                             }
810                         }
811                     }
812                     
813                     int fStart = 0;
814                     int rStart = 0;
815                     if (matches.size() == 1) {
816                         foundMatch = true;
817                         group = matches[0];
818                         fStart = minFPos[0];
819                         rStart = minRPos[0];
820                     }
821                     
822                     //we have an acceptable match for the forward and reverse, but do they match?
823                     if (foundMatch) {
824                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
825                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
826                         forwardQual.trimQScores(fStart, -1);
827                         reverseQual.trimQScores(rStart, -1);
828                         success = minDiff;
829                     }else { success = bdiffs + 100;     }
830                 }
831             }
832             
833             if (alignment != NULL) { delete alignment; }
834         }
835         
836         return success;
837         
838     }
839     catch(exception& e) {
840         m->errorOut(e, "TrimOligos", "stripIBarcode");
841         exit(1);
842     }
843     
844 }
845 //*******************************************************************/
846 int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
847     try {
848         //look for forward barcode
849         string rawSeq = seq.getUnaligned();
850         int success = bdiffs + 1;       //guilty until proven innocent
851         //cout << seq.getName() << endl;
852         //can you find the forward barcode
853         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
854             string foligo = it->second.forward;
855             string roligo = it->second.reverse;
856             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
857             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
858             if(rawSeq.length() < foligo.length()){      //let's just assume that the barcodes are the same length
859                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
860                 break;
861             }
862             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
863                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
864                 break;
865             }
866             
867             if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
868             
869             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
870                 group = it->first;
871                 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
872                 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
873                 if(qual.getName() != ""){
874                     qual.trimQScores(-1, rawSeq.length()-roligo.length());
875                     qual.trimQScores(foligo.length(), -1);
876                 }
877                 success = 0;
878                 break;
879             }
880         }
881         //cout << "success=" << success << endl;
882         //if you found the barcode or if you don't want to allow for diffs
883         if ((bdiffs == 0) || (success == 0)) { return success; }
884         else { //try aligning and see if you can find it
885             Alignment* alignment;
886             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
887             else{ alignment = NULL; }
888             
889             //can you find the barcode
890             int minDiff = 1e6;
891             int minCount = 1;
892             vector< vector<int> > minFGroup;
893             vector<int> minFPos;
894             
895             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
896             /*
897              1 Sarah Westcott
898              2 John Westcott
899              3 Anna Westcott
900              4 Sarah Schloss
901              5 Pat Schloss
902              6 Gail Brown
903              7 Pat Moore
904              only want to look for forward = Sarah, John, Anna, Pat, Gail
905              reverse = Westcott, Schloss, Brown, Moore
906              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
907              */
908             //cout << endl << forwardSeq.getName() << endl;
909             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
910                 string oligo = it->first;
911                 
912                 if(rawSeq.length() < maxFBarcodeLength){        //let's just assume that the barcodes are the same length
913                     success = bdiffs + 10;
914                     break;
915                 }
916                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
917                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
918                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
919                 oligo = alignment->getSeqAAln();
920                 string temp = alignment->getSeqBAln();
921                 
922                 int alnLength = oligo.length();
923                 
924                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
925                 oligo = oligo.substr(0,alnLength);
926                 temp = temp.substr(0,alnLength);
927                 int numDiff = countDiffs(oligo, temp);
928                 
929                 if (alnLength == 0) { numDiff = bdiffs + 100; }
930                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
931                 
932                 if(numDiff < minDiff){
933                     minDiff = numDiff;
934                     minCount = 1;
935                     minFGroup.clear();
936                     minFGroup.push_back(it->second);
937                     int tempminFPos = 0;
938                     minFPos.clear();
939                     for(int i=0;i<alnLength;i++){
940                         if(temp[i] != '-'){
941                             tempminFPos++;
942                         }
943                     }
944                     minFPos.push_back(tempminFPos);
945                 }else if(numDiff == minDiff){
946                     minFGroup.push_back(it->second);
947                     int tempminFPos = 0;
948                     for(int i=0;i<alnLength;i++){
949                         if(temp[i] != '-'){
950                             tempminFPos++;
951                         }
952                     }
953                     minFPos.push_back(tempminFPos);
954                 }
955             }
956             
957             //cout << minDiff << '\t' << minCount << '\t' << endl;
958             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
959             else{
960                 //check for reverse match
961                 if (alignment != NULL) { delete alignment; }
962                 
963                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
964                 else{ alignment = NULL; }
965                 
966                 //can you find the barcode
967                 minDiff = 1e6;
968                 minCount = 1;
969                 vector< vector<int> > minRGroup;
970                 vector<int> minRPos;
971                 
972                 string rawRSequence = reverseOligo(seq.getUnaligned());
973                 //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl;
974                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
975                     string oligo = reverseOligo(it->first);
976                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
977                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
978                         success = bdiffs + 10;
979                         break;
980                     }
981                     
982                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
983                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
984                     oligo = alignment->getSeqAAln();
985                     string temp = alignment->getSeqBAln();
986                     
987                     int alnLength = oligo.length();
988                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
989                     oligo = oligo.substr(0,alnLength);
990                     temp = temp.substr(0,alnLength);
991                     int numDiff = countDiffs(oligo, temp);
992                     if (alnLength == 0) { numDiff = bdiffs + 100; }
993                     
994                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
995                     if(numDiff < minDiff){
996                         minDiff = numDiff;
997                         minCount = 1;
998                         minRGroup.clear();
999                         minRGroup.push_back(it->second);
1000                         int tempminRPos = 0;
1001                         minRPos.clear();
1002                         for(int i=0;i<alnLength;i++){
1003                             if(temp[i] != '-'){
1004                                 tempminRPos++;
1005                             }
1006                         }
1007                         minRPos.push_back(tempminRPos);
1008                     }else if(numDiff == minDiff){
1009                         int tempminRPos = 0;
1010                         for(int i=0;i<alnLength;i++){
1011                             if(temp[i] != '-'){
1012                                 tempminRPos++;
1013                             }
1014                         }
1015                         minRPos.push_back(tempminRPos);
1016                         minRGroup.push_back(it->second);
1017                     }
1018                     
1019                 }
1020                 
1021                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
1022                 else {
1023                     bool foundMatch = false;
1024                     vector<int> matches;
1025                     for (int i = 0; i < minFGroup.size(); i++) {
1026                         for (int j = 0; j < minFGroup[i].size(); j++) {
1027                             for (int k = 0; k < minRGroup.size(); k++) {
1028                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1029                             }
1030                         }
1031                     }
1032                     
1033                     int fStart = 0;
1034                     int rStart = 0;
1035                     if (matches.size() == 1) {
1036                         foundMatch = true;
1037                         group = matches[0];
1038                         fStart = minFPos[0];
1039                         rStart = rawSeq.length() - minRPos[0];
1040                         if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence
1041                     }
1042                     
1043                     //we have an acceptable match for the forward and reverse, but do they match?
1044                     if (foundMatch) {
1045                         string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1046                         seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1047                         if(qual.getName() != ""){
1048                             qual.trimQScores(-1, rStart);
1049                             qual.trimQScores(fStart, -1);
1050                         }
1051                         success = minDiff;
1052                         //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
1053                     }else { success = bdiffs + 100;     }
1054                 }
1055             }
1056             
1057             if (alignment != NULL) { delete alignment; }
1058         }
1059         
1060         return success;
1061         
1062     }
1063     catch(exception& e) {
1064         m->errorOut(e, "TrimOligos", "stripPairedBarcode");
1065         exit(1);
1066     }
1067     
1068 }
1069 //*******************************************************************/
1070 int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1071     try {
1072         //look for forward
1073         string rawSeq = seq.getUnaligned();
1074         int success = pdiffs + 1;       //guilty until proven innocent
1075         //cout << seq.getName() << endl;
1076         //can you find the forward
1077         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1078             string foligo = it->second.forward;
1079             string roligo = it->second.reverse;
1080             
1081             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
1082             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
1083             if(rawSeq.length() < foligo.length()){      //let's just assume that the barcodes are the same length
1084                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1085                 break;
1086             }
1087             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
1088                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1089                 break;
1090             }
1091             
1092             if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
1093             
1094             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
1095                 group = it->first;
1096                 if (!keepForward) {
1097                     string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
1098                     seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
1099                     if(qual.getName() != ""){
1100                         qual.trimQScores(-1, rawSeq.length()-roligo.length());
1101                         qual.trimQScores(foligo.length(), -1);
1102                     }
1103                 }
1104                 success = 0;
1105                 break;
1106             }
1107         }
1108         //cout << "success=" << success << endl;
1109         //if you found the barcode or if you don't want to allow for diffs
1110         if ((pdiffs == 0) || (success == 0)) { return success; }
1111         else { //try aligning and see if you can find it
1112             Alignment* alignment;
1113             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1114             else{ alignment = NULL; }
1115             
1116             //can you find the barcode
1117             int minDiff = 1e6;
1118             int minCount = 1;
1119             vector< vector<int> > minFGroup;
1120             vector<int> minFPos;
1121             
1122             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1123             /*
1124              1 Sarah Westcott
1125              2 John Westcott
1126              3 Anna Westcott
1127              4 Sarah Schloss
1128              5 Pat Schloss
1129              6 Gail Brown
1130              7 Pat Moore
1131              only want to look for forward = Sarah, John, Anna, Pat, Gail
1132              reverse = Westcott, Schloss, Brown, Moore
1133              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1134              */
1135             //cout << endl << forwardSeq.getName() << endl;
1136             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1137                 string oligo = it->first;
1138                 
1139                 if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1140                     success = pdiffs + 10;
1141                     break;
1142                 }
1143                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl;
1144                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1145                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
1146                 oligo = alignment->getSeqAAln();
1147                 string temp = alignment->getSeqBAln();
1148                 
1149                 int alnLength = oligo.length();
1150                 
1151                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1152                 oligo = oligo.substr(0,alnLength);
1153                 temp = temp.substr(0,alnLength);
1154                 int numDiff = countDiffs(oligo, temp);
1155                 
1156                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1157                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1158                 
1159                 if(numDiff < minDiff){
1160                     minDiff = numDiff;
1161                     minCount = 1;
1162                     minFGroup.clear();
1163                     minFGroup.push_back(it->second);
1164                     int tempminFPos = 0;
1165                     minFPos.clear();
1166                     for(int i=0;i<alnLength;i++){
1167                         if(temp[i] != '-'){
1168                             tempminFPos++;
1169                         }
1170                     }
1171                     minFPos.push_back(tempminFPos);
1172                 }else if(numDiff == minDiff){
1173                     minFGroup.push_back(it->second);
1174                     int tempminFPos = 0;
1175                     for(int i=0;i<alnLength;i++){
1176                         if(temp[i] != '-'){
1177                             tempminFPos++;
1178                         }
1179                     }
1180                     minFPos.push_back(tempminFPos);
1181                 }
1182             }
1183             
1184             //cout << minDiff << '\t' << minCount << '\t' << endl;
1185             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1186             else{
1187                 //check for reverse match
1188                 if (alignment != NULL) { delete alignment; }
1189                 
1190                 if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1191                 else{ alignment = NULL; }
1192                 
1193                 //can you find the barcode
1194                 minDiff = 1e6;
1195                 minCount = 1;
1196                 vector< vector<int> > minRGroup;
1197                 vector<int> minRPos;
1198                 
1199                 string rawRSequence = reverseOligo(seq.getUnaligned());
1200                 
1201                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1202                     string oligo = reverseOligo(it->first);
1203                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
1204                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1205                         success = pdiffs + 10;
1206                         break;
1207                     }
1208                     
1209                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1210                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1211                     oligo = alignment->getSeqAAln();
1212                     string temp = alignment->getSeqBAln();
1213                     
1214                     int alnLength = oligo.length();
1215                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1216                     oligo = oligo.substr(0,alnLength);
1217                     temp = temp.substr(0,alnLength);
1218                     int numDiff = countDiffs(oligo, temp);
1219                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1220                     
1221                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1222                     if(numDiff < minDiff){
1223                         minDiff = numDiff;
1224                         minCount = 1;
1225                         minRGroup.clear();
1226                         minRGroup.push_back(it->second);
1227                         int tempminRPos = 0;
1228                         minRPos.clear();
1229                         for(int i=0;i<alnLength;i++){
1230                             if(temp[i] != '-'){
1231                                 tempminRPos++;
1232                             }
1233                         }
1234                         minRPos.push_back(tempminRPos);
1235                     }else if(numDiff == minDiff){
1236                         int tempminRPos = 0;
1237                         for(int i=0;i<alnLength;i++){
1238                             if(temp[i] != '-'){
1239                                 tempminRPos++;
1240                             }
1241                         }
1242                         minRPos.push_back(tempminRPos);
1243                         minRGroup.push_back(it->second);
1244                     }
1245                     
1246                 }
1247                 
1248                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1249                 else {
1250                     bool foundMatch = false;
1251                     vector<int> matches;
1252                     for (int i = 0; i < minFGroup.size(); i++) {
1253                         for (int j = 0; j < minFGroup[i].size(); j++) {
1254                             for (int k = 0; k < minRGroup.size(); k++) {
1255                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1256                             }
1257                         }
1258                     }
1259                     
1260                     int fStart = 0;
1261                     int rStart = 0;
1262                     if (matches.size() == 1) {
1263                         foundMatch = true;
1264                         group = matches[0];
1265                         fStart = minFPos[0];
1266                         rStart = rawSeq.length() - minRPos[0];
1267                         if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence
1268                     }
1269                     
1270                     //we have an acceptable match for the forward and reverse, but do they match?
1271                     if (foundMatch) {
1272                         if (!keepForward) {
1273                             string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1274                             seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1275                             if(qual.getName() != ""){
1276                                 qual.trimQScores(-1, rStart);
1277                                 qual.trimQScores(fStart, -1);
1278                             }
1279                         }
1280                         success = minDiff;
1281                         //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
1282                     }else { success = pdiffs + 100;     }
1283                 }
1284             }
1285             
1286             if (alignment != NULL) { delete alignment; }
1287         }
1288         
1289         return success;
1290         
1291     }
1292     catch(exception& e) {
1293         m->errorOut(e, "TrimOligos", "stripPairedPrimers");
1294         exit(1);
1295     }
1296     
1297 }
1298
1299 //*******************************************************************/
1300 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
1301     try {
1302         //look for forward barcode
1303         string rawFSequence = forwardSeq.getUnaligned();
1304         string rawRSequence = reverseSeq.getUnaligned();
1305         int success = pdiffs + 1;       //guilty until proven innocent
1306         
1307         //can you find the forward barcode
1308         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1309             string foligo = it->second.forward;
1310             string roligo = it->second.reverse;
1311             
1312             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
1313                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1314                 break;
1315             }
1316             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
1317                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1318                 break;
1319             }
1320             
1321             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
1322                 group = it->first;
1323                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1324                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1325                 forwardQual.trimQScores(foligo.length(), -1);
1326                 reverseQual.trimQScores(roligo.length(), -1);
1327                 success = 0;
1328                 break;
1329             }
1330         }
1331         
1332         //if you found the barcode or if you don't want to allow for diffs
1333         if ((pdiffs == 0) || (success == 0)) { return success; }
1334         else { //try aligning and see if you can find it
1335             Alignment* alignment;
1336             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1337             else{ alignment = NULL; }
1338             
1339             //can you find the barcode
1340             int minDiff = 1e6;
1341             int minCount = 1;
1342             vector< vector<int> > minFGroup;
1343             vector<int> minFPos;
1344             
1345             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1346             /*
1347              1 Sarah Westcott
1348              2 John Westcott
1349              3 Anna Westcott
1350              4 Sarah Schloss
1351              5 Pat Schloss
1352              6 Gail Brown
1353              7 Pat Moore
1354              only want to look for forward = Sarah, John, Anna, Pat, Gail
1355              reverse = Westcott, Schloss, Brown, Moore
1356              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1357              */
1358             //cout << endl << forwardSeq.getName() << endl;
1359             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1360                 string oligo = it->first;
1361                 
1362                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
1363                     success = pdiffs + 10;
1364                     break;
1365                 }
1366                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1367                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1368                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1369                 oligo = alignment->getSeqAAln();
1370                 string temp = alignment->getSeqBAln();
1371                 
1372                 int alnLength = oligo.length();
1373                 
1374                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1375                 oligo = oligo.substr(0,alnLength);
1376                 temp = temp.substr(0,alnLength);
1377                 int numDiff = countDiffs(oligo, temp);
1378                 
1379                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1380                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1381                 
1382                 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
1383                 
1384                 if(numDiff < minDiff){
1385                     minDiff = numDiff;
1386                     minCount = 1;
1387                     minFGroup.clear();
1388                     minFGroup.push_back(it->second);
1389                     int tempminFPos = 0;
1390                     minFPos.clear();
1391                     for(int i=0;i<alnLength;i++){
1392                         if(temp[i] != '-'){
1393                             tempminFPos++;
1394                         }
1395                     }
1396                     minFPos.push_back(tempminFPos);
1397                 }else if(numDiff == minDiff){
1398                     minFGroup.push_back(it->second);
1399                     int tempminFPos = 0;
1400                     for(int i=0;i<alnLength;i++){
1401                         if(temp[i] != '-'){
1402                             tempminFPos++;
1403                         }
1404                     }
1405                     minFPos.push_back(tempminFPos);
1406                 }
1407             }
1408             
1409             //cout << minDiff << '\t' << minCount << '\t' << endl;
1410             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1411             else{
1412                 //check for reverse match
1413                 if (alignment != NULL) { delete alignment; }
1414                 
1415                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1416                 else{ alignment = NULL; }
1417                 
1418                 //can you find the barcode
1419                 minDiff = 1e6;
1420                 minCount = 1;
1421                 vector< vector<int> > minRGroup;
1422                 vector<int> minRPos;
1423                 
1424                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1425                     string oligo = it->first;
1426                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1427                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1428                         success = pdiffs + 10;
1429                         break;
1430                     }
1431                     
1432                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1433                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1434                     oligo = alignment->getSeqAAln();
1435                     string temp = alignment->getSeqBAln();
1436                     
1437                     int alnLength = oligo.length();
1438                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1439                     oligo = oligo.substr(0,alnLength);
1440                     temp = temp.substr(0,alnLength);
1441                     int numDiff = countDiffs(oligo, temp);
1442                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1443                     
1444                     if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
1445                     
1446                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1447                     if(numDiff < minDiff){
1448                         minDiff = numDiff;
1449                         minCount = 1;
1450                         minRGroup.clear();
1451                         minRGroup.push_back(it->second);
1452                         int tempminRPos = 0;
1453                         minRPos.clear();
1454                         for(int i=0;i<alnLength;i++){
1455                             if(temp[i] != '-'){
1456                                 tempminRPos++;
1457                             }
1458                         }
1459                         minRPos.push_back(tempminRPos);
1460                     }else if(numDiff == minDiff){
1461                         int tempminRPos = 0;
1462                         for(int i=0;i<alnLength;i++){
1463                             if(temp[i] != '-'){
1464                                 tempminRPos++;
1465                             }
1466                         }
1467                         minRPos.push_back(tempminRPos);
1468                         minRGroup.push_back(it->second);
1469                     }
1470                     
1471                 }
1472                 
1473                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1474                 else {
1475                     bool foundMatch = false;
1476                     vector<int> matches;
1477                     for (int i = 0; i < minFGroup.size(); i++) {
1478                         for (int j = 0; j < minFGroup[i].size(); j++) {
1479                             for (int k = 0; k < minRGroup.size(); k++) {
1480                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1481                             }
1482                         }
1483                     }
1484                     
1485                     int fStart = 0;
1486                     int rStart = 0;
1487                     if (matches.size() == 1) {
1488                         foundMatch = true;
1489                         group = matches[0];
1490                         fStart = minFPos[0];
1491                         rStart = minRPos[0];
1492                     }
1493                     
1494                     //we have an acceptable match for the forward and reverse, but do they match?
1495                     if (foundMatch) {
1496                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1497                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1498                         forwardQual.trimQScores(fStart, -1);
1499                         reverseQual.trimQScores(rStart, -1);
1500                         success = minDiff;
1501                     }else { success = pdiffs + 100;     }
1502                 }
1503             }
1504             
1505             if (alignment != NULL) { delete alignment; }
1506         }
1507         
1508         return success;
1509         
1510     }
1511     catch(exception& e) {
1512         m->errorOut(e, "TrimOligos", "stripIForward");
1513         exit(1);
1514     }
1515     
1516 }
1517 //*******************************************************************/
1518 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
1519     try {
1520         //look for forward barcode
1521         string rawFSequence = forwardSeq.getUnaligned();
1522         string rawRSequence = reverseSeq.getUnaligned();
1523         int success = pdiffs + 1;       //guilty until proven innocent
1524         
1525         //can you find the forward barcode
1526         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1527             string foligo = it->second.forward;
1528             string roligo = it->second.reverse;
1529             
1530             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
1531                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1532                 break;
1533             }
1534             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
1535                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1536                 break;
1537             }
1538             
1539             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1540                 group = it->first;
1541                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1542                 reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
1543                 success = 0;
1544                 break;
1545             }
1546         }
1547         
1548         //if you found the barcode or if you don't want to allow for diffs
1549         if ((pdiffs == 0) || (success == 0)) { return success; }
1550         else { //try aligning and see if you can find it
1551             Alignment* alignment;
1552             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1553             else{ alignment = NULL; }
1554             
1555             //can you find the barcode
1556             int minDiff = 1e6;
1557             int minCount = 1;
1558             vector< vector<int> > minFGroup;
1559             vector<int> minFPos;
1560             
1561             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1562             /*
1563              1 Sarah Westcott
1564              2 John Westcott
1565              3 Anna Westcott
1566              4 Sarah Schloss
1567              5 Pat Schloss
1568              6 Gail Brown
1569              7 Pat Moore
1570              only want to look for forward = Sarah, John, Anna, Pat, Gail
1571              reverse = Westcott, Schloss, Brown, Moore
1572              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1573              */
1574             //cout << endl << forwardSeq.getName() << endl;
1575             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1576                 string oligo = it->first;
1577                 
1578                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
1579                     success = pdiffs + 10;
1580                     break;
1581                 }
1582                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1583                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1584                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1585                 oligo = alignment->getSeqAAln();
1586                 string temp = alignment->getSeqBAln();
1587                 
1588                 int alnLength = oligo.length();
1589                 
1590                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1591                 oligo = oligo.substr(0,alnLength);
1592                 temp = temp.substr(0,alnLength);
1593                 int numDiff = countDiffs(oligo, temp);
1594                 
1595                 if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
1596                 
1597                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1598                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1599                 
1600                 if(numDiff < minDiff){
1601                     minDiff = numDiff;
1602                     minCount = 1;
1603                     minFGroup.clear();
1604                     minFGroup.push_back(it->second);
1605                     int tempminFPos = 0;
1606                     minFPos.clear();
1607                     for(int i=0;i<alnLength;i++){
1608                         if(temp[i] != '-'){
1609                             tempminFPos++;
1610                         }
1611                     }
1612                     minFPos.push_back(tempminFPos);
1613                 }else if(numDiff == minDiff){
1614                     minFGroup.push_back(it->second);
1615                     int tempminFPos = 0;
1616                     for(int i=0;i<alnLength;i++){
1617                         if(temp[i] != '-'){
1618                             tempminFPos++;
1619                         }
1620                     }
1621                     minFPos.push_back(tempminFPos);
1622                 }
1623             }
1624             
1625             //cout << minDiff << '\t' << minCount << '\t' << endl;
1626             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1627             else{
1628                 //check for reverse match
1629                 if (alignment != NULL) { delete alignment; }
1630                 
1631                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1632                 else{ alignment = NULL; }
1633                 
1634                 //can you find the barcode
1635                 minDiff = 1e6;
1636                 minCount = 1;
1637                 vector< vector<int> > minRGroup;
1638                 vector<int> minRPos;
1639                 
1640                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1641                     string oligo = it->first;
1642                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1643                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1644                         success = pdiffs + 10;
1645                         break;
1646                     }
1647                     
1648                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1649                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1650                     oligo = alignment->getSeqAAln();
1651                     string temp = alignment->getSeqBAln();
1652                     
1653                     int alnLength = oligo.length();
1654                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1655                     oligo = oligo.substr(0,alnLength);
1656                     temp = temp.substr(0,alnLength);
1657                     int numDiff = countDiffs(oligo, temp);
1658                     
1659                     if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
1660                     
1661                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1662                     
1663                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1664                     if(numDiff < minDiff){
1665                         minDiff = numDiff;
1666                         minCount = 1;
1667                         minRGroup.clear();
1668                         minRGroup.push_back(it->second);
1669                         int tempminRPos = 0;
1670                         minRPos.clear();
1671                         for(int i=0;i<alnLength;i++){
1672                             if(temp[i] != '-'){
1673                                 tempminRPos++;
1674                             }
1675                         }
1676                         minRPos.push_back(tempminRPos);
1677                     }else if(numDiff == minDiff){
1678                         int tempminRPos = 0;
1679                         for(int i=0;i<alnLength;i++){
1680                             if(temp[i] != '-'){
1681                                 tempminRPos++;
1682                             }
1683                         }
1684                         minRPos.push_back(tempminRPos);
1685                         minRGroup.push_back(it->second);
1686                     }
1687                     
1688                 }
1689                 
1690                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1691                 else {
1692                     bool foundMatch = false;
1693                     vector<int> matches;
1694                     for (int i = 0; i < minFGroup.size(); i++) {
1695                         for (int j = 0; j < minFGroup[i].size(); j++) {
1696                             for (int k = 0; k < minRGroup.size(); k++) {
1697                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1698                             }
1699                         }
1700                     }
1701                     
1702                     int fStart = 0;
1703                     int rStart = 0;
1704                     if (matches.size() == 1) {
1705                         foundMatch = true;
1706                         group = matches[0];
1707                         fStart = minFPos[0];
1708                         rStart = minRPos[0];
1709                     }
1710                     
1711                     //we have an acceptable match for the forward and reverse, but do they match?
1712                     if (foundMatch) {
1713                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1714                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1715                         success = minDiff;
1716                     }else { success = pdiffs + 100;     }
1717                 }
1718             }
1719             
1720             if (alignment != NULL) { delete alignment; }
1721         }
1722         
1723         return success;
1724         
1725     }
1726     catch(exception& e) {
1727         m->errorOut(e, "TrimOligos", "stripIForward");
1728         exit(1);
1729     }
1730     
1731 }
1732 //*******************************************************************/
1733 int TrimOligos::stripBarcode(Sequence& seq, int& group){
1734     try {
1735         
1736         string rawSequence = seq.getUnaligned();
1737         int success = bdiffs + 1;       //guilty until proven innocent
1738         
1739         //can you find the barcode
1740         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1741             string oligo = it->first;
1742             if(rawSequence.length() < oligo.length()){  //let's just assume that the barcodes are the same length
1743                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1744                 break;
1745             }
1746             
1747             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1748                 group = it->second;
1749                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1750                 
1751                 success = 0;
1752                 break;
1753             }
1754         }
1755         
1756         //if you found the barcode or if you don't want to allow for diffs
1757         if ((bdiffs == 0) || (success == 0)) { return success; }
1758         
1759         else { //try aligning and see if you can find it
1760             Alignment* alignment;
1761             if (barcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
1762             else{ alignment = NULL; }
1763             
1764             //can you find the barcode
1765             int minDiff = 1e6;
1766             int minCount = 1;
1767             int minGroup = -1;
1768             int minPos = 0;
1769             
1770             for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1771                 string oligo = it->first;
1772                 // int length = oligo.length();
1773                 
1774                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
1775                     success = bdiffs + 10;
1776                     break;
1777                 }
1778                 
1779                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1780                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1781                 oligo = alignment->getSeqAAln();
1782                 string temp = alignment->getSeqBAln();
1783                 
1784                 int alnLength = oligo.length();
1785                 
1786                 for(int i=oligo.length()-1;i>=0;i--){
1787                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
1788                 }
1789                 oligo = oligo.substr(0,alnLength);
1790                 temp = temp.substr(0,alnLength);
1791                 
1792                 int numDiff = countDiffs(oligo, temp);
1793                 
1794                 if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " aligned fragment =" + temp + ", barcode =" + oligo + ", numDiffs = " + toString(numDiff) + "\n"); }
1795                 
1796                 if(numDiff < minDiff){
1797                     minDiff = numDiff;
1798                     minCount = 1;
1799                     minGroup = it->second;
1800                     minPos = 0;
1801                     for(int i=0;i<alnLength;i++){
1802                         if(temp[i] != '-'){
1803                             minPos++;
1804                         }
1805                     }
1806                 }
1807                 else if(numDiff == minDiff){
1808                     minCount++;
1809                 }
1810                 
1811             }
1812             
1813             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
1814             else if(minCount > 1)       {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1815             else{       //use the best match
1816                 group = minGroup;
1817                 seq.setUnaligned(rawSequence.substr(minPos));
1818                 success = minDiff;
1819             }
1820             
1821             if (alignment != NULL) { delete alignment; }
1822             
1823         }
1824         
1825         return success;
1826         
1827     }
1828     catch(exception& e) {
1829         m->errorOut(e, "TrimOligos", "stripBarcode");
1830         exit(1);
1831     }
1832     
1833 }
1834
1835 /********************************************************************/
1836 int TrimOligos::stripForward(Sequence& seq, int& group){
1837     try {
1838         
1839         string rawSequence = seq.getUnaligned();
1840         int success = pdiffs + 1;       //guilty until proven innocent
1841         
1842         //can you find the primer
1843         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1844             string oligo = it->first;
1845             if(rawSequence.length() < oligo.length()){  //let's just assume that the primers are the same length
1846                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1847                 break;
1848             }
1849             
1850             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1851                 group = it->second;
1852                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1853                 success = 0;
1854                 break;
1855             }
1856         }
1857         
1858         //if you found the barcode or if you don't want to allow for diffs
1859         if ((pdiffs == 0) || (success == 0)) {  return success; }
1860         
1861         else { //try aligning and see if you can find it
1862             Alignment* alignment;
1863             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1864             else{ alignment = NULL; }
1865             
1866             //can you find the barcode
1867             int minDiff = 1e6;
1868             int minCount = 1;
1869             int minGroup = -1;
1870             int minPos = 0;
1871             
1872             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1873                 string oligo = it->first;
1874                 // int length = oligo.length();
1875                 
1876                 if(rawSequence.length() < maxFPrimerLength){
1877                     success = pdiffs + 100;
1878                     break;
1879                 }
1880                 
1881                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1882                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1883                 oligo = alignment->getSeqAAln();
1884                 string temp = alignment->getSeqBAln();
1885                 
1886                 int alnLength = oligo.length();
1887                 
1888                 for(int i=oligo.length()-1;i>=0;i--){
1889                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
1890                 }
1891                 oligo = oligo.substr(0,alnLength);
1892                 temp = temp.substr(0,alnLength);
1893                 
1894                 int numDiff = countDiffs(oligo, temp);
1895                 
1896                 if(numDiff < minDiff){
1897                     minDiff = numDiff;
1898                     minCount = 1;
1899                     minGroup = it->second;
1900                     minPos = 0;
1901                     for(int i=0;i<alnLength;i++){
1902                         if(temp[i] != '-'){
1903                             minPos++;
1904                         }
1905                     }
1906                 }
1907                 else if(numDiff == minDiff){
1908                     minCount++;
1909                 }
1910                 
1911             }
1912             
1913             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1914             else if(minCount > 1)       {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1915             else{       //use the best match
1916                 group = minGroup;
1917                 seq.setUnaligned(rawSequence.substr(minPos));
1918                 success = minDiff;
1919             }
1920             
1921             if (alignment != NULL) { delete alignment; }
1922             
1923         }
1924         
1925         return success;
1926         
1927     }
1928     catch(exception& e) {
1929         m->errorOut(e, "TrimOligos", "stripForward");
1930         exit(1);
1931     }
1932 }
1933 //*******************************************************************/
1934 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1935     try {
1936         
1937         if (paired) { int success = stripPairedPrimers(seq, qual, group, keepForward); return success; }
1938         
1939         string rawSequence = seq.getUnaligned();
1940         int success = pdiffs + 1;       //guilty until proven innocent
1941         
1942         //can you find the primer
1943         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1944             string oligo = it->first;
1945             if(rawSequence.length() < oligo.length()){  //let's just assume that the primers are the same length
1946                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1947                 break;
1948             }
1949             
1950             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1951                 group = it->second;
1952                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
1953                 if(qual.getName() != ""){
1954                     if (!keepForward) { qual.trimQScores(oligo.length(), -1); }
1955                 }
1956                 success = 0;
1957                 break;
1958             }
1959         }
1960         
1961         //if you found the barcode or if you don't want to allow for diffs
1962         if ((pdiffs == 0) || (success == 0)) { return success; }
1963         
1964         else { //try aligning and see if you can find it
1965             Alignment* alignment;
1966             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1967             else{ alignment = NULL; }
1968             
1969             //can you find the barcode
1970             int minDiff = 1e6;
1971             int minCount = 1;
1972             int minGroup = -1;
1973             int minPos = 0;
1974             
1975             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1976                 string oligo = it->first;
1977                 // int length = oligo.length();
1978                 
1979                 if(rawSequence.length() < maxFPrimerLength){
1980                     success = pdiffs + 100;
1981                     break;
1982                 }
1983                 
1984                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1985                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1986                 oligo = alignment->getSeqAAln();
1987                 string temp = alignment->getSeqBAln();
1988                 
1989                 int alnLength = oligo.length();
1990                 
1991                 for(int i=oligo.length()-1;i>=0;i--){
1992                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
1993                 }
1994                 oligo = oligo.substr(0,alnLength);
1995                 temp = temp.substr(0,alnLength);
1996                 
1997                 int numDiff = countDiffs(oligo, temp);
1998                 
1999                 if(numDiff < minDiff){
2000                     minDiff = numDiff;
2001                     minCount = 1;
2002                     minGroup = it->second;
2003                     minPos = 0;
2004                     for(int i=0;i<alnLength;i++){
2005                         if(temp[i] != '-'){
2006                             minPos++;
2007                         }
2008                     }
2009                 }
2010                 else if(numDiff == minDiff){
2011                     minCount++;
2012                 }
2013                 
2014             }
2015             
2016             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
2017             else if(minCount > 1)       {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
2018             else{       //use the best match
2019                 group = minGroup;
2020                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
2021                 if(qual.getName() != ""){
2022                     if (!keepForward) { qual.trimQScores(minPos, -1); }
2023                 }
2024                 success = minDiff;
2025             }
2026             
2027             if (alignment != NULL) { delete alignment; }
2028             
2029         }
2030         
2031         return success;
2032         
2033     }
2034     catch(exception& e) {
2035         m->errorOut(e, "TrimOligos", "stripForward");
2036         exit(1);
2037     }
2038 }
2039 //******************************************************************/
2040 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
2041     try {
2042         string rawSequence = seq.getUnaligned();
2043         bool success = 0;       //guilty until proven innocent
2044         
2045         for(int i=0;i<revPrimer.size();i++){
2046             string oligo = revPrimer[i];
2047             
2048             if(rawSequence.length() < oligo.length()){
2049                 success = 0;
2050                 break;
2051             }
2052             
2053             if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2054                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2055                 if(qual.getName() != ""){
2056                     qual.trimQScores(-1, rawSequence.length()-oligo.length());
2057                 }
2058                 success = 1;
2059                 break;
2060             }
2061         }       
2062         return success;
2063         
2064     }
2065     catch(exception& e) {
2066         m->errorOut(e, "TrimOligos", "stripReverse");
2067         exit(1);
2068     }
2069 }
2070 //******************************************************************/
2071 bool TrimOligos::stripReverse(Sequence& seq){
2072     try {
2073         
2074         string rawSequence = seq.getUnaligned();
2075         bool success = 0;       //guilty until proven innocent
2076         
2077         for(int i=0;i<revPrimer.size();i++){
2078             string oligo = revPrimer[i];
2079             
2080             if(rawSequence.length() < oligo.length()){
2081                 success = 0;
2082                 break;
2083             }
2084             
2085             if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2086                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2087                 success = 1;
2088                 break;
2089             }
2090         }       
2091         
2092         return success;
2093         
2094     }
2095     catch(exception& e) {
2096         m->errorOut(e, "TrimOligos", "stripReverse");
2097         exit(1);
2098     }
2099 }
2100 //******************************************************************/
2101 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
2102     try {
2103         string rawSequence = seq.getUnaligned();
2104         bool success = ldiffs + 1;      //guilty until proven innocent
2105         
2106         for(int i=0;i<linker.size();i++){
2107             string oligo = linker[i];
2108             
2109             if(rawSequence.length() < oligo.length()){
2110                 success = ldiffs + 10;
2111                 break;
2112             }
2113             
2114             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2115                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2116                 if(qual.getName() != ""){
2117                     qual.trimQScores(oligo.length(), -1);
2118                 }
2119                 success = 0;
2120                 break;
2121             }
2122         }
2123         
2124         //if you found the linker or if you don't want to allow for diffs
2125         if ((ldiffs == 0) || (success == 0)) { return success; }
2126         
2127         else { //try aligning and see if you can find it
2128             Alignment* alignment;
2129             if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }   
2130             else{ alignment = NULL; }
2131             
2132             //can you find the barcode
2133             int minDiff = 1e6;
2134             int minCount = 1;
2135             int minPos = 0;
2136             
2137             for(int i = 0; i < linker.size(); i++){
2138                 string oligo = linker[i];
2139                 // int length = oligo.length();
2140                 
2141                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
2142                     success = ldiffs + 10;
2143                     break;
2144                 }
2145                 
2146                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2147                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2148                 oligo = alignment->getSeqAAln();
2149                 string temp = alignment->getSeqBAln();
2150                 
2151                 int alnLength = oligo.length();
2152                 
2153                 for(int i=oligo.length()-1;i>=0;i--){
2154                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2155                 }
2156                 oligo = oligo.substr(0,alnLength);
2157                 temp = temp.substr(0,alnLength);
2158                 
2159                 int numDiff = countDiffs(oligo, temp);
2160                 
2161                 if(numDiff < minDiff){
2162                     minDiff = numDiff;
2163                     minCount = 1;
2164                     minPos = 0;
2165                     for(int i=0;i<alnLength;i++){
2166                         if(temp[i] != '-'){
2167                             minPos++;
2168                         }
2169                     }
2170                 }
2171                 else if(numDiff == minDiff){
2172                     minCount++;
2173                 }
2174                 
2175             }
2176             
2177             if(minDiff > ldiffs)        {       success = minDiff;      }       //no good matches
2178             else if(minCount > 1)       {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
2179             else{       //use the best match
2180                 seq.setUnaligned(rawSequence.substr(minPos));
2181                 
2182                 if(qual.getName() != ""){
2183                     qual.trimQScores(minPos, -1);
2184                 }
2185                 success = minDiff;
2186             }
2187             
2188             if (alignment != NULL) { delete alignment; }
2189             
2190         }
2191         
2192         
2193         return success;
2194         
2195     }
2196     catch(exception& e) {
2197         m->errorOut(e, "TrimOligos", "stripLinker");
2198         exit(1);
2199     }
2200 }
2201 //******************************************************************/
2202 bool TrimOligos::stripLinker(Sequence& seq){
2203     try {
2204         
2205         string rawSequence = seq.getUnaligned();
2206         bool success = ldiffs +1;       //guilty until proven innocent
2207         
2208         for(int i=0;i<linker.size();i++){
2209             string oligo = linker[i];
2210             
2211             if(rawSequence.length() < oligo.length()){
2212                 success = ldiffs +10;
2213                 break;
2214             }
2215             
2216             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2217                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2218                 success = 0;
2219                 break;
2220             }
2221         }       
2222         
2223         //if you found the linker or if you don't want to allow for diffs
2224         if ((ldiffs == 0) || (success == 0)) { return success; }
2225         
2226         else { //try aligning and see if you can find it
2227             Alignment* alignment;
2228             if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2229             else{ alignment = NULL; }
2230             
2231             //can you find the barcode
2232             int minDiff = 1e6;
2233             int minCount = 1;
2234             int minPos = 0;
2235             
2236             for(int i = 0; i < linker.size(); i++){
2237                 string oligo = linker[i];
2238                 // int length = oligo.length();
2239                 
2240                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
2241                     success = ldiffs + 10;
2242                     break;
2243                 }
2244                 
2245                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2246                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2247                 oligo = alignment->getSeqAAln();
2248                 string temp = alignment->getSeqBAln();
2249                 
2250                 int alnLength = oligo.length();
2251                 
2252                 for(int i=oligo.length()-1;i>=0;i--){
2253                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2254                 }
2255                 oligo = oligo.substr(0,alnLength);
2256                 temp = temp.substr(0,alnLength);
2257                 
2258                 int numDiff = countDiffs(oligo, temp);
2259                 
2260                 if(numDiff < minDiff){
2261                     minDiff = numDiff;
2262                     minCount = 1;
2263                     minPos = 0;
2264                     for(int i=0;i<alnLength;i++){
2265                         if(temp[i] != '-'){
2266                             minPos++;
2267                         }
2268                     }
2269                 }
2270                 else if(numDiff == minDiff){
2271                     minCount++;
2272                 }
2273                 
2274             }
2275             
2276             if(minDiff > ldiffs)        {       success = minDiff;      }       //no good matches
2277             else if(minCount > 1)       {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
2278             else{       //use the best match
2279                 seq.setUnaligned(rawSequence.substr(minPos));
2280                 success = minDiff;
2281             }
2282             
2283             if (alignment != NULL) { delete alignment; }
2284             
2285         }
2286         
2287         return success;
2288         
2289     }
2290     catch(exception& e) {
2291         m->errorOut(e, "TrimOligos", "stripLinker");
2292         exit(1);
2293     }
2294 }
2295
2296 //******************************************************************/
2297 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
2298     try {
2299         string rawSequence = seq.getUnaligned();
2300         bool success = sdiffs+1;        //guilty until proven innocent
2301         
2302         for(int i=0;i<spacer.size();i++){
2303             string oligo = spacer[i];
2304             
2305             if(rawSequence.length() < oligo.length()){
2306                 success = sdiffs+10;
2307                 break;
2308             }
2309             
2310             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2311                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2312                 if(qual.getName() != ""){
2313                     qual.trimQScores(oligo.length(), -1);
2314                 }
2315                 success = 0;
2316                 break;
2317             }
2318         }
2319         
2320         //if you found the spacer or if you don't want to allow for diffs
2321         if ((sdiffs == 0) || (success == 0)) { return success; }
2322         
2323         else { //try aligning and see if you can find it
2324             Alignment* alignment;
2325             if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2326             else{ alignment = NULL; }
2327             
2328             //can you find the barcode
2329             int minDiff = 1e6;
2330             int minCount = 1;
2331             int minPos = 0;
2332             
2333             for(int i = 0; i < spacer.size(); i++){
2334                 string oligo = spacer[i];
2335                 // int length = oligo.length();
2336                 
2337                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
2338                     success = sdiffs + 10;
2339                     break;
2340                 }
2341                 
2342                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2343                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2344                 oligo = alignment->getSeqAAln();
2345                 string temp = alignment->getSeqBAln();
2346                 
2347                 int alnLength = oligo.length();
2348                 
2349                 for(int i=oligo.length()-1;i>=0;i--){
2350                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2351                 }
2352                 oligo = oligo.substr(0,alnLength);
2353                 temp = temp.substr(0,alnLength);
2354                 
2355                 int numDiff = countDiffs(oligo, temp);
2356                 
2357                 if(numDiff < minDiff){
2358                     minDiff = numDiff;
2359                     minCount = 1;
2360                     minPos = 0;
2361                     for(int i=0;i<alnLength;i++){
2362                         if(temp[i] != '-'){
2363                             minPos++;
2364                         }
2365                     }
2366                 }
2367                 else if(numDiff == minDiff){
2368                     minCount++;
2369                 }
2370                 
2371             }
2372             
2373             if(minDiff > sdiffs)        {       success = minDiff;      }       //no good matches
2374             else if(minCount > 1)       {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
2375             else{       //use the best match
2376                 seq.setUnaligned(rawSequence.substr(minPos));
2377                 
2378                 if(qual.getName() != ""){
2379                     qual.trimQScores(minPos, -1);
2380                 }
2381                 success = minDiff;
2382             }
2383             
2384             if (alignment != NULL) { delete alignment; }
2385             
2386         }
2387         
2388         
2389         return success;
2390         
2391     }
2392     catch(exception& e) {
2393         m->errorOut(e, "TrimOligos", "stripSpacer");
2394         exit(1);
2395     }
2396 }
2397 //******************************************************************/
2398 bool TrimOligos::stripSpacer(Sequence& seq){
2399     try {
2400         
2401         string rawSequence = seq.getUnaligned();
2402         bool success = sdiffs+1;        //guilty until proven innocent
2403         
2404         for(int i=0;i<spacer.size();i++){
2405             string oligo = spacer[i];
2406             
2407             if(rawSequence.length() < oligo.length()){
2408                 success = sdiffs+10;
2409                 break;
2410             }
2411             
2412             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2413                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2414                 success = 0;
2415                 break;
2416             }
2417         }       
2418         
2419         //if you found the spacer or if you don't want to allow for diffs
2420         if ((sdiffs == 0) || (success == 0)) { return success; }
2421         
2422         else { //try aligning and see if you can find it
2423             Alignment* alignment;
2424             if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2425             else{ alignment = NULL; }
2426             
2427             //can you find the barcode
2428             int minDiff = 1e6;
2429             int minCount = 1;
2430             int minPos = 0;
2431             
2432             for(int i = 0; i < spacer.size(); i++){
2433                 string oligo = spacer[i];
2434                 // int length = oligo.length();
2435                 
2436                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
2437                     success = sdiffs + 10;
2438                     break;
2439                 }
2440                 
2441                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2442                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2443                 oligo = alignment->getSeqAAln();
2444                 string temp = alignment->getSeqBAln();
2445                 
2446                 int alnLength = oligo.length();
2447                 
2448                 for(int i=oligo.length()-1;i>=0;i--){
2449                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2450                 }
2451                 oligo = oligo.substr(0,alnLength);
2452                 temp = temp.substr(0,alnLength);
2453                 
2454                 int numDiff = countDiffs(oligo, temp);
2455                 
2456                 if(numDiff < minDiff){
2457                     minDiff = numDiff;
2458                     minCount = 1;
2459                     minPos = 0;
2460                     for(int i=0;i<alnLength;i++){
2461                         if(temp[i] != '-'){
2462                             minPos++;
2463                         }
2464                     }
2465                 }
2466                 else if(numDiff == minDiff){
2467                     minCount++;
2468                 }
2469                 
2470             }
2471             
2472             if(minDiff > sdiffs)        {       success = minDiff;      }       //no good matches
2473             else if(minCount > 1)       {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
2474             else{       //use the best match
2475                 seq.setUnaligned(rawSequence.substr(minPos));
2476                 success = minDiff;
2477             }
2478             
2479             if (alignment != NULL) { delete alignment; }
2480             
2481         }
2482         
2483         return success;
2484         
2485     }
2486     catch(exception& e) {
2487         m->errorOut(e, "TrimOligos", "stripSpacer");
2488         exit(1);
2489     }
2490 }
2491
2492 //******************************************************************/
2493 bool TrimOligos::compareDNASeq(string oligo, string seq){
2494     try {
2495         bool success = 1;
2496         int length = oligo.length();
2497         
2498         for(int i=0;i<length;i++){
2499             
2500             if(oligo[i] != seq[i]){
2501                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0; }
2502                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))        {       success = 0;    }
2503                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))    {       success = 0;    }
2504                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))    {       success = 0;    }
2505                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))    {       success = 0;    }
2506                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))    {       success = 0;    }
2507                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))    {       success = 0;    }
2508                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))    {       success = 0;    }
2509                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2510                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2511                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
2512                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }       
2513                 
2514                 if(success == 0)        {       break;  }
2515             }
2516             else{
2517                 success = 1;
2518             }
2519         }
2520         
2521         return success;
2522     }
2523     catch(exception& e) {
2524         m->errorOut(e, "TrimOligos", "compareDNASeq");
2525         exit(1);
2526     }
2527     
2528 }
2529 //********************************************************************/
2530 int TrimOligos::countDiffs(string oligo, string seq){
2531     try {
2532         
2533         int length = oligo.length();
2534         int countDiffs = 0;
2535         
2536         for(int i=0;i<length;i++){
2537             
2538             if(oligo[i] != seq[i]){
2539                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++; }
2540                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))        {       countDiffs++;   }
2541                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))    {       countDiffs++;   }
2542                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))    {       countDiffs++;   }
2543                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))    {       countDiffs++;   }
2544                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))    {       countDiffs++;   }
2545                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))    {       countDiffs++;   }
2546                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))    {       countDiffs++;   }
2547                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2548                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2549                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
2550                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
2551             }
2552             
2553         }
2554         
2555         return countDiffs;
2556     }
2557     catch(exception& e) {
2558         m->errorOut(e, "TrimOligos", "countDiffs");
2559         exit(1);
2560     }
2561 }
2562 //********************************************************************/
2563 string TrimOligos::reverseOligo(string oligo){
2564     try {
2565         string reverse = "";
2566         
2567         for(int i=oligo.length()-1;i>=0;i--){
2568             
2569             if(oligo[i] == 'A') {       reverse += 'T'; }
2570             else if(oligo[i] == 'T'){   reverse += 'A'; }
2571             else if(oligo[i] == 'U'){   reverse += 'A'; }
2572             
2573             else if(oligo[i] == 'G'){   reverse += 'C'; }
2574             else if(oligo[i] == 'C'){   reverse += 'G'; }
2575             
2576             else if(oligo[i] == 'R'){   reverse += 'Y'; }
2577             else if(oligo[i] == 'Y'){   reverse += 'R'; }
2578             
2579             else if(oligo[i] == 'M'){   reverse += 'K'; }
2580             else if(oligo[i] == 'K'){   reverse += 'M'; }
2581             
2582             else if(oligo[i] == 'W'){   reverse += 'W'; }
2583             else if(oligo[i] == 'S'){   reverse += 'S'; }
2584             
2585             else if(oligo[i] == 'B'){   reverse += 'V'; }
2586             else if(oligo[i] == 'V'){   reverse += 'B'; }
2587             
2588             else if(oligo[i] == 'D'){   reverse += 'H'; }
2589             else if(oligo[i] == 'H'){   reverse += 'D'; }
2590             
2591             else if(oligo[i] == '-'){   reverse += '-'; }
2592             else        {       reverse += 'N'; }
2593         }
2594         
2595         
2596         return reverse;
2597     }
2598     catch(exception& e) {
2599         m->errorOut(e, "TrimOligos", "reverseOligo");
2600         exit(1);
2601     }
2602 }
2603
2604 /********************************************************************/
2605
2606