]> git.donarmstrong.com Git - mothur.git/blob - trimoligos.cpp
changes while testing
[mothur.git] / trimoligos.cpp
1 /*
2  * trimoligos.cpp
3  * Mothur
4  *
5  * Created by westcott on 9/1/11.
6  * Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
13
14
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
18     try {
19         m = MothurOut::getInstance();
20         paired = false;
21         
22         pdiffs = p;
23         bdiffs = b;
24         ldiffs = l;
25         sdiffs = s;
26         
27         barcodes = br;
28         primers = pr;
29         revPrimer = r;
30         linker = lk;
31         spacer = sp;
32         maxFBarcodeLength = 0;
33         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
34             if(it->first.length() > maxFBarcodeLength){
35                 maxFBarcodeLength = it->first.length();
36             }
37         }
38         maxFPrimerLength = 0;
39         map<string,int>::iterator it;
40         for(it=primers.begin();it!=primers.end();it++){
41             if(it->first.length() > maxFPrimerLength){
42                 maxFPrimerLength = it->first.length();
43             }
44         }
45         
46         maxLinkerLength = 0;
47         for(int i = 0; i < linker.size(); i++){
48             if(linker[i].length() > maxLinkerLength){
49                 maxLinkerLength = linker[i].length();
50             }
51         }
52         
53         maxSpacerLength = 0;
54         for(int i = 0; i < spacer.size(); i++){
55             if(spacer[i].length() > maxSpacerLength){
56                 maxSpacerLength = spacer[i].length();
57             }
58         }
59     }
60     catch(exception& e) {
61         m->errorOut(e, "TrimOligos", "TrimOligos");
62         exit(1);
63     }
64 }
65 /********************************************************************/
66 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
67 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
68     try {
69         m = MothurOut::getInstance();
70         
71         pdiffs = p;
72         bdiffs = b;
73         ldiffs = l;
74         sdiffs = s;
75         paired = true;
76         
77         maxFBarcodeLength = 0;
78         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
79             string forward = it->second.forward;
80             map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
81             
82             if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length(); }
83             
84             if (itForward == ifbarcodes.end()) {
85                 vector<int> temp; temp.push_back(it->first);
86                 ifbarcodes[forward] = temp;
87             }else { itForward->second.push_back(it->first); }
88         }
89         
90         maxFPrimerLength = 0;
91         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
92             string forward = it->second.forward;
93             map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
94             
95             if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length(); }
96             
97             if (itForward == ifprimers.end()) {
98                 vector<int> temp; temp.push_back(it->first);
99                 ifprimers[forward] = temp;
100             }else { itForward->second.push_back(it->first); }
101         }
102         
103         maxRBarcodeLength = 0;
104         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
105             string forward = it->second.reverse;
106             map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
107             
108             if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length(); }
109             
110             if (itForward == irbarcodes.end()) {
111                 vector<int> temp; temp.push_back(it->first);
112                 irbarcodes[forward] = temp;
113             }else { itForward->second.push_back(it->first); }
114         }
115         
116         maxRPrimerLength = 0;
117         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
118             string forward = it->second.reverse;
119             map<string, vector<int> >::iterator itForward = irprimers.find(forward);
120             
121             if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length(); }
122             
123             if (itForward == irprimers.end()) {
124                 vector<int> temp; temp.push_back(it->first);
125                 irprimers[forward] = temp;
126             }else { itForward->second.push_back(it->first); }
127         }
128         
129         ipbarcodes = br;
130         ipprimers = pr;
131     }
132     catch(exception& e) {
133         m->errorOut(e, "TrimOligos", "TrimOligos");
134         exit(1);
135     }
136 }
137 /********************************************************************/
138 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
139 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
140     try {
141         m = MothurOut::getInstance();
142         
143         pdiffs = p;
144         bdiffs = b;
145         
146         barcodes = br;
147         primers = pr;
148         revPrimer = r;
149         paired = false;
150         
151         maxFBarcodeLength = 0;
152         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
153             string oligo = it->first;
154             if(oligo.length() > maxFBarcodeLength){
155                 maxFBarcodeLength = oligo.length();
156             }
157         }
158         maxRBarcodeLength = maxFBarcodeLength;
159         
160         maxFPrimerLength = 0;
161         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
162             string oligo = it->first;
163             if(oligo.length() > maxFPrimerLength){
164                 maxFPrimerLength = oligo.length();
165             }
166         }
167         maxRPrimerLength = maxFPrimerLength;
168     }
169     catch(exception& e) {
170         m->errorOut(e, "TrimOligos", "TrimOligos");
171         exit(1);
172     }
173 }
174 /********************************************************************/
175 TrimOligos::~TrimOligos() {}
176 //********************************************************************/
177 bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
178     try {
179         
180         string rawSequence = seq.getUnaligned();
181         
182         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
183             string oligo = it->first;
184             
185             if(rawSequence.length() < oligo.length()) { break; }
186             
187             //search for primer
188             int olength = oligo.length();
189             for (int j = 0; j < rawSequence.length()-olength; j++){
190                 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
191                 string rawChunk = rawSequence.substr(j, olength);
192                 if(compareDNASeq(oligo, rawChunk)) {
193                     primerStart = j;
194                     primerEnd = primerStart + olength;
195                     return true;
196                 }
197                 
198             }
199         }
200         
201         primerStart = 0; primerEnd = 0;
202         //if you don't want to allow for diffs
203         if (pdiffs == 0) { return false; }
204         else { //try aligning and see if you can find it
205             
206             //can you find the barcode
207             int minDiff = 1e6;
208             int minCount = 1;
209             
210             Alignment* alignment;
211             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
212             else{ alignment = NULL; }
213             
214             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
215                 
216                 string prim = it->first;
217                 //search for primer
218                 int olength = prim.length();
219                 if (rawSequence.length() < olength) {} //ignore primers too long for this seq
220                 else{
221                     for (int j = 0; j < rawSequence.length()-olength; j++){
222                         
223                         string oligo = it->first;
224                         
225                         if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
226                         
227                         string rawChunk = rawSequence.substr(j, olength+pdiffs);
228                         
229                         //use needleman to align first primer.length()+numdiffs of sequence to each barcode
230                         alignment->alignPrimer(oligo, rawChunk);
231                         oligo = alignment->getSeqAAln();
232                         string temp = alignment->getSeqBAln();
233                         
234                         int alnLength = oligo.length();
235                         
236                         for(int i=oligo.length()-1;i>=0;i--){
237                             if(oligo[i] != '-'){        alnLength = i+1;        break;  }
238                         }
239                         oligo = oligo.substr(0,alnLength);
240                         temp = temp.substr(0,alnLength);
241                         
242                         int numDiff = countDiffs(oligo, temp);
243                         
244                         if(numDiff < minDiff){
245                             minDiff = numDiff;
246                             minCount = 1;
247                             primerStart = j;
248                             primerEnd = primerStart + alnLength;
249                         }else if(numDiff == minDiff){ minCount++; }
250                     }
251                 }
252             }
253             
254             if (alignment != NULL) { delete alignment; }
255             
256             if(minDiff > pdiffs)        {       primerStart = 0; primerEnd = 0; return false;   }       //no good matches
257             else if(minCount > 1)       {       primerStart = 0; primerEnd = 0; return false;   }       //can't tell the difference between multiple primers
258             else{ return true; }
259         }
260         
261         primerStart = 0; primerEnd = 0;
262         return false;
263         
264     }
265     catch(exception& e) {
266         m->errorOut(e, "TrimOligos", "stripForward");
267         exit(1);
268     }
269 }
270 //******************************************************************/
271 bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
272     try {
273         
274         string rawSequence = seq.getUnaligned();
275         
276         for(int i=0;i<revPrimer.size();i++){
277             string oligo = revPrimer[i];
278             if(rawSequence.length() < oligo.length()) { break; }
279             
280             //search for primer
281             int olength = oligo.length();
282             for (int j = rawSequence.length()-olength; j >= 0; j--){
283                 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
284                 string rawChunk = rawSequence.substr(j, olength);
285                 
286                 if(compareDNASeq(oligo, rawChunk)) {
287                     primerStart = j;
288                     primerEnd = primerStart + olength;
289                     return true;
290                 }
291                 
292             }
293         }
294         
295         primerStart = 0; primerEnd = 0;
296         return false;
297     }
298     catch(exception& e) {
299         m->errorOut(e, "PcrSeqsCommand", "findReverse");
300         exit(1);
301     }
302 }
303 //*******************************************************************/
304
305 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
306     try {
307         
308         if (paired) { int success = stripPairedBarcode(seq, qual, group); return success; }
309         
310         string rawSequence = seq.getUnaligned();
311         int success = bdiffs + 1;       //guilty until proven innocent
312         
313         //can you find the barcode
314         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
315             string oligo = it->first;
316             if(rawSequence.length() < oligo.length()){  //let's just assume that the barcodes are the same length
317                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
318                 break;
319             }
320             
321             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
322                 group = it->second;
323                 seq.setUnaligned(rawSequence.substr(oligo.length()));
324                 
325                 if(qual.getName() != ""){
326                     qual.trimQScores(oligo.length(), -1);
327                 }
328                 
329                 success = 0;
330                 break;
331             }
332         }
333         
334         //if you found the barcode or if you don't want to allow for diffs
335         if ((bdiffs == 0) || (success == 0)) { return success; }
336         
337         else { //try aligning and see if you can find it
338             Alignment* alignment;
339             if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
340             else{ alignment = NULL; }
341             
342             //can you find the barcode
343             int minDiff = 1e6;
344             int minCount = 1;
345             int minGroup = -1;
346             int minPos = 0;
347             
348             for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
349                 string oligo = it->first;
350                 // int length = oligo.length();
351                 
352                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
353                     success = bdiffs + 10;
354                     break;
355                 }
356                 
357                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
358                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
359                 oligo = alignment->getSeqAAln();
360                 string temp = alignment->getSeqBAln();
361                 
362                 int alnLength = oligo.length();
363                 
364                 for(int i=oligo.length()-1;i>=0;i--){
365                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
366                 }
367                 oligo = oligo.substr(0,alnLength);
368                 temp = temp.substr(0,alnLength);
369                 int numDiff = countDiffs(oligo, temp);
370                 
371                 if(numDiff < minDiff){
372                     minDiff = numDiff;
373                     minCount = 1;
374                     minGroup = it->second;
375                     minPos = 0;
376                     for(int i=0;i<alnLength;i++){
377                         if(temp[i] != '-'){
378                             minPos++;
379                         }
380                     }
381                 }
382                 else if(numDiff == minDiff){
383                     minCount++;
384                 }
385                 
386             }
387             
388             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
389             else if(minCount > 1)       {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
390             else{       //use the best match
391                 group = minGroup;
392                 seq.setUnaligned(rawSequence.substr(minPos));
393                 
394                 if(qual.getName() != ""){
395                     qual.trimQScores(minPos, -1);
396                 }
397                 success = minDiff;
398             }
399             
400             if (alignment != NULL) { delete alignment; }
401             
402         }
403         
404         return success;
405         
406     }
407     catch(exception& e) {
408         m->errorOut(e, "TrimOligos", "stripBarcode");
409         exit(1);
410     }
411 }
412 //*******************************************************************/
413 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
414     try {
415         //look for forward barcode
416         string rawFSequence = forwardSeq.getUnaligned();
417         string rawRSequence = reverseSeq.getUnaligned();
418         int success = bdiffs + 1;       //guilty until proven innocent
419         
420         //can you find the forward barcode
421         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
422             string foligo = it->second.forward;
423             string roligo = it->second.reverse;
424             
425             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
426                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
427                 break;
428             }
429             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
430                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
431                 break;
432             }
433             
434             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
435                 group = it->first;
436                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
437                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
438                 success = 0;
439                 break;
440             }
441         }
442         
443         //if you found the barcode or if you don't want to allow for diffs
444         if ((bdiffs == 0) || (success == 0)) { return success; }
445         else { //try aligning and see if you can find it
446             Alignment* alignment;
447             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
448             else{ alignment = NULL; }
449             
450             //can you find the barcode
451             int minDiff = 1e6;
452             int minCount = 1;
453             vector< vector<int> > minFGroup;
454             vector<int> minFPos;
455             
456             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
457             /*
458              1 Sarah Westcott
459              2 John Westcott
460              3 Anna Westcott
461              4 Sarah Schloss
462              5 Pat Schloss
463              6 Gail Brown
464              7 Pat Moore
465              only want to look for forward = Sarah, John, Anna, Pat, Gail
466              reverse = Westcott, Schloss, Brown, Moore
467              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
468              */
469             //cout << endl << forwardSeq.getName() << endl;
470             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
471                 string oligo = it->first;
472                 
473                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
474                     success = bdiffs + 10;
475                     break;
476                 }
477                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
478                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
479                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
480                 oligo = alignment->getSeqAAln();
481                 string temp = alignment->getSeqBAln();
482                 
483                 int alnLength = oligo.length();
484                 
485                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
486                 oligo = oligo.substr(0,alnLength);
487                 temp = temp.substr(0,alnLength);
488                 int numDiff = countDiffs(oligo, temp);
489                 
490                 if (alnLength == 0) { numDiff = bdiffs + 100; }
491                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
492                 
493                 if(numDiff < minDiff){
494                     minDiff = numDiff;
495                     minCount = 1;
496                     minFGroup.clear();
497                     minFGroup.push_back(it->second);
498                     int tempminFPos = 0;
499                     minFPos.clear();
500                     for(int i=0;i<alnLength;i++){
501                         if(temp[i] != '-'){
502                             tempminFPos++;
503                         }
504                     }
505                     minFPos.push_back(tempminFPos);
506                 }else if(numDiff == minDiff){
507                     minFGroup.push_back(it->second);
508                     int tempminFPos = 0;
509                     for(int i=0;i<alnLength;i++){
510                         if(temp[i] != '-'){
511                             tempminFPos++;
512                         }
513                     }
514                     minFPos.push_back(tempminFPos);
515                 }
516             }
517             
518             //cout << minDiff << '\t' << minCount << '\t' << endl;
519             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
520             else{
521                 //check for reverse match
522                 if (alignment != NULL) { delete alignment; }
523                 
524                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
525                 else{ alignment = NULL; }
526                 
527                 //can you find the barcode
528                 minDiff = 1e6;
529                 minCount = 1;
530                 vector< vector<int> > minRGroup;
531                 vector<int> minRPos;
532                 
533                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
534                     string oligo = it->first;
535                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
536                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
537                         success = bdiffs + 10;
538                         break;
539                     }
540                     
541                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
542                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
543                     oligo = alignment->getSeqAAln();
544                     string temp = alignment->getSeqBAln();
545                     
546                     int alnLength = oligo.length();
547                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
548                     oligo = oligo.substr(0,alnLength);
549                     temp = temp.substr(0,alnLength);
550                     int numDiff = countDiffs(oligo, temp);
551                     if (alnLength == 0) { numDiff = bdiffs + 100; }
552                     
553                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
554                     if(numDiff < minDiff){
555                         minDiff = numDiff;
556                         minCount = 1;
557                         minRGroup.clear();
558                         minRGroup.push_back(it->second);
559                         int tempminRPos = 0;
560                         minRPos.clear();
561                         for(int i=0;i<alnLength;i++){
562                             if(temp[i] != '-'){
563                                 tempminRPos++;
564                             }
565                         }
566                         minRPos.push_back(tempminRPos);
567                     }else if(numDiff == minDiff){
568                         int tempminRPos = 0;
569                         for(int i=0;i<alnLength;i++){
570                             if(temp[i] != '-'){
571                                 tempminRPos++;
572                             }
573                         }
574                         minRPos.push_back(tempminRPos);
575                         minRGroup.push_back(it->second);
576                     }
577                     
578                 }
579                 
580                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
581                  for (int i = 0; i < minFGroup.size(); i++) {
582                  cout << i << '\t';
583                  for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
584                  cout << endl;
585                  }
586                  cout << endl;
587                  for (int i = 0; i < minRGroup.size(); i++) {
588                  cout << i << '\t';
589                  for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
590                  cout << endl;
591                  }
592                  cout << endl;*/
593                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
594                 else {
595                     bool foundMatch = false;
596                     vector<int> matches;
597                     for (int i = 0; i < minFGroup.size(); i++) {
598                         for (int j = 0; j < minFGroup[i].size(); j++) {
599                             for (int k = 0; k < minRGroup.size(); k++) {
600                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
601                             }
602                         }
603                     }
604                     
605                     int fStart = 0;
606                     int rStart = 0;
607                     if (matches.size() == 1) {
608                         foundMatch = true;
609                         group = matches[0];
610                         fStart = minFPos[0];
611                         rStart = minRPos[0];
612                     }
613                     
614                     //we have an acceptable match for the forward and reverse, but do they match?
615                     if (foundMatch) {
616                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
617                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
618                         success = minDiff;
619                     }else { success = bdiffs + 100;     }
620                 }
621             }
622             
623             if (alignment != NULL) { delete alignment; }
624         }
625         
626         return success;
627         
628     }
629     catch(exception& e) {
630         m->errorOut(e, "TrimOligos", "stripIBarcode");
631         exit(1);
632     }
633     
634 }
635 //*******************************************************************/
636 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
637     try {
638         //look for forward barcode
639         string rawFSequence = forwardSeq.getUnaligned();
640         string rawRSequence = reverseSeq.getUnaligned();
641         int success = bdiffs + 1;       //guilty until proven innocent
642         
643         //can you find the forward barcode
644         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
645             string foligo = it->second.forward;
646             string roligo = it->second.reverse;
647             
648             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
649                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
650                 break;
651             }
652             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
653                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
654                 break;
655             }
656             
657             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
658                 group = it->first;
659                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
660                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
661                 forwardQual.trimQScores(foligo.length(), -1);
662                 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
663                 success = 0;
664                 break;
665             }
666         }
667         
668         //if you found the barcode or if you don't want to allow for diffs
669         if ((bdiffs == 0) || (success == 0)) { return success; }
670         else { //try aligning and see if you can find it
671             Alignment* alignment;
672             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
673             else{ alignment = NULL; }
674             
675             //can you find the barcode
676             int minDiff = 1e6;
677             int minCount = 1;
678             vector< vector<int> > minFGroup;
679             vector<int> minFPos;
680             
681             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
682             /*
683              1 Sarah Westcott
684              2 John Westcott
685              3 Anna Westcott
686              4 Sarah Schloss
687              5 Pat Schloss
688              6 Gail Brown
689              7 Pat Moore
690              only want to look for forward = Sarah, John, Anna, Pat, Gail
691              reverse = Westcott, Schloss, Brown, Moore
692              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
693              */
694             //cout << endl << forwardSeq.getName() << endl;
695             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
696                 string oligo = it->first;
697                 
698                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
699                     success = bdiffs + 10;
700                     break;
701                 }
702                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
703                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
704                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
705                 oligo = alignment->getSeqAAln();
706                 string temp = alignment->getSeqBAln();
707                 
708                 int alnLength = oligo.length();
709                 
710                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
711                 oligo = oligo.substr(0,alnLength);
712                 temp = temp.substr(0,alnLength);
713                 int numDiff = countDiffs(oligo, temp);
714                 
715                 if (alnLength == 0) { numDiff = bdiffs + 100; }
716                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
717                 
718                 if(numDiff < minDiff){
719                     minDiff = numDiff;
720                     minCount = 1;
721                     minFGroup.clear();
722                     minFGroup.push_back(it->second);
723                     int tempminFPos = 0;
724                     minFPos.clear();
725                     for(int i=0;i<alnLength;i++){
726                         if(temp[i] != '-'){
727                             tempminFPos++;
728                         }
729                     }
730                     minFPos.push_back(tempminFPos);
731                 }else if(numDiff == minDiff){
732                     minFGroup.push_back(it->second);
733                     int tempminFPos = 0;
734                     for(int i=0;i<alnLength;i++){
735                         if(temp[i] != '-'){
736                             tempminFPos++;
737                         }
738                     }
739                     minFPos.push_back(tempminFPos);
740                 }
741             }
742             
743             //cout << minDiff << '\t' << minCount << '\t' << endl;
744             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
745             else{
746                 //check for reverse match
747                 if (alignment != NULL) { delete alignment; }
748                 
749                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
750                 else{ alignment = NULL; }
751                 
752                 //can you find the barcode
753                 minDiff = 1e6;
754                 minCount = 1;
755                 vector< vector<int> > minRGroup;
756                 vector<int> minRPos;
757                 
758                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
759                     string oligo = it->first;
760                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
761                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
762                         success = bdiffs + 10;
763                         break;
764                     }
765                     
766                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
767                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
768                     oligo = alignment->getSeqAAln();
769                     string temp = alignment->getSeqBAln();
770                     
771                     int alnLength = oligo.length();
772                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
773                     oligo = oligo.substr(0,alnLength);
774                     temp = temp.substr(0,alnLength);
775                     int numDiff = countDiffs(oligo, temp);
776                     if (alnLength == 0) { numDiff = bdiffs + 100; }
777                     
778                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
779                     if(numDiff < minDiff){
780                         minDiff = numDiff;
781                         minCount = 1;
782                         minRGroup.clear();
783                         minRGroup.push_back(it->second);
784                         int tempminRPos = 0;
785                         minRPos.clear();
786                         for(int i=0;i<alnLength;i++){
787                             if(temp[i] != '-'){
788                                 tempminRPos++;
789                             }
790                         }
791                         minRPos.push_back(tempminRPos);
792                     }else if(numDiff == minDiff){
793                         int tempminRPos = 0;
794                         for(int i=0;i<alnLength;i++){
795                             if(temp[i] != '-'){
796                                 tempminRPos++;
797                             }
798                         }
799                         minRPos.push_back(tempminRPos);
800                         minRGroup.push_back(it->second);
801                     }
802                     
803                 }
804                 
805                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
806                  for (int i = 0; i < minFGroup.size(); i++) {
807                  cout << i << '\t';
808                  for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
809                  cout << endl;
810                  }
811                  cout << endl;
812                  for (int i = 0; i < minRGroup.size(); i++) {
813                  cout << i << '\t';
814                  for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
815                  cout << endl;
816                  }
817                  cout << endl;*/
818                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
819                 else {
820                     bool foundMatch = false;
821                     vector<int> matches;
822                     for (int i = 0; i < minFGroup.size(); i++) {
823                         for (int j = 0; j < minFGroup[i].size(); j++) {
824                             for (int k = 0; k < minRGroup.size(); k++) {
825                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
826                             }
827                         }
828                     }
829                     
830                     int fStart = 0;
831                     int rStart = 0;
832                     if (matches.size() == 1) {
833                         foundMatch = true;
834                         group = matches[0];
835                         fStart = minFPos[0];
836                         rStart = minRPos[0];
837                     }
838                     
839                     //we have an acceptable match for the forward and reverse, but do they match?
840                     if (foundMatch) {
841                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
842                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
843                         forwardQual.trimQScores(fStart, -1);
844                         reverseQual.trimQScores(rStart, -1);
845                         success = minDiff;
846                     }else { success = bdiffs + 100;     }
847                 }
848             }
849             
850             if (alignment != NULL) { delete alignment; }
851         }
852         
853         return success;
854         
855     }
856     catch(exception& e) {
857         m->errorOut(e, "TrimOligos", "stripIBarcode");
858         exit(1);
859     }
860     
861 }
862 //*******************************************************************/
863 int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
864     try {
865         //look for forward barcode
866         string rawSeq = seq.getUnaligned();
867         int success = bdiffs + 1;       //guilty until proven innocent
868         //cout << seq.getName() << endl;
869         //can you find the forward barcode
870         for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
871             string foligo = it->second.forward;
872             string roligo = it->second.reverse;
873             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
874             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
875             if(rawSeq.length() < foligo.length()){      //let's just assume that the barcodes are the same length
876                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
877                 break;
878             }
879             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
880                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
881                 break;
882             }
883             
884             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
885                 group = it->first;
886                 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
887                 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
888                 if(qual.getName() != ""){
889                     qual.trimQScores(-1, rawSeq.length()-roligo.length());
890                     qual.trimQScores(foligo.length(), -1);
891                 }
892                 success = 0;
893                 break;
894             }
895         }
896         //cout << "success=" << success << endl;
897         //if you found the barcode or if you don't want to allow for diffs
898         if ((bdiffs == 0) || (success == 0)) { return success; }
899         else { //try aligning and see if you can find it
900             Alignment* alignment;
901             if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
902             else{ alignment = NULL; }
903             
904             //can you find the barcode
905             int minDiff = 1e6;
906             int minCount = 1;
907             vector< vector<int> > minFGroup;
908             vector<int> minFPos;
909             
910             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
911             /*
912              1 Sarah Westcott
913              2 John Westcott
914              3 Anna Westcott
915              4 Sarah Schloss
916              5 Pat Schloss
917              6 Gail Brown
918              7 Pat Moore
919              only want to look for forward = Sarah, John, Anna, Pat, Gail
920              reverse = Westcott, Schloss, Brown, Moore
921              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
922              */
923             //cout << endl << forwardSeq.getName() << endl;
924             for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
925                 string oligo = it->first;
926                 
927                 if(rawSeq.length() < maxFBarcodeLength){        //let's just assume that the barcodes are the same length
928                     success = bdiffs + 10;
929                     break;
930                 }
931                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
932                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
933                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
934                 oligo = alignment->getSeqAAln();
935                 string temp = alignment->getSeqBAln();
936                 
937                 int alnLength = oligo.length();
938                 
939                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
940                 oligo = oligo.substr(0,alnLength);
941                 temp = temp.substr(0,alnLength);
942                 int numDiff = countDiffs(oligo, temp);
943                 
944                 if (alnLength == 0) { numDiff = bdiffs + 100; }
945                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
946                 
947                 if(numDiff < minDiff){
948                     minDiff = numDiff;
949                     minCount = 1;
950                     minFGroup.clear();
951                     minFGroup.push_back(it->second);
952                     int tempminFPos = 0;
953                     minFPos.clear();
954                     for(int i=0;i<alnLength;i++){
955                         if(temp[i] != '-'){
956                             tempminFPos++;
957                         }
958                     }
959                     minFPos.push_back(tempminFPos);
960                 }else if(numDiff == minDiff){
961                     minFGroup.push_back(it->second);
962                     int tempminFPos = 0;
963                     for(int i=0;i<alnLength;i++){
964                         if(temp[i] != '-'){
965                             tempminFPos++;
966                         }
967                     }
968                     minFPos.push_back(tempminFPos);
969                 }
970             }
971             
972             //cout << minDiff << '\t' << minCount << '\t' << endl;
973             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
974             else{
975                 //check for reverse match
976                 if (alignment != NULL) { delete alignment; }
977                 
978                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
979                 else{ alignment = NULL; }
980                 
981                 //can you find the barcode
982                 minDiff = 1e6;
983                 minCount = 1;
984                 vector< vector<int> > minRGroup;
985                 vector<int> minRPos;
986                 
987                 string rawRSequence = reverseOligo(seq.getUnaligned());
988                 //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl;
989                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
990                     string oligo = reverseOligo(it->first);
991                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
992                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
993                         success = bdiffs + 10;
994                         break;
995                     }
996                     
997                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
998                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
999                     oligo = alignment->getSeqAAln();
1000                     string temp = alignment->getSeqBAln();
1001                     
1002                     int alnLength = oligo.length();
1003                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1004                     oligo = oligo.substr(0,alnLength);
1005                     temp = temp.substr(0,alnLength);
1006                     int numDiff = countDiffs(oligo, temp);
1007                     if (alnLength == 0) { numDiff = bdiffs + 100; }
1008                     
1009                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1010                     if(numDiff < minDiff){
1011                         minDiff = numDiff;
1012                         minCount = 1;
1013                         minRGroup.clear();
1014                         minRGroup.push_back(it->second);
1015                         int tempminRPos = 0;
1016                         minRPos.clear();
1017                         for(int i=0;i<alnLength;i++){
1018                             if(temp[i] != '-'){
1019                                 tempminRPos++;
1020                             }
1021                         }
1022                         minRPos.push_back(tempminRPos);
1023                     }else if(numDiff == minDiff){
1024                         int tempminRPos = 0;
1025                         for(int i=0;i<alnLength;i++){
1026                             if(temp[i] != '-'){
1027                                 tempminRPos++;
1028                             }
1029                         }
1030                         minRPos.push_back(tempminRPos);
1031                         minRGroup.push_back(it->second);
1032                     }
1033                     
1034                 }
1035                 
1036                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1037                  for (int i = 0; i < minFGroup.size(); i++) {
1038                  cout << i << '\t';
1039                  for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
1040                  cout << endl;
1041                  }
1042                  cout << endl;
1043                  for (int i = 0; i < minRGroup.size(); i++) {
1044                  cout << i << '\t';
1045                  for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
1046                  cout << endl;
1047                  }
1048                  cout << endl;*/
1049                 if(minDiff > bdiffs)    {       success = minDiff;      }       //no good matches
1050                 else {
1051                     bool foundMatch = false;
1052                     vector<int> matches;
1053                     for (int i = 0; i < minFGroup.size(); i++) {
1054                         for (int j = 0; j < minFGroup[i].size(); j++) {
1055                             for (int k = 0; k < minRGroup.size(); k++) {
1056                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1057                             }
1058                         }
1059                     }
1060                     
1061                     int fStart = 0;
1062                     int rStart = 0;
1063                     if (matches.size() == 1) {
1064                         foundMatch = true;
1065                         group = matches[0];
1066                         fStart = minFPos[0];
1067                         rStart = rawSeq.length() - minRPos[0];
1068                     }
1069                     
1070                     //we have an acceptable match for the forward and reverse, but do they match?
1071                     if (foundMatch) {
1072                         string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1073                         seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1074                         if(qual.getName() != ""){
1075                             qual.trimQScores(-1, rStart);
1076                             qual.trimQScores(fStart, -1);
1077                         }
1078                         success = minDiff;
1079                         //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
1080                     }else { success = bdiffs + 100;     }
1081                 }
1082             }
1083             
1084             if (alignment != NULL) { delete alignment; }
1085         }
1086         
1087         return success;
1088         
1089     }
1090     catch(exception& e) {
1091         m->errorOut(e, "TrimOligos", "stripPairedBarcode");
1092         exit(1);
1093     }
1094     
1095 }
1096 //*******************************************************************/
1097 int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1098     try {
1099         //look for forward
1100         string rawSeq = seq.getUnaligned();
1101         int success = pdiffs + 1;       //guilty until proven innocent
1102         //cout << seq.getName() << endl;
1103         //can you find the forward
1104         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1105             string foligo = it->second.forward;
1106             string roligo = it->second.reverse;
1107             
1108             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
1109             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
1110             if(rawSeq.length() < foligo.length()){      //let's just assume that the barcodes are the same length
1111                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1112                 break;
1113             }
1114             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
1115                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1116                 break;
1117             }
1118             
1119             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
1120                 group = it->first;
1121                 if (!keepForward) {
1122                     string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
1123                     seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
1124                     if(qual.getName() != ""){
1125                         qual.trimQScores(-1, rawSeq.length()-roligo.length());
1126                         qual.trimQScores(foligo.length(), -1);
1127                     }
1128                 }
1129                 success = 0;
1130                 break;
1131             }
1132         }
1133         //cout << "success=" << success << endl;
1134         //if you found the barcode or if you don't want to allow for diffs
1135         if ((pdiffs == 0) || (success == 0)) { return success; }
1136         else { //try aligning and see if you can find it
1137             Alignment* alignment;
1138             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1139             else{ alignment = NULL; }
1140             
1141             //can you find the barcode
1142             int minDiff = 1e6;
1143             int minCount = 1;
1144             vector< vector<int> > minFGroup;
1145             vector<int> minFPos;
1146             
1147             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1148             /*
1149              1 Sarah Westcott
1150              2 John Westcott
1151              3 Anna Westcott
1152              4 Sarah Schloss
1153              5 Pat Schloss
1154              6 Gail Brown
1155              7 Pat Moore
1156              only want to look for forward = Sarah, John, Anna, Pat, Gail
1157              reverse = Westcott, Schloss, Brown, Moore
1158              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1159              */
1160             //cout << endl << forwardSeq.getName() << endl;
1161             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1162                 string oligo = it->first;
1163                 
1164                 if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1165                     success = pdiffs + 10;
1166                     break;
1167                 }
1168                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
1169                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1170                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
1171                 oligo = alignment->getSeqAAln();
1172                 string temp = alignment->getSeqBAln();
1173                 
1174                 int alnLength = oligo.length();
1175                 
1176                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1177                 oligo = oligo.substr(0,alnLength);
1178                 temp = temp.substr(0,alnLength);
1179                 int numDiff = countDiffs(oligo, temp);
1180                 
1181                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1182                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1183                 
1184                 if(numDiff < minDiff){
1185                     minDiff = numDiff;
1186                     minCount = 1;
1187                     minFGroup.clear();
1188                     minFGroup.push_back(it->second);
1189                     int tempminFPos = 0;
1190                     minFPos.clear();
1191                     for(int i=0;i<alnLength;i++){
1192                         if(temp[i] != '-'){
1193                             tempminFPos++;
1194                         }
1195                     }
1196                     minFPos.push_back(tempminFPos);
1197                 }else if(numDiff == minDiff){
1198                     minFGroup.push_back(it->second);
1199                     int tempminFPos = 0;
1200                     for(int i=0;i<alnLength;i++){
1201                         if(temp[i] != '-'){
1202                             tempminFPos++;
1203                         }
1204                     }
1205                     minFPos.push_back(tempminFPos);
1206                 }
1207             }
1208             
1209             //cout << minDiff << '\t' << minCount << '\t' << endl;
1210             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1211             else{
1212                 //check for reverse match
1213                 if (alignment != NULL) { delete alignment; }
1214                 
1215                 if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1216                 else{ alignment = NULL; }
1217                 
1218                 //can you find the barcode
1219                 minDiff = 1e6;
1220                 minCount = 1;
1221                 vector< vector<int> > minRGroup;
1222                 vector<int> minRPos;
1223                 
1224                 string rawRSequence = reverseOligo(seq.getUnaligned());
1225                 
1226                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1227                     string oligo = reverseOligo(it->first);
1228                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
1229                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1230                         success = pdiffs + 10;
1231                         break;
1232                     }
1233                     
1234                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1235                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1236                     oligo = alignment->getSeqAAln();
1237                     string temp = alignment->getSeqBAln();
1238                     
1239                     int alnLength = oligo.length();
1240                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1241                     oligo = oligo.substr(0,alnLength);
1242                     temp = temp.substr(0,alnLength);
1243                     int numDiff = countDiffs(oligo, temp);
1244                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1245                     
1246                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1247                     if(numDiff < minDiff){
1248                         minDiff = numDiff;
1249                         minCount = 1;
1250                         minRGroup.clear();
1251                         minRGroup.push_back(it->second);
1252                         int tempminRPos = 0;
1253                         minRPos.clear();
1254                         for(int i=0;i<alnLength;i++){
1255                             if(temp[i] != '-'){
1256                                 tempminRPos++;
1257                             }
1258                         }
1259                         minRPos.push_back(tempminRPos);
1260                     }else if(numDiff == minDiff){
1261                         int tempminRPos = 0;
1262                         for(int i=0;i<alnLength;i++){
1263                             if(temp[i] != '-'){
1264                                 tempminRPos++;
1265                             }
1266                         }
1267                         minRPos.push_back(tempminRPos);
1268                         minRGroup.push_back(it->second);
1269                     }
1270                     
1271                 }
1272                 
1273                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1274                  for (int i = 0; i < minFGroup.size(); i++) {
1275                  cout << i << '\t';
1276                  for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
1277                  cout << endl;
1278                  }
1279                  cout << endl;
1280                  for (int i = 0; i < minRGroup.size(); i++) {
1281                  cout << i << '\t';
1282                  for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
1283                  cout << endl;
1284                  }
1285                  cout << endl;*/
1286                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1287                 else {
1288                     bool foundMatch = false;
1289                     vector<int> matches;
1290                     for (int i = 0; i < minFGroup.size(); i++) {
1291                         for (int j = 0; j < minFGroup[i].size(); j++) {
1292                             for (int k = 0; k < minRGroup.size(); k++) {
1293                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1294                             }
1295                         }
1296                     }
1297                     
1298                     int fStart = 0;
1299                     int rStart = 0;
1300                     if (matches.size() == 1) {
1301                         foundMatch = true;
1302                         group = matches[0];
1303                         fStart = minFPos[0];
1304                         rStart = rawSeq.length() - minRPos[0];
1305                     }
1306                     
1307                     //we have an acceptable match for the forward and reverse, but do they match?
1308                     if (foundMatch) {
1309                         if (!keepForward) {
1310                             string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1311                             seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1312                             if(qual.getName() != ""){
1313                                 qual.trimQScores(-1, rStart);
1314                                 qual.trimQScores(fStart, -1);
1315                             }
1316                         }
1317                         success = minDiff;
1318                         //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
1319                     }else { success = pdiffs + 100;     }
1320                 }
1321             }
1322             
1323             if (alignment != NULL) { delete alignment; }
1324         }
1325         
1326         return success;
1327         
1328     }
1329     catch(exception& e) {
1330         m->errorOut(e, "TrimOligos", "stripPairedPrimers");
1331         exit(1);
1332     }
1333     
1334 }
1335
1336 //*******************************************************************/
1337 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
1338     try {
1339         //look for forward barcode
1340         string rawFSequence = forwardSeq.getUnaligned();
1341         string rawRSequence = reverseSeq.getUnaligned();
1342         int success = pdiffs + 1;       //guilty until proven innocent
1343         
1344         //can you find the forward barcode
1345         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1346             string foligo = it->second.forward;
1347             string roligo = it->second.reverse;
1348             
1349             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
1350                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1351                 break;
1352             }
1353             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
1354                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1355                 break;
1356             }
1357             
1358             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1359                 group = it->first;
1360                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1361                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
1362                 forwardQual.trimQScores(foligo.length(), -1);
1363                 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
1364                 success = 0;
1365                 break;
1366             }
1367         }
1368         
1369         //if you found the barcode or if you don't want to allow for diffs
1370         if ((pdiffs == 0) || (success == 0)) { return success; }
1371         else { //try aligning and see if you can find it
1372             Alignment* alignment;
1373             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1374             else{ alignment = NULL; }
1375             
1376             //can you find the barcode
1377             int minDiff = 1e6;
1378             int minCount = 1;
1379             vector< vector<int> > minFGroup;
1380             vector<int> minFPos;
1381             
1382             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1383             /*
1384              1 Sarah Westcott
1385              2 John Westcott
1386              3 Anna Westcott
1387              4 Sarah Schloss
1388              5 Pat Schloss
1389              6 Gail Brown
1390              7 Pat Moore
1391              only want to look for forward = Sarah, John, Anna, Pat, Gail
1392              reverse = Westcott, Schloss, Brown, Moore
1393              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1394              */
1395             //cout << endl << forwardSeq.getName() << endl;
1396             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1397                 string oligo = it->first;
1398                 
1399                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
1400                     success = pdiffs + 10;
1401                     break;
1402                 }
1403                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1404                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1405                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1406                 oligo = alignment->getSeqAAln();
1407                 string temp = alignment->getSeqBAln();
1408                 
1409                 int alnLength = oligo.length();
1410                 
1411                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1412                 oligo = oligo.substr(0,alnLength);
1413                 temp = temp.substr(0,alnLength);
1414                 int numDiff = countDiffs(oligo, temp);
1415                 
1416                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1417                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1418                 
1419                 if(numDiff < minDiff){
1420                     minDiff = numDiff;
1421                     minCount = 1;
1422                     minFGroup.clear();
1423                     minFGroup.push_back(it->second);
1424                     int tempminFPos = 0;
1425                     minFPos.clear();
1426                     for(int i=0;i<alnLength;i++){
1427                         if(temp[i] != '-'){
1428                             tempminFPos++;
1429                         }
1430                     }
1431                     minFPos.push_back(tempminFPos);
1432                 }else if(numDiff == minDiff){
1433                     minFGroup.push_back(it->second);
1434                     int tempminFPos = 0;
1435                     for(int i=0;i<alnLength;i++){
1436                         if(temp[i] != '-'){
1437                             tempminFPos++;
1438                         }
1439                     }
1440                     minFPos.push_back(tempminFPos);
1441                 }
1442             }
1443             
1444             //cout << minDiff << '\t' << minCount << '\t' << endl;
1445             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1446             else{
1447                 //check for reverse match
1448                 if (alignment != NULL) { delete alignment; }
1449                 
1450                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1451                 else{ alignment = NULL; }
1452                 
1453                 //can you find the barcode
1454                 minDiff = 1e6;
1455                 minCount = 1;
1456                 vector< vector<int> > minRGroup;
1457                 vector<int> minRPos;
1458                 
1459                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1460                     string oligo = it->first;
1461                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1462                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1463                         success = pdiffs + 10;
1464                         break;
1465                     }
1466                     
1467                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1468                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1469                     oligo = alignment->getSeqAAln();
1470                     string temp = alignment->getSeqBAln();
1471                     
1472                     int alnLength = oligo.length();
1473                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1474                     oligo = oligo.substr(0,alnLength);
1475                     temp = temp.substr(0,alnLength);
1476                     int numDiff = countDiffs(oligo, temp);
1477                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1478                     
1479                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1480                     if(numDiff < minDiff){
1481                         minDiff = numDiff;
1482                         minCount = 1;
1483                         minRGroup.clear();
1484                         minRGroup.push_back(it->second);
1485                         int tempminRPos = 0;
1486                         minRPos.clear();
1487                         for(int i=0;i<alnLength;i++){
1488                             if(temp[i] != '-'){
1489                                 tempminRPos++;
1490                             }
1491                         }
1492                         minRPos.push_back(tempminRPos);
1493                     }else if(numDiff == minDiff){
1494                         int tempminRPos = 0;
1495                         for(int i=0;i<alnLength;i++){
1496                             if(temp[i] != '-'){
1497                                 tempminRPos++;
1498                             }
1499                         }
1500                         minRPos.push_back(tempminRPos);
1501                         minRGroup.push_back(it->second);
1502                     }
1503                     
1504                 }
1505                 
1506                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1507                  for (int i = 0; i < minFGroup.size(); i++) {
1508                  cout << i << '\t';
1509                  for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
1510                  cout << endl;
1511                  }
1512                  cout << endl;
1513                  for (int i = 0; i < minRGroup.size(); i++) {
1514                  cout << i << '\t';
1515                  for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
1516                  cout << endl;
1517                  }
1518                  cout << endl;*/
1519                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1520                 else {
1521                     bool foundMatch = false;
1522                     vector<int> matches;
1523                     for (int i = 0; i < minFGroup.size(); i++) {
1524                         for (int j = 0; j < minFGroup[i].size(); j++) {
1525                             for (int k = 0; k < minRGroup.size(); k++) {
1526                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1527                             }
1528                         }
1529                     }
1530                     
1531                     int fStart = 0;
1532                     int rStart = 0;
1533                     if (matches.size() == 1) {
1534                         foundMatch = true;
1535                         group = matches[0];
1536                         fStart = minFPos[0];
1537                         rStart = minRPos[0];
1538                     }
1539                     
1540                     //we have an acceptable match for the forward and reverse, but do they match?
1541                     if (foundMatch) {
1542                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1543                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1544                         forwardQual.trimQScores(fStart, -1);
1545                         reverseQual.trimQScores(rStart, -1);
1546                         success = minDiff;
1547                     }else { success = pdiffs + 100;     }
1548                 }
1549             }
1550             
1551             if (alignment != NULL) { delete alignment; }
1552         }
1553         
1554         return success;
1555         
1556     }
1557     catch(exception& e) {
1558         m->errorOut(e, "TrimOligos", "stripIForward");
1559         exit(1);
1560     }
1561     
1562 }
1563 //*******************************************************************/
1564 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
1565     try {
1566         //look for forward barcode
1567         string rawFSequence = forwardSeq.getUnaligned();
1568         string rawRSequence = reverseSeq.getUnaligned();
1569         int success = pdiffs + 1;       //guilty until proven innocent
1570         
1571         //can you find the forward barcode
1572         for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1573             string foligo = it->second.forward;
1574             string roligo = it->second.reverse;
1575             
1576             if(rawFSequence.length() < foligo.length()){        //let's just assume that the barcodes are the same length
1577                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1578                 break;
1579             }
1580             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
1581                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1582                 break;
1583             }
1584             
1585             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1586                 group = it->first;
1587                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1588                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
1589                 success = 0;
1590                 break;
1591             }
1592         }
1593         
1594         //if you found the barcode or if you don't want to allow for diffs
1595         if ((pdiffs == 0) || (success == 0)) { return success; }
1596         else { //try aligning and see if you can find it
1597             Alignment* alignment;
1598             if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1599             else{ alignment = NULL; }
1600             
1601             //can you find the barcode
1602             int minDiff = 1e6;
1603             int minCount = 1;
1604             vector< vector<int> > minFGroup;
1605             vector<int> minFPos;
1606             
1607             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1608             /*
1609              1 Sarah Westcott
1610              2 John Westcott
1611              3 Anna Westcott
1612              4 Sarah Schloss
1613              5 Pat Schloss
1614              6 Gail Brown
1615              7 Pat Moore
1616              only want to look for forward = Sarah, John, Anna, Pat, Gail
1617              reverse = Westcott, Schloss, Brown, Moore
1618              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
1619              */
1620             //cout << endl << forwardSeq.getName() << endl;
1621             for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1622                 string oligo = it->first;
1623                 
1624                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
1625                     success = pdiffs + 10;
1626                     break;
1627                 }
1628                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1629                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1630                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1631                 oligo = alignment->getSeqAAln();
1632                 string temp = alignment->getSeqBAln();
1633                 
1634                 int alnLength = oligo.length();
1635                 
1636                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1637                 oligo = oligo.substr(0,alnLength);
1638                 temp = temp.substr(0,alnLength);
1639                 int numDiff = countDiffs(oligo, temp);
1640                 
1641                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1642                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1643                 
1644                 if(numDiff < minDiff){
1645                     minDiff = numDiff;
1646                     minCount = 1;
1647                     minFGroup.clear();
1648                     minFGroup.push_back(it->second);
1649                     int tempminFPos = 0;
1650                     minFPos.clear();
1651                     for(int i=0;i<alnLength;i++){
1652                         if(temp[i] != '-'){
1653                             tempminFPos++;
1654                         }
1655                     }
1656                     minFPos.push_back(tempminFPos);
1657                 }else if(numDiff == minDiff){
1658                     minFGroup.push_back(it->second);
1659                     int tempminFPos = 0;
1660                     for(int i=0;i<alnLength;i++){
1661                         if(temp[i] != '-'){
1662                             tempminFPos++;
1663                         }
1664                     }
1665                     minFPos.push_back(tempminFPos);
1666                 }
1667             }
1668             
1669             //cout << minDiff << '\t' << minCount << '\t' << endl;
1670             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1671             else{
1672                 //check for reverse match
1673                 if (alignment != NULL) { delete alignment; }
1674                 
1675                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
1676                 else{ alignment = NULL; }
1677                 
1678                 //can you find the barcode
1679                 minDiff = 1e6;
1680                 minCount = 1;
1681                 vector< vector<int> > minRGroup;
1682                 vector<int> minRPos;
1683                 
1684                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1685                     string oligo = it->first;
1686                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1687                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1688                         success = pdiffs + 10;
1689                         break;
1690                     }
1691                     
1692                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1693                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1694                     oligo = alignment->getSeqAAln();
1695                     string temp = alignment->getSeqBAln();
1696                     
1697                     int alnLength = oligo.length();
1698                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1699                     oligo = oligo.substr(0,alnLength);
1700                     temp = temp.substr(0,alnLength);
1701                     int numDiff = countDiffs(oligo, temp);
1702                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1703                     
1704                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1705                     if(numDiff < minDiff){
1706                         minDiff = numDiff;
1707                         minCount = 1;
1708                         minRGroup.clear();
1709                         minRGroup.push_back(it->second);
1710                         int tempminRPos = 0;
1711                         minRPos.clear();
1712                         for(int i=0;i<alnLength;i++){
1713                             if(temp[i] != '-'){
1714                                 tempminRPos++;
1715                             }
1716                         }
1717                         minRPos.push_back(tempminRPos);
1718                     }else if(numDiff == minDiff){
1719                         int tempminRPos = 0;
1720                         for(int i=0;i<alnLength;i++){
1721                             if(temp[i] != '-'){
1722                                 tempminRPos++;
1723                             }
1724                         }
1725                         minRPos.push_back(tempminRPos);
1726                         minRGroup.push_back(it->second);
1727                     }
1728                     
1729                 }
1730                 
1731                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1732                  for (int i = 0; i < minFGroup.size(); i++) {
1733                  cout << i << '\t';
1734                  for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
1735                  cout << endl;
1736                  }
1737                  cout << endl;
1738                  for (int i = 0; i < minRGroup.size(); i++) {
1739                  cout << i << '\t';
1740                  for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
1741                  cout << endl;
1742                  }
1743                  cout << endl;*/
1744                 if(minDiff > pdiffs)    {       success = minDiff;      }       //no good matches
1745                 else {
1746                     bool foundMatch = false;
1747                     vector<int> matches;
1748                     for (int i = 0; i < minFGroup.size(); i++) {
1749                         for (int j = 0; j < minFGroup[i].size(); j++) {
1750                             for (int k = 0; k < minRGroup.size(); k++) {
1751                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1752                             }
1753                         }
1754                     }
1755                     
1756                     int fStart = 0;
1757                     int rStart = 0;
1758                     if (matches.size() == 1) {
1759                         foundMatch = true;
1760                         group = matches[0];
1761                         fStart = minFPos[0];
1762                         rStart = minRPos[0];
1763                     }
1764                     
1765                     //we have an acceptable match for the forward and reverse, but do they match?
1766                     if (foundMatch) {
1767                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1768                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1769                         success = minDiff;
1770                     }else { success = pdiffs + 100;     }
1771                 }
1772             }
1773             
1774             if (alignment != NULL) { delete alignment; }
1775         }
1776         
1777         return success;
1778         
1779     }
1780     catch(exception& e) {
1781         m->errorOut(e, "TrimOligos", "stripIForward");
1782         exit(1);
1783     }
1784     
1785 }
1786 //*******************************************************************/
1787 int TrimOligos::stripBarcode(Sequence& seq, int& group){
1788     try {
1789         
1790         string rawSequence = seq.getUnaligned();
1791         int success = bdiffs + 1;       //guilty until proven innocent
1792         
1793         //can you find the barcode
1794         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1795             string oligo = it->first;
1796             if(rawSequence.length() < oligo.length()){  //let's just assume that the barcodes are the same length
1797                 success = bdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1798                 break;
1799             }
1800             
1801             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1802                 group = it->second;
1803                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1804                 
1805                 success = 0;
1806                 break;
1807             }
1808         }
1809         
1810         //if you found the barcode or if you don't want to allow for diffs
1811         if ((bdiffs == 0) || (success == 0)) { return success; }
1812         
1813         else { //try aligning and see if you can find it
1814             Alignment* alignment;
1815             if (barcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
1816             else{ alignment = NULL; }
1817             
1818             //can you find the barcode
1819             int minDiff = 1e6;
1820             int minCount = 1;
1821             int minGroup = -1;
1822             int minPos = 0;
1823             
1824             for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1825                 string oligo = it->first;
1826                 // int length = oligo.length();
1827                 
1828                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
1829                     success = bdiffs + 10;
1830                     break;
1831                 }
1832                 
1833                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1834                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1835                 oligo = alignment->getSeqAAln();
1836                 string temp = alignment->getSeqBAln();
1837                 
1838                 int alnLength = oligo.length();
1839                 
1840                 for(int i=oligo.length()-1;i>=0;i--){
1841                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
1842                 }
1843                 oligo = oligo.substr(0,alnLength);
1844                 temp = temp.substr(0,alnLength);
1845                 
1846                 int numDiff = countDiffs(oligo, temp);
1847                 
1848                 if(numDiff < minDiff){
1849                     minDiff = numDiff;
1850                     minCount = 1;
1851                     minGroup = it->second;
1852                     minPos = 0;
1853                     for(int i=0;i<alnLength;i++){
1854                         if(temp[i] != '-'){
1855                             minPos++;
1856                         }
1857                     }
1858                 }
1859                 else if(numDiff == minDiff){
1860                     minCount++;
1861                 }
1862                 
1863             }
1864             
1865             if(minDiff > bdiffs)        {       success = minDiff;      }       //no good matches
1866             else if(minCount > 1)       {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1867             else{       //use the best match
1868                 group = minGroup;
1869                 seq.setUnaligned(rawSequence.substr(minPos));
1870                 success = minDiff;
1871             }
1872             
1873             if (alignment != NULL) { delete alignment; }
1874             
1875         }
1876         
1877         return success;
1878         
1879     }
1880     catch(exception& e) {
1881         m->errorOut(e, "TrimOligos", "stripBarcode");
1882         exit(1);
1883     }
1884     
1885 }
1886
1887 /********************************************************************/
1888 int TrimOligos::stripForward(Sequence& seq, int& group){
1889     try {
1890         
1891         string rawSequence = seq.getUnaligned();
1892         int success = pdiffs + 1;       //guilty until proven innocent
1893         
1894         //can you find the primer
1895         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1896             string oligo = it->first;
1897             if(rawSequence.length() < oligo.length()){  //let's just assume that the primers are the same length
1898                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1899                 break;
1900             }
1901             
1902             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1903                 group = it->second;
1904                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1905                 success = 0;
1906                 break;
1907             }
1908         }
1909         
1910         //if you found the barcode or if you don't want to allow for diffs
1911         if ((pdiffs == 0) || (success == 0)) {  return success; }
1912         
1913         else { //try aligning and see if you can find it
1914             Alignment* alignment;
1915             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
1916             else{ alignment = NULL; }
1917             
1918             //can you find the barcode
1919             int minDiff = 1e6;
1920             int minCount = 1;
1921             int minGroup = -1;
1922             int minPos = 0;
1923             
1924             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1925                 string oligo = it->first;
1926                 // int length = oligo.length();
1927                 
1928                 if(rawSequence.length() < maxFPrimerLength){
1929                     success = pdiffs + 100;
1930                     break;
1931                 }
1932                 
1933                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1934                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1935                 oligo = alignment->getSeqAAln();
1936                 string temp = alignment->getSeqBAln();
1937                 
1938                 int alnLength = oligo.length();
1939                 
1940                 for(int i=oligo.length()-1;i>=0;i--){
1941                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
1942                 }
1943                 oligo = oligo.substr(0,alnLength);
1944                 temp = temp.substr(0,alnLength);
1945                 
1946                 int numDiff = countDiffs(oligo, temp);
1947                 
1948                 if(numDiff < minDiff){
1949                     minDiff = numDiff;
1950                     minCount = 1;
1951                     minGroup = it->second;
1952                     minPos = 0;
1953                     for(int i=0;i<alnLength;i++){
1954                         if(temp[i] != '-'){
1955                             minPos++;
1956                         }
1957                     }
1958                 }
1959                 else if(numDiff == minDiff){
1960                     minCount++;
1961                 }
1962                 
1963             }
1964             
1965             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
1966             else if(minCount > 1)       {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1967             else{       //use the best match
1968                 group = minGroup;
1969                 seq.setUnaligned(rawSequence.substr(minPos));
1970                 success = minDiff;
1971             }
1972             
1973             if (alignment != NULL) { delete alignment; }
1974             
1975         }
1976         
1977         return success;
1978         
1979     }
1980     catch(exception& e) {
1981         m->errorOut(e, "TrimOligos", "stripForward");
1982         exit(1);
1983     }
1984 }
1985 //*******************************************************************/
1986 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1987     try {
1988         
1989         if (paired) { int success = stripPairedPrimers(seq, qual, group, keepForward); return success; }
1990         
1991         string rawSequence = seq.getUnaligned();
1992         int success = pdiffs + 1;       //guilty until proven innocent
1993         
1994         //can you find the primer
1995         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1996             string oligo = it->first;
1997             if(rawSequence.length() < oligo.length()){  //let's just assume that the primers are the same length
1998                 success = pdiffs + 10;  //if the sequence is shorter than the barcode then bail out
1999                 break;
2000             }
2001             
2002             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2003                 group = it->second;
2004                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
2005                 if(qual.getName() != ""){
2006                     if (!keepForward) { qual.trimQScores(oligo.length(), -1); }
2007                 }
2008                 success = 0;
2009                 break;
2010             }
2011         }
2012         
2013         //if you found the barcode or if you don't want to allow for diffs
2014         if ((pdiffs == 0) || (success == 0)) { return success; }
2015         
2016         else { //try aligning and see if you can find it
2017             Alignment* alignment;
2018             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
2019             else{ alignment = NULL; }
2020             
2021             //can you find the barcode
2022             int minDiff = 1e6;
2023             int minCount = 1;
2024             int minGroup = -1;
2025             int minPos = 0;
2026             
2027             for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
2028                 string oligo = it->first;
2029                 // int length = oligo.length();
2030                 
2031                 if(rawSequence.length() < maxFPrimerLength){
2032                     success = pdiffs + 100;
2033                     break;
2034                 }
2035                 
2036                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2037                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
2038                 oligo = alignment->getSeqAAln();
2039                 string temp = alignment->getSeqBAln();
2040                 
2041                 int alnLength = oligo.length();
2042                 
2043                 for(int i=oligo.length()-1;i>=0;i--){
2044                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2045                 }
2046                 oligo = oligo.substr(0,alnLength);
2047                 temp = temp.substr(0,alnLength);
2048                 
2049                 int numDiff = countDiffs(oligo, temp);
2050                 
2051                 if(numDiff < minDiff){
2052                     minDiff = numDiff;
2053                     minCount = 1;
2054                     minGroup = it->second;
2055                     minPos = 0;
2056                     for(int i=0;i<alnLength;i++){
2057                         if(temp[i] != '-'){
2058                             minPos++;
2059                         }
2060                     }
2061                 }
2062                 else if(numDiff == minDiff){
2063                     minCount++;
2064                 }
2065                 
2066             }
2067             
2068             if(minDiff > pdiffs)        {       success = minDiff;      }       //no good matches
2069             else if(minCount > 1)       {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
2070             else{       //use the best match
2071                 group = minGroup;
2072                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
2073                 if(qual.getName() != ""){
2074                     if (!keepForward) { qual.trimQScores(minPos, -1); }
2075                 }
2076                 success = minDiff;
2077             }
2078             
2079             if (alignment != NULL) { delete alignment; }
2080             
2081         }
2082         
2083         return success;
2084         
2085     }
2086     catch(exception& e) {
2087         m->errorOut(e, "TrimOligos", "stripForward");
2088         exit(1);
2089     }
2090 }
2091 //******************************************************************/
2092 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
2093     try {
2094         string rawSequence = seq.getUnaligned();
2095         bool success = 0;       //guilty until proven innocent
2096         
2097         for(int i=0;i<revPrimer.size();i++){
2098             string oligo = revPrimer[i];
2099             
2100             if(rawSequence.length() < oligo.length()){
2101                 success = 0;
2102                 break;
2103             }
2104             
2105             if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2106                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2107                 if(qual.getName() != ""){
2108                     qual.trimQScores(-1, rawSequence.length()-oligo.length());
2109                 }
2110                 success = 1;
2111                 break;
2112             }
2113         }       
2114         return success;
2115         
2116     }
2117     catch(exception& e) {
2118         m->errorOut(e, "TrimOligos", "stripReverse");
2119         exit(1);
2120     }
2121 }
2122 //******************************************************************/
2123 bool TrimOligos::stripReverse(Sequence& seq){
2124     try {
2125         
2126         string rawSequence = seq.getUnaligned();
2127         bool success = 0;       //guilty until proven innocent
2128         
2129         for(int i=0;i<revPrimer.size();i++){
2130             string oligo = revPrimer[i];
2131             
2132             if(rawSequence.length() < oligo.length()){
2133                 success = 0;
2134                 break;
2135             }
2136             
2137             if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2138                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2139                 success = 1;
2140                 break;
2141             }
2142         }       
2143         
2144         return success;
2145         
2146     }
2147     catch(exception& e) {
2148         m->errorOut(e, "TrimOligos", "stripReverse");
2149         exit(1);
2150     }
2151 }
2152 //******************************************************************/
2153 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
2154     try {
2155         string rawSequence = seq.getUnaligned();
2156         bool success = ldiffs + 1;      //guilty until proven innocent
2157         
2158         for(int i=0;i<linker.size();i++){
2159             string oligo = linker[i];
2160             
2161             if(rawSequence.length() < oligo.length()){
2162                 success = ldiffs + 10;
2163                 break;
2164             }
2165             
2166             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2167                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2168                 if(qual.getName() != ""){
2169                     qual.trimQScores(oligo.length(), -1);
2170                 }
2171                 success = 0;
2172                 break;
2173             }
2174         }
2175         
2176         //if you found the linker or if you don't want to allow for diffs
2177         if ((ldiffs == 0) || (success == 0)) { return success; }
2178         
2179         else { //try aligning and see if you can find it
2180             Alignment* alignment;
2181             if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }   
2182             else{ alignment = NULL; }
2183             
2184             //can you find the barcode
2185             int minDiff = 1e6;
2186             int minCount = 1;
2187             int minPos = 0;
2188             
2189             for(int i = 0; i < linker.size(); i++){
2190                 string oligo = linker[i];
2191                 // int length = oligo.length();
2192                 
2193                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
2194                     success = ldiffs + 10;
2195                     break;
2196                 }
2197                 
2198                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2199                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2200                 oligo = alignment->getSeqAAln();
2201                 string temp = alignment->getSeqBAln();
2202                 
2203                 int alnLength = oligo.length();
2204                 
2205                 for(int i=oligo.length()-1;i>=0;i--){
2206                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2207                 }
2208                 oligo = oligo.substr(0,alnLength);
2209                 temp = temp.substr(0,alnLength);
2210                 
2211                 int numDiff = countDiffs(oligo, temp);
2212                 
2213                 if(numDiff < minDiff){
2214                     minDiff = numDiff;
2215                     minCount = 1;
2216                     minPos = 0;
2217                     for(int i=0;i<alnLength;i++){
2218                         if(temp[i] != '-'){
2219                             minPos++;
2220                         }
2221                     }
2222                 }
2223                 else if(numDiff == minDiff){
2224                     minCount++;
2225                 }
2226                 
2227             }
2228             
2229             if(minDiff > ldiffs)        {       success = minDiff;      }       //no good matches
2230             else if(minCount > 1)       {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
2231             else{       //use the best match
2232                 seq.setUnaligned(rawSequence.substr(minPos));
2233                 
2234                 if(qual.getName() != ""){
2235                     qual.trimQScores(minPos, -1);
2236                 }
2237                 success = minDiff;
2238             }
2239             
2240             if (alignment != NULL) { delete alignment; }
2241             
2242         }
2243         
2244         
2245         return success;
2246         
2247     }
2248     catch(exception& e) {
2249         m->errorOut(e, "TrimOligos", "stripLinker");
2250         exit(1);
2251     }
2252 }
2253 //******************************************************************/
2254 bool TrimOligos::stripLinker(Sequence& seq){
2255     try {
2256         
2257         string rawSequence = seq.getUnaligned();
2258         bool success = ldiffs +1;       //guilty until proven innocent
2259         
2260         for(int i=0;i<linker.size();i++){
2261             string oligo = linker[i];
2262             
2263             if(rawSequence.length() < oligo.length()){
2264                 success = ldiffs +10;
2265                 break;
2266             }
2267             
2268             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2269                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2270                 success = 0;
2271                 break;
2272             }
2273         }       
2274         
2275         //if you found the linker or if you don't want to allow for diffs
2276         if ((ldiffs == 0) || (success == 0)) { return success; }
2277         
2278         else { //try aligning and see if you can find it
2279             Alignment* alignment;
2280             if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1)); }
2281             else{ alignment = NULL; }
2282             
2283             //can you find the barcode
2284             int minDiff = 1e6;
2285             int minCount = 1;
2286             int minPos = 0;
2287             
2288             for(int i = 0; i < linker.size(); i++){
2289                 string oligo = linker[i];
2290                 // int length = oligo.length();
2291                 
2292                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
2293                     success = ldiffs + 10;
2294                     break;
2295                 }
2296                 
2297                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2298                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2299                 oligo = alignment->getSeqAAln();
2300                 string temp = alignment->getSeqBAln();
2301                 
2302                 int alnLength = oligo.length();
2303                 
2304                 for(int i=oligo.length()-1;i>=0;i--){
2305                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2306                 }
2307                 oligo = oligo.substr(0,alnLength);
2308                 temp = temp.substr(0,alnLength);
2309                 
2310                 int numDiff = countDiffs(oligo, temp);
2311                 
2312                 if(numDiff < minDiff){
2313                     minDiff = numDiff;
2314                     minCount = 1;
2315                     minPos = 0;
2316                     for(int i=0;i<alnLength;i++){
2317                         if(temp[i] != '-'){
2318                             minPos++;
2319                         }
2320                     }
2321                 }
2322                 else if(numDiff == minDiff){
2323                     minCount++;
2324                 }
2325                 
2326             }
2327             
2328             if(minDiff > ldiffs)        {       success = minDiff;      }       //no good matches
2329             else if(minCount > 1)       {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
2330             else{       //use the best match
2331                 seq.setUnaligned(rawSequence.substr(minPos));
2332                 success = minDiff;
2333             }
2334             
2335             if (alignment != NULL) { delete alignment; }
2336             
2337         }
2338         
2339         return success;
2340         
2341     }
2342     catch(exception& e) {
2343         m->errorOut(e, "TrimOligos", "stripLinker");
2344         exit(1);
2345     }
2346 }
2347
2348 //******************************************************************/
2349 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
2350     try {
2351         string rawSequence = seq.getUnaligned();
2352         bool success = sdiffs+1;        //guilty until proven innocent
2353         
2354         for(int i=0;i<spacer.size();i++){
2355             string oligo = spacer[i];
2356             
2357             if(rawSequence.length() < oligo.length()){
2358                 success = sdiffs+10;
2359                 break;
2360             }
2361             
2362             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2363                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2364                 if(qual.getName() != ""){
2365                     qual.trimQScores(oligo.length(), -1);
2366                 }
2367                 success = 0;
2368                 break;
2369             }
2370         }
2371         
2372         //if you found the spacer or if you don't want to allow for diffs
2373         if ((sdiffs == 0) || (success == 0)) { return success; }
2374         
2375         else { //try aligning and see if you can find it
2376             Alignment* alignment;
2377             if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2378             else{ alignment = NULL; }
2379             
2380             //can you find the barcode
2381             int minDiff = 1e6;
2382             int minCount = 1;
2383             int minPos = 0;
2384             
2385             for(int i = 0; i < spacer.size(); i++){
2386                 string oligo = spacer[i];
2387                 // int length = oligo.length();
2388                 
2389                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
2390                     success = sdiffs + 10;
2391                     break;
2392                 }
2393                 
2394                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2395                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2396                 oligo = alignment->getSeqAAln();
2397                 string temp = alignment->getSeqBAln();
2398                 
2399                 int alnLength = oligo.length();
2400                 
2401                 for(int i=oligo.length()-1;i>=0;i--){
2402                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2403                 }
2404                 oligo = oligo.substr(0,alnLength);
2405                 temp = temp.substr(0,alnLength);
2406                 
2407                 int numDiff = countDiffs(oligo, temp);
2408                 
2409                 if(numDiff < minDiff){
2410                     minDiff = numDiff;
2411                     minCount = 1;
2412                     minPos = 0;
2413                     for(int i=0;i<alnLength;i++){
2414                         if(temp[i] != '-'){
2415                             minPos++;
2416                         }
2417                     }
2418                 }
2419                 else if(numDiff == minDiff){
2420                     minCount++;
2421                 }
2422                 
2423             }
2424             
2425             if(minDiff > sdiffs)        {       success = minDiff;      }       //no good matches
2426             else if(minCount > 1)       {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
2427             else{       //use the best match
2428                 seq.setUnaligned(rawSequence.substr(minPos));
2429                 
2430                 if(qual.getName() != ""){
2431                     qual.trimQScores(minPos, -1);
2432                 }
2433                 success = minDiff;
2434             }
2435             
2436             if (alignment != NULL) { delete alignment; }
2437             
2438         }
2439         
2440         
2441         return success;
2442         
2443     }
2444     catch(exception& e) {
2445         m->errorOut(e, "TrimOligos", "stripSpacer");
2446         exit(1);
2447     }
2448 }
2449 //******************************************************************/
2450 bool TrimOligos::stripSpacer(Sequence& seq){
2451     try {
2452         
2453         string rawSequence = seq.getUnaligned();
2454         bool success = sdiffs+1;        //guilty until proven innocent
2455         
2456         for(int i=0;i<spacer.size();i++){
2457             string oligo = spacer[i];
2458             
2459             if(rawSequence.length() < oligo.length()){
2460                 success = sdiffs+10;
2461                 break;
2462             }
2463             
2464             if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2465                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2466                 success = 0;
2467                 break;
2468             }
2469         }       
2470         
2471         //if you found the spacer or if you don't want to allow for diffs
2472         if ((sdiffs == 0) || (success == 0)) { return success; }
2473         
2474         else { //try aligning and see if you can find it
2475             Alignment* alignment;
2476             if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1)); }
2477             else{ alignment = NULL; }
2478             
2479             //can you find the barcode
2480             int minDiff = 1e6;
2481             int minCount = 1;
2482             int minPos = 0;
2483             
2484             for(int i = 0; i < spacer.size(); i++){
2485                 string oligo = spacer[i];
2486                 // int length = oligo.length();
2487                 
2488                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
2489                     success = sdiffs + 10;
2490                     break;
2491                 }
2492                 
2493                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2494                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2495                 oligo = alignment->getSeqAAln();
2496                 string temp = alignment->getSeqBAln();
2497                 
2498                 int alnLength = oligo.length();
2499                 
2500                 for(int i=oligo.length()-1;i>=0;i--){
2501                     if(oligo[i] != '-'){        alnLength = i+1;        break;  }
2502                 }
2503                 oligo = oligo.substr(0,alnLength);
2504                 temp = temp.substr(0,alnLength);
2505                 
2506                 int numDiff = countDiffs(oligo, temp);
2507                 
2508                 if(numDiff < minDiff){
2509                     minDiff = numDiff;
2510                     minCount = 1;
2511                     minPos = 0;
2512                     for(int i=0;i<alnLength;i++){
2513                         if(temp[i] != '-'){
2514                             minPos++;
2515                         }
2516                     }
2517                 }
2518                 else if(numDiff == minDiff){
2519                     minCount++;
2520                 }
2521                 
2522             }
2523             
2524             if(minDiff > sdiffs)        {       success = minDiff;      }       //no good matches
2525             else if(minCount > 1)       {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
2526             else{       //use the best match
2527                 seq.setUnaligned(rawSequence.substr(minPos));
2528                 success = minDiff;
2529             }
2530             
2531             if (alignment != NULL) { delete alignment; }
2532             
2533         }
2534         
2535         return success;
2536         
2537     }
2538     catch(exception& e) {
2539         m->errorOut(e, "TrimOligos", "stripSpacer");
2540         exit(1);
2541     }
2542 }
2543
2544 //******************************************************************/
2545 bool TrimOligos::compareDNASeq(string oligo, string seq){
2546     try {
2547         bool success = 1;
2548         int length = oligo.length();
2549         
2550         for(int i=0;i<length;i++){
2551             
2552             if(oligo[i] != seq[i]){
2553                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0; }
2554                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))        {       success = 0;    }
2555                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))    {       success = 0;    }
2556                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))    {       success = 0;    }
2557                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))    {       success = 0;    }
2558                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))    {       success = 0;    }
2559                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))    {       success = 0;    }
2560                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))    {       success = 0;    }
2561                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2562                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2563                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
2564                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }       
2565                 
2566                 if(success == 0)        {       break;  }
2567             }
2568             else{
2569                 success = 1;
2570             }
2571         }
2572         
2573         return success;
2574     }
2575     catch(exception& e) {
2576         m->errorOut(e, "TrimOligos", "compareDNASeq");
2577         exit(1);
2578     }
2579     
2580 }
2581 //********************************************************************/
2582 int TrimOligos::countDiffs(string oligo, string seq){
2583     try {
2584         
2585         int length = oligo.length();
2586         int countDiffs = 0;
2587         
2588         for(int i=0;i<length;i++){
2589             
2590             if(oligo[i] != seq[i]){
2591                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++; }
2592                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))        {       countDiffs++;   }
2593                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))    {       countDiffs++;   }
2594                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))    {       countDiffs++;   }
2595                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))    {       countDiffs++;   }
2596                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))    {       countDiffs++;   }
2597                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))    {       countDiffs++;   }
2598                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))    {       countDiffs++;   }
2599                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2600                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2601                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
2602                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
2603             }
2604             
2605         }
2606         
2607         return countDiffs;
2608     }
2609     catch(exception& e) {
2610         m->errorOut(e, "TrimOligos", "countDiffs");
2611         exit(1);
2612     }
2613 }
2614 //********************************************************************/
2615 string TrimOligos::reverseOligo(string oligo){
2616     try {
2617         string reverse = "";
2618         
2619         for(int i=oligo.length()-1;i>=0;i--){
2620             
2621             if(oligo[i] == 'A') {       reverse += 'T'; }
2622             else if(oligo[i] == 'T'){   reverse += 'A'; }
2623             else if(oligo[i] == 'U'){   reverse += 'A'; }
2624             
2625             else if(oligo[i] == 'G'){   reverse += 'C'; }
2626             else if(oligo[i] == 'C'){   reverse += 'G'; }
2627             
2628             else if(oligo[i] == 'R'){   reverse += 'Y'; }
2629             else if(oligo[i] == 'Y'){   reverse += 'R'; }
2630             
2631             else if(oligo[i] == 'M'){   reverse += 'K'; }
2632             else if(oligo[i] == 'K'){   reverse += 'M'; }
2633             
2634             else if(oligo[i] == 'W'){   reverse += 'W'; }
2635             else if(oligo[i] == 'S'){   reverse += 'S'; }
2636             
2637             else if(oligo[i] == 'B'){   reverse += 'V'; }
2638             else if(oligo[i] == 'V'){   reverse += 'B'; }
2639             
2640             else if(oligo[i] == 'D'){   reverse += 'H'; }
2641             else if(oligo[i] == 'H'){   reverse += 'D'; }
2642             
2643             else if(oligo[i] == '-'){   reverse += '-'; }
2644             else        {       reverse += 'N'; }
2645         }
2646         
2647         
2648         return reverse;
2649     }
2650     catch(exception& e) {
2651         m->errorOut(e, "TrimOligos", "reverseOligo");
2652         exit(1);
2653     }
2654 }
2655
2656 /********************************************************************/
2657
2658