]> git.donarmstrong.com Git - mothur.git/blob - trimoligos.cpp
mods to resolve some misc warnings. added alignPrimer function to needle man to allow...
[mothur.git] / trimoligos.cpp
1 /*
2  *  trimoligos.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/1/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
13
14
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
18         try {
19                 m = MothurOut::getInstance();
20         paired = false;
21                 
22                 pdiffs = p;
23                 bdiffs = b;
24         ldiffs = l;
25         sdiffs = s;
26                 
27                 barcodes = br;
28                 primers = pr;
29                 revPrimer = r;
30         linker = lk;
31         spacer = sp;
32         maxFBarcodeLength = 0;
33         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
34             if(it->first.length() > maxFBarcodeLength){
35                 maxFBarcodeLength = it->first.length();
36             }
37         }
38         maxFPrimerLength = 0;
39         map<string,int>::iterator it; 
40         for(it=primers.begin();it!=primers.end();it++){
41             if(it->first.length() > maxFPrimerLength){
42                 maxFPrimerLength = it->first.length();
43             }
44         }
45         
46         maxLinkerLength = 0;
47         for(int i = 0; i < linker.size(); i++){
48             if(linker[i].length() > maxLinkerLength){
49                 maxLinkerLength = linker[i].length();
50             }
51         }
52         
53         maxSpacerLength = 0;
54         for(int i = 0; i < spacer.size(); i++){
55             if(spacer[i].length() > maxSpacerLength){
56                 maxSpacerLength = spacer[i].length();
57             }
58         }
59         }
60         catch(exception& e) {
61                 m->errorOut(e, "TrimOligos", "TrimOligos");
62                 exit(1);
63         }
64 }
65 /********************************************************************/
66 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
67 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
68         try {
69                 m = MothurOut::getInstance();
70                 
71                 pdiffs = p;
72                 bdiffs = b;
73         ldiffs = l;
74         sdiffs = s;
75         paired = true;
76                 
77         maxFBarcodeLength = 0;
78         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
79             string forward = it->second.forward;
80             map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
81             
82             if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length();  }
83             
84             if (itForward == ifbarcodes.end()) {
85                 vector<int> temp; temp.push_back(it->first);
86                 ifbarcodes[forward] = temp;
87             }else { itForward->second.push_back(it->first); }
88         }
89
90         maxFPrimerLength = 0;
91         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
92             string forward = it->second.forward;
93             map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
94             
95             if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length();  }
96             
97             if (itForward == ifprimers.end()) {
98                 vector<int> temp; temp.push_back(it->first);
99                 ifprimers[forward] = temp;
100             }else { itForward->second.push_back(it->first); }
101         }
102         
103         maxRBarcodeLength = 0;
104         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
105             string forward = it->second.reverse;
106             map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
107             
108             if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length();  }
109             
110             if (itForward == irbarcodes.end()) {
111                 vector<int> temp; temp.push_back(it->first);
112                 irbarcodes[forward] = temp;
113             }else { itForward->second.push_back(it->first); }
114         }
115         
116         maxRPrimerLength = 0;
117         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
118             string forward = it->second.reverse;
119             map<string, vector<int> >::iterator itForward = irprimers.find(forward);
120             
121             if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length();  }
122             
123             if (itForward == irprimers.end()) {
124                 vector<int> temp; temp.push_back(it->first);
125                 irprimers[forward] = temp;
126             }else { itForward->second.push_back(it->first); }
127         }
128
129         ipbarcodes = br;
130         ipprimers = pr;
131         }
132         catch(exception& e) {
133                 m->errorOut(e, "TrimOligos", "TrimOligos");
134                 exit(1);
135         }
136 }
137 /********************************************************************/
138 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
139 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
140         try {
141                 m = MothurOut::getInstance();
142                 
143                 pdiffs = p;
144                 bdiffs = b;
145                 
146                 barcodes = br;
147                 primers = pr;
148                 revPrimer = r;
149         paired = false;
150         
151         maxFBarcodeLength = 0;
152         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
153             string oligo = it->first;
154             if(oligo.length() > maxFBarcodeLength){
155                 maxFBarcodeLength = oligo.length();
156             }
157         }
158         maxRBarcodeLength = maxFBarcodeLength;
159         
160         maxFPrimerLength = 0;
161         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
162             string oligo = it->first;
163             if(oligo.length() > maxFPrimerLength){
164                 maxFPrimerLength = oligo.length();
165             }
166         }
167         maxRPrimerLength = maxFPrimerLength;
168         }
169         catch(exception& e) {
170                 m->errorOut(e, "TrimOligos", "TrimOligos");
171                 exit(1);
172         }
173 }
174 /********************************************************************/
175 TrimOligos::~TrimOligos() {}
176 //********************************************************************/
177 bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
178         try {
179                 
180                 string rawSequence = seq.getUnaligned();
181                 
182                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
183                         string oligo = it->first;
184                         
185                         if(rawSequence.length() < oligo.length()) {  break;  }
186                         
187                         //search for primer
188             int olength = oligo.length();
189             for (int j = 0; j < rawSequence.length()-olength; j++){
190                 if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
191                 string rawChunk = rawSequence.substr(j, olength);
192                 if(compareDNASeq(oligo, rawChunk)) {
193                     primerStart = j;
194                     primerEnd = primerStart + olength;
195                     return true;
196                 }
197                 
198             }
199         }       
200                 
201         primerStart = 0; primerEnd = 0;
202         //if you don't want to allow for diffs
203                 if (pdiffs == 0) { return false;  }
204                 else { //try aligning and see if you can find it
205                                                 
206                         //can you find the barcode
207                         int minDiff = 1e6;
208                         int minCount = 1;
209                         
210             Alignment* alignment;
211             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));  }
212             else{ alignment = NULL; } 
213             
214                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
215                 
216                 string prim = it->first;
217                 //search for primer
218                 int olength = prim.length();
219                 if (rawSequence.length() < olength) {} //ignore primers too long for this seq
220                 else{
221                     for (int j = 0; j < rawSequence.length()-olength; j++){
222                         
223                         string oligo = it->first;
224
225                         if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
226                                 
227                         string rawChunk = rawSequence.substr(j, olength+pdiffs);
228                         
229                         //use needleman to align first primer.length()+numdiffs of sequence to each barcode
230                         alignment->alignPrimer(oligo, rawChunk);
231                         oligo = alignment->getSeqAAln();
232                         string temp = alignment->getSeqBAln();
233                                 
234                         int alnLength = oligo.length();
235                                 
236                         for(int i=oligo.length()-1;i>=0;i--){
237                             if(oligo[i] != '-'){        alnLength = i+1;        break;  }
238                         }
239                         oligo = oligo.substr(0,alnLength);
240                         temp = temp.substr(0,alnLength);
241                                 
242                         int numDiff = countDiffs(oligo, temp);
243                                 
244                         if(numDiff < minDiff){
245                             minDiff = numDiff;
246                             minCount = 1;
247                             primerStart = j;
248                             primerEnd = primerStart + alnLength;
249                         }else if(numDiff == minDiff){ minCount++; }
250                     }
251                 }
252                         }
253                         
254             if (alignment != NULL) {  delete alignment;  }
255             
256                         if(minDiff > pdiffs)    {       primerStart = 0; primerEnd = 0; return false;           }       //no good matches
257                         else if(minCount > 1)   {       primerStart = 0; primerEnd = 0; return false;   }       //can't tell the difference between multiple primers
258                         else{  return true; }
259                 }
260        
261         primerStart = 0; primerEnd = 0;
262                 return false;
263                 
264         }
265         catch(exception& e) {
266                 m->errorOut(e, "TrimOligos", "stripForward");
267                 exit(1);
268         }
269 }
270 //******************************************************************/
271 bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
272         try {
273                 
274                 string rawSequence = seq.getUnaligned();
275                 
276                 for(int i=0;i<revPrimer.size();i++){
277                         string oligo = revPrimer[i];
278                         if(rawSequence.length() < oligo.length()) {  break;  }
279                         
280                         //search for primer
281             int olength = oligo.length();
282             for (int j = rawSequence.length()-olength; j >= 0; j--){
283                 if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
284                 string rawChunk = rawSequence.substr(j, olength);
285                 
286                 if(compareDNASeq(oligo, rawChunk)) {
287                     primerStart = j;
288                     primerEnd = primerStart + olength;
289                     return true;
290                 }
291                 
292             }
293                 }       
294                 
295         primerStart = 0; primerEnd = 0;
296                 return false;
297         }
298         catch(exception& e) {
299                 m->errorOut(e, "PcrSeqsCommand", "findReverse");
300                 exit(1);
301         }
302 }
303 //*******************************************************************/
304
305 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
306         try {
307                 
308         if (paired) {  int success = stripPairedBarcode(seq, qual, group);  return success; }
309         
310                 string rawSequence = seq.getUnaligned();
311                 int success = bdiffs + 1;       //guilty until proven innocent
312                 
313                 //can you find the barcode
314                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
315                         string oligo = it->first;
316                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
317                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
318                                 break;  
319                         }
320                         
321                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
322                                 group = it->second;
323                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
324                                 
325                                 if(qual.getName() != ""){
326                                         qual.trimQScores(oligo.length(), -1);
327                                 }
328                                 
329                                 success = 0;
330                                 break;
331                         }
332                 }
333                 
334                 //if you found the barcode or if you don't want to allow for diffs
335                 if ((bdiffs == 0) || (success == 0)) { return success;  }
336                 
337                 else { //try aligning and see if you can find it
338                         Alignment* alignment;
339                         if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
340                         else{ alignment = NULL; } 
341                         
342                         //can you find the barcode
343                         int minDiff = 1e6;
344                         int minCount = 1;
345                         int minGroup = -1;
346                         int minPos = 0;
347                         
348                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
349                                 string oligo = it->first;
350                                 //                              int length = oligo.length();
351                                 
352                                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
353                                         success = bdiffs + 10;
354                                         break;
355                                 }
356                                 
357                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
358                                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
359                                 oligo = alignment->getSeqAAln();
360                                 string temp = alignment->getSeqBAln();
361                                 
362                                 int alnLength = oligo.length();
363                                 
364                                 for(int i=oligo.length()-1;i>=0;i--){
365                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
366                                 }
367                                 oligo = oligo.substr(0,alnLength);
368                                 temp = temp.substr(0,alnLength);
369                 int numDiff = countDiffs(oligo, temp);
370                                 
371                                 if(numDiff < minDiff){
372                                         minDiff = numDiff;
373                                         minCount = 1;
374                                         minGroup = it->second;
375                                         minPos = 0;
376                                         for(int i=0;i<alnLength;i++){
377                                                 if(temp[i] != '-'){
378                                                         minPos++;
379                                                 }
380                                         }
381                                 }
382                                 else if(numDiff == minDiff){
383                                         minCount++;
384                                 }
385                                 
386                         }
387                         
388                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
389                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
390                         else{                                                                                                   //use the best match
391                                 group = minGroup;
392                                 seq.setUnaligned(rawSequence.substr(minPos));
393     
394                                 if(qual.getName() != ""){
395                                         qual.trimQScores(minPos, -1);
396                                 }
397                                 success = minDiff;
398                         }
399                         
400                         if (alignment != NULL) {  delete alignment;  }
401                         
402                 }
403                 
404                 return success;
405                 
406         }
407         catch(exception& e) {
408                 m->errorOut(e, "TrimOligos", "stripBarcode");
409                 exit(1);
410         }
411 }
412 //*******************************************************************/
413 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
414         try {
415                 //look for forward barcode
416                 string rawFSequence = forwardSeq.getUnaligned();
417         string rawRSequence = reverseSeq.getUnaligned();
418                 int success = bdiffs + 1;       //guilty until proven innocent
419                 
420                 //can you find the forward barcode
421                 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
422                         string foligo = it->second.forward;
423             string roligo = it->second.reverse;
424             
425                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
426                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
427                                 break;  
428                         }
429             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
430                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
431                                 break;  
432                         }
433                         
434                         if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
435                                 group = it->first;
436                                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
437                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
438                                 success = 0;
439                                 break;
440                         }
441                 }
442                 
443                 //if you found the barcode or if you don't want to allow for diffs
444                 if ((bdiffs == 0) || (success == 0)) { return success;  }
445                 else { //try aligning and see if you can find it
446                         Alignment* alignment;
447                         if (ifbarcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
448                         else{ alignment = NULL; } 
449                         
450                         //can you find the barcode
451                         int minDiff = 1e6;
452                         int minCount = 1;
453                         vector< vector<int> > minFGroup;
454                         vector<int> minFPos; 
455             
456             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
457             /*
458                 1   Sarah   Westcott
459                 2   John    Westcott
460                 3   Anna    Westcott
461                 4   Sarah   Schloss
462                 5   Pat     Schloss
463                 6   Gail    Brown
464                 7   Pat     Moore
465              
466                 only want to look for forward = Sarah, John, Anna, Pat, Gail
467                                       reverse = Westcott, Schloss, Brown, Moore
468              
469                 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
470              */
471             //cout << endl << forwardSeq.getName() << endl;         
472                         for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
473                                 string oligo = it->first;
474                                 
475                                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
476                                         success = bdiffs + 10;
477                                         break;
478                                 }
479                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
480                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
481                                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
482                                 oligo = alignment->getSeqAAln();
483                                 string temp = alignment->getSeqBAln();
484                
485                                 int alnLength = oligo.length();
486                                 
487                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
488                                 oligo = oligo.substr(0,alnLength);
489                                 temp = temp.substr(0,alnLength);
490                 int numDiff = countDiffs(oligo, temp);
491                                 
492                 if (alnLength == 0) { numDiff = bdiffs + 100; }
493                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
494                 
495                                 if(numDiff < minDiff){
496                                         minDiff = numDiff;
497                                         minCount = 1;
498                                         minFGroup.clear();
499                     minFGroup.push_back(it->second);
500                                         int tempminFPos = 0;
501                     minFPos.clear();
502                                         for(int i=0;i<alnLength;i++){
503                                                 if(temp[i] != '-'){
504                                                         tempminFPos++;
505                                                 }
506                                         }
507                     minFPos.push_back(tempminFPos);
508                                 }else if(numDiff == minDiff){
509                                         minFGroup.push_back(it->second);
510                     int tempminFPos = 0;
511                                         for(int i=0;i<alnLength;i++){
512                                                 if(temp[i] != '-'){
513                                                         tempminFPos++;
514                                                 }
515                                         }
516                     minFPos.push_back(tempminFPos);
517                                 }
518                         }
519             
520                         //cout << minDiff << '\t' << minCount << '\t' << endl;
521                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
522                         else{   
523                 //check for reverse match
524                 if (alignment != NULL) {  delete alignment;  }
525                 
526                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1));   }
527                 else{ alignment = NULL; } 
528                 
529                 //can you find the barcode
530                 minDiff = 1e6;
531                 minCount = 1;
532                 vector< vector<int> > minRGroup;
533                 vector<int> minRPos;
534                 
535                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
536                     string oligo = it->first;
537                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
538                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
539                         success = bdiffs + 10;
540                         break;
541                     }
542                     
543                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
544                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
545                     oligo = alignment->getSeqAAln();
546                     string temp = alignment->getSeqBAln();
547                     
548                     int alnLength = oligo.length();
549                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
550                     oligo = oligo.substr(0,alnLength);
551                     temp = temp.substr(0,alnLength);
552                     int numDiff = countDiffs(oligo, temp);
553                     if (alnLength == 0) { numDiff = bdiffs + 100; }
554                     
555                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
556                     if(numDiff < minDiff){
557                         minDiff = numDiff;
558                         minCount = 1;
559                         minRGroup.clear();
560                         minRGroup.push_back(it->second);
561                         int tempminRPos = 0;
562                         minRPos.clear();
563                         for(int i=0;i<alnLength;i++){
564                             if(temp[i] != '-'){
565                                 tempminRPos++;
566                             }
567                         }
568                         minRPos.push_back(tempminRPos);                    
569                     }else if(numDiff == minDiff){
570                         int tempminRPos = 0;
571                         for(int i=0;i<alnLength;i++){
572                             if(temp[i] != '-'){
573                                 tempminRPos++;
574                             }
575                         }
576                         minRPos.push_back(tempminRPos);  
577                         minRGroup.push_back(it->second);
578                     }
579                     
580                 }
581                 
582                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
583                 for (int i = 0; i < minFGroup.size(); i++) { 
584                     cout << i << '\t';
585                     for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
586                     cout << endl;
587                 }
588                 cout << endl;
589                 for (int i = 0; i < minRGroup.size(); i++) { 
590                     cout << i << '\t';
591                     for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
592                     cout << endl;
593                 }
594                 cout << endl;*/
595                 if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
596                 else  {  
597                     bool foundMatch = false;
598                     vector<int> matches;
599                     for (int i = 0; i < minFGroup.size(); i++) {
600                         for (int j = 0; j < minFGroup[i].size(); j++) {
601                             for (int k = 0; k < minRGroup.size(); k++) {
602                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
603                             }
604                         }
605                     }
606                     
607                     int fStart = 0;
608                     int rStart = 0;
609                     if (matches.size() == 1) { 
610                         foundMatch = true;
611                         group = matches[0]; 
612                         fStart = minFPos[0];
613                         rStart = minRPos[0];
614                     }
615                     
616                     //we have an acceptable match for the forward and reverse, but do they match?
617                     if (foundMatch) {
618                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
619                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
620                         success = minDiff;
621                     }else { success = bdiffs + 100;     }
622                 }
623                         }
624                         
625                         if (alignment != NULL) {  delete alignment;  }
626                 }
627                 
628                 return success;
629                 
630         }
631         catch(exception& e) {
632                 m->errorOut(e, "TrimOligos", "stripIBarcode");
633                 exit(1);
634         }
635         
636 }
637 //*******************************************************************/
638 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
639         try {
640                 //look for forward barcode
641                 string rawFSequence = forwardSeq.getUnaligned();
642         string rawRSequence = reverseSeq.getUnaligned();
643                 int success = bdiffs + 1;       //guilty until proven innocent
644                 
645                 //can you find the forward barcode
646                 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
647                         string foligo = it->second.forward;
648             string roligo = it->second.reverse;
649             
650                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
651                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
652                                 break;  
653                         }
654             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
655                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
656                                 break;  
657                         }
658                         
659                         if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
660                                 group = it->first;
661                                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
662                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
663                 forwardQual.trimQScores(foligo.length(), -1);
664                 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
665                                 success = 0;
666                                 break;
667                         }
668                 }
669                 
670                 //if you found the barcode or if you don't want to allow for diffs
671                 if ((bdiffs == 0) || (success == 0)) { return success;  }
672                 else { //try aligning and see if you can find it
673                         Alignment* alignment;
674                         if (ifbarcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
675                         else{ alignment = NULL; } 
676                         
677                         //can you find the barcode
678                         int minDiff = 1e6;
679                         int minCount = 1;
680                         vector< vector<int> > minFGroup;
681                         vector<int> minFPos; 
682             
683             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
684             /*
685              1   Sarah   Westcott
686              2   John    Westcott
687              3   Anna    Westcott
688              4   Sarah   Schloss
689              5   Pat     Schloss
690              6   Gail    Brown
691              7   Pat     Moore
692              
693              only want to look for forward = Sarah, John, Anna, Pat, Gail
694              reverse = Westcott, Schloss, Brown, Moore
695              
696              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
697              */
698             //cout << endl << forwardSeq.getName() << endl;         
699                         for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
700                                 string oligo = it->first;
701                                 
702                                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
703                                         success = bdiffs + 10;
704                                         break;
705                                 }
706                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
707                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
708                                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
709                                 oligo = alignment->getSeqAAln();
710                                 string temp = alignment->getSeqBAln();
711                 
712                                 int alnLength = oligo.length();
713                                 
714                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
715                                 oligo = oligo.substr(0,alnLength);
716                                 temp = temp.substr(0,alnLength);
717                 int numDiff = countDiffs(oligo, temp);
718                                 
719                 if (alnLength == 0) { numDiff = bdiffs + 100; }
720                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
721                 
722                                 if(numDiff < minDiff){
723                                         minDiff = numDiff;
724                                         minCount = 1;
725                                         minFGroup.clear();
726                     minFGroup.push_back(it->second);
727                                         int tempminFPos = 0;
728                     minFPos.clear();
729                                         for(int i=0;i<alnLength;i++){
730                                                 if(temp[i] != '-'){
731                                                         tempminFPos++;
732                                                 }
733                                         }
734                     minFPos.push_back(tempminFPos);
735                                 }else if(numDiff == minDiff){
736                                         minFGroup.push_back(it->second);
737                     int tempminFPos = 0;
738                                         for(int i=0;i<alnLength;i++){
739                                                 if(temp[i] != '-'){
740                                                         tempminFPos++;
741                                                 }
742                                         }
743                     minFPos.push_back(tempminFPos);
744                                 }
745                         }
746             
747                         //cout << minDiff << '\t' << minCount << '\t' << endl;
748                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
749                         else{   
750                 //check for reverse match
751                 if (alignment != NULL) {  delete alignment;  }
752                 
753                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1));   }
754                 else{ alignment = NULL; } 
755                 
756                 //can you find the barcode
757                 minDiff = 1e6;
758                 minCount = 1;
759                 vector< vector<int> > minRGroup;
760                 vector<int> minRPos;
761                 
762                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
763                     string oligo = it->first;
764                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
765                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
766                         success = bdiffs + 10;
767                         break;
768                     }
769                     
770                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
771                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
772                     oligo = alignment->getSeqAAln();
773                     string temp = alignment->getSeqBAln();
774                     
775                     int alnLength = oligo.length();
776                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
777                     oligo = oligo.substr(0,alnLength);
778                     temp = temp.substr(0,alnLength);
779                     int numDiff = countDiffs(oligo, temp);
780                     if (alnLength == 0) { numDiff = bdiffs + 100; }
781                     
782                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
783                     if(numDiff < minDiff){
784                         minDiff = numDiff;
785                         minCount = 1;
786                         minRGroup.clear();
787                         minRGroup.push_back(it->second);
788                         int tempminRPos = 0;
789                         minRPos.clear();
790                         for(int i=0;i<alnLength;i++){
791                             if(temp[i] != '-'){
792                                 tempminRPos++;
793                             }
794                         }
795                         minRPos.push_back(tempminRPos);                    
796                     }else if(numDiff == minDiff){
797                         int tempminRPos = 0;
798                         for(int i=0;i<alnLength;i++){
799                             if(temp[i] != '-'){
800                                 tempminRPos++;
801                             }
802                         }
803                         minRPos.push_back(tempminRPos);  
804                         minRGroup.push_back(it->second);
805                     }
806                     
807                 }
808                 
809                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
810                  for (int i = 0; i < minFGroup.size(); i++) { 
811                  cout << i << '\t';
812                  for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
813                  cout << endl;
814                  }
815                  cout << endl;
816                  for (int i = 0; i < minRGroup.size(); i++) { 
817                  cout << i << '\t';
818                  for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
819                  cout << endl;
820                  }
821                  cout << endl;*/
822                 if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
823                 else  {  
824                     bool foundMatch = false;
825                     vector<int> matches;
826                     for (int i = 0; i < minFGroup.size(); i++) {
827                         for (int j = 0; j < minFGroup[i].size(); j++) {
828                             for (int k = 0; k < minRGroup.size(); k++) {
829                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
830                             }
831                         }
832                     }
833                     
834                     int fStart = 0;
835                     int rStart = 0;
836                     if (matches.size() == 1) { 
837                         foundMatch = true;
838                         group = matches[0]; 
839                         fStart = minFPos[0];
840                         rStart = minRPos[0];
841                     }
842                     
843                     //we have an acceptable match for the forward and reverse, but do they match?
844                     if (foundMatch) {
845                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
846                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
847                         forwardQual.trimQScores(fStart, -1);
848                         reverseQual.trimQScores(rStart, -1);
849                         success = minDiff;
850                     }else { success = bdiffs + 100;     }
851                 }
852                         }
853                         
854                         if (alignment != NULL) {  delete alignment;  }
855                 }
856                 
857                 return success;
858                 
859         }
860         catch(exception& e) {
861                 m->errorOut(e, "TrimOligos", "stripIBarcode");
862                 exit(1);
863         }
864         
865 }
866 //*******************************************************************/
867 int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
868         try {
869                 //look for forward barcode
870                 string rawSeq = seq.getUnaligned();
871                 int success = bdiffs + 1;       //guilty until proven innocent
872                 //cout << seq.getName() << endl;
873                 //can you find the forward barcode
874                 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
875                         string foligo = it->second.forward;
876             string roligo = it->second.reverse;
877             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
878             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
879                         if(rawSeq.length() < foligo.length()){  //let's just assume that the barcodes are the same length
880                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
881                                 break;
882                         }
883             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
884                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
885                                 break;
886                         }
887                         
888                         if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
889                                 group = it->first;
890                 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
891                 seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
892                 if(qual.getName() != ""){
893                     qual.trimQScores(-1, rawSeq.length()-roligo.length());
894                     qual.trimQScores(foligo.length(), -1);
895                 }
896                                 success = 0;
897                                 break;
898                         }
899                 }
900                 //cout << "success=" << success << endl;
901                 //if you found the barcode or if you don't want to allow for diffs
902                 if ((bdiffs == 0) || (success == 0)) { return success;  }
903                 else { //try aligning and see if you can find it
904                         Alignment* alignment;
905                         if (ifbarcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
906                         else{ alignment = NULL; }
907                         
908                         //can you find the barcode
909                         int minDiff = 1e6;
910                         int minCount = 1;
911                         vector< vector<int> > minFGroup;
912                         vector<int> minFPos;
913             
914             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
915             /*
916              1   Sarah   Westcott
917              2   John    Westcott
918              3   Anna    Westcott
919              4   Sarah   Schloss
920              5   Pat     Schloss
921              6   Gail    Brown
922              7   Pat     Moore
923              
924              only want to look for forward = Sarah, John, Anna, Pat, Gail
925              reverse = Westcott, Schloss, Brown, Moore
926              
927              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
928              */
929             //cout << endl << forwardSeq.getName() << endl;
930                         for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
931                                 string oligo = it->first;
932                                 
933                                 if(rawSeq.length() < maxFBarcodeLength){        //let's just assume that the barcodes are the same length
934                                         success = bdiffs + 10;
935                                         break;
936                                 }
937                                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
938                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
939                                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
940                                 oligo = alignment->getSeqAAln();
941                                 string temp = alignment->getSeqBAln();
942                 
943                                 int alnLength = oligo.length();
944                                 
945                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
946                                 oligo = oligo.substr(0,alnLength);
947                                 temp = temp.substr(0,alnLength);
948                 int numDiff = countDiffs(oligo, temp);
949                                 
950                 if (alnLength == 0) { numDiff = bdiffs + 100; }
951                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
952                 
953                                 if(numDiff < minDiff){
954                                         minDiff = numDiff;
955                                         minCount = 1;
956                                         minFGroup.clear();
957                     minFGroup.push_back(it->second);
958                                         int tempminFPos = 0;
959                     minFPos.clear();
960                                         for(int i=0;i<alnLength;i++){
961                                                 if(temp[i] != '-'){
962                                                         tempminFPos++;
963                                                 }
964                                         }
965                     minFPos.push_back(tempminFPos);
966                                 }else if(numDiff == minDiff){
967                                         minFGroup.push_back(it->second);
968                     int tempminFPos = 0;
969                                         for(int i=0;i<alnLength;i++){
970                                                 if(temp[i] != '-'){
971                                                         tempminFPos++;
972                                                 }
973                                         }
974                     minFPos.push_back(tempminFPos);
975                                 }
976                         }
977             
978                         //cout << minDiff << '\t' << minCount << '\t' << endl;
979                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
980                         else{
981                 //check for reverse match
982                 if (alignment != NULL) {  delete alignment;  }
983                 
984                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1));   }
985                 else{ alignment = NULL; }
986                 
987                 //can you find the barcode
988                 minDiff = 1e6;
989                 minCount = 1;
990                 vector< vector<int> > minRGroup;
991                 vector<int> minRPos;
992                 
993                 string rawRSequence = reverseOligo(seq.getUnaligned());
994                 //cout << irbarcodes.size() << '\t' << maxRBarcodeLength <<  endl;
995                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
996                     string oligo = reverseOligo(it->first);
997                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
998                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
999                         success = bdiffs + 10;
1000                         break;
1001                     }
1002                     
1003                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1004                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
1005                     oligo = alignment->getSeqAAln();
1006                     string temp = alignment->getSeqBAln();
1007                     
1008                     int alnLength = oligo.length();
1009                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1010                     oligo = oligo.substr(0,alnLength);
1011                     temp = temp.substr(0,alnLength);
1012                     int numDiff = countDiffs(oligo, temp);
1013                     if (alnLength == 0) { numDiff = bdiffs + 100; }
1014                     
1015                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1016                     if(numDiff < minDiff){
1017                         minDiff = numDiff;
1018                         minCount = 1;
1019                         minRGroup.clear();
1020                         minRGroup.push_back(it->second);
1021                         int tempminRPos = 0;
1022                         minRPos.clear();
1023                         for(int i=0;i<alnLength;i++){
1024                             if(temp[i] != '-'){
1025                                 tempminRPos++;
1026                             }
1027                         }
1028                         minRPos.push_back(tempminRPos);
1029                     }else if(numDiff == minDiff){
1030                         int tempminRPos = 0;
1031                         for(int i=0;i<alnLength;i++){
1032                             if(temp[i] != '-'){
1033                                 tempminRPos++;
1034                             }
1035                         }
1036                         minRPos.push_back(tempminRPos);
1037                         minRGroup.push_back(it->second);
1038                     }
1039                     
1040                 }
1041                 
1042                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1043                  for (int i = 0; i < minFGroup.size(); i++) {
1044                  cout << i << '\t';
1045                  for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
1046                  cout << endl;
1047                  }
1048                  cout << endl;
1049                  for (int i = 0; i < minRGroup.size(); i++) {
1050                  cout << i << '\t';
1051                  for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
1052                  cout << endl;
1053                  }
1054                  cout << endl;*/
1055                 if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1056                 else  {
1057                     bool foundMatch = false;
1058                     vector<int> matches;
1059                     for (int i = 0; i < minFGroup.size(); i++) {
1060                         for (int j = 0; j < minFGroup[i].size(); j++) {
1061                             for (int k = 0; k < minRGroup.size(); k++) {
1062                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1063                             }
1064                         }
1065                     }
1066                     
1067                     int fStart = 0;
1068                     int rStart = 0;
1069                     if (matches.size() == 1) {
1070                         foundMatch = true;
1071                         group = matches[0];
1072                         fStart = minFPos[0];
1073                         rStart = rawSeq.length() - minRPos[0];
1074                     }
1075                     
1076                     //we have an acceptable match for the forward and reverse, but do they match?
1077                     if (foundMatch) {
1078                         string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1079                         seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1080                         if(qual.getName() != ""){
1081                             qual.trimQScores(-1, rStart);
1082                             qual.trimQScores(fStart, -1);
1083                         }
1084                         success = minDiff;
1085                         //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
1086                     }else { success = bdiffs + 100;     }
1087                 }
1088                         }
1089                         
1090                         if (alignment != NULL) {  delete alignment;  }
1091                 }
1092                 
1093                 return success;
1094                 
1095         }
1096         catch(exception& e) {
1097                 m->errorOut(e, "TrimOligos", "stripPairedBarcode");
1098                 exit(1);
1099         }
1100         
1101 }
1102 //*******************************************************************/
1103 int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1104         try {
1105                 //look for forward 
1106                 string rawSeq = seq.getUnaligned();
1107                 int success = pdiffs + 1;       //guilty until proven innocent
1108         //cout << seq.getName() << endl;
1109                 //can you find the forward
1110                 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1111                         string foligo = it->second.forward;
1112             string roligo = it->second.reverse;
1113             
1114             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
1115             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
1116                         if(rawSeq.length() < foligo.length()){  //let's just assume that the barcodes are the same length
1117                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1118                                 break;
1119                         }
1120             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
1121                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1122                                 break;
1123                         }
1124                         
1125                         if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
1126                                 group = it->first;
1127                 if (!keepForward) { 
1128                     string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
1129                     seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
1130                     if(qual.getName() != ""){
1131                         qual.trimQScores(-1, rawSeq.length()-roligo.length());
1132                         qual.trimQScores(foligo.length(), -1);
1133                     }
1134                 }
1135                                 success = 0;
1136                                 break;
1137                         }
1138                 }
1139                 //cout << "success=" << success << endl;
1140                 //if you found the barcode or if you don't want to allow for diffs
1141                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1142                 else { //try aligning and see if you can find it
1143                         Alignment* alignment;
1144                         if (ifprimers.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
1145                         else{ alignment = NULL; }
1146                         
1147                         //can you find the barcode
1148                         int minDiff = 1e6;
1149                         int minCount = 1;
1150                         vector< vector<int> > minFGroup;
1151                         vector<int> minFPos;
1152             
1153             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1154             /*
1155              1   Sarah   Westcott
1156              2   John    Westcott
1157              3   Anna    Westcott
1158              4   Sarah   Schloss
1159              5   Pat     Schloss
1160              6   Gail    Brown
1161              7   Pat     Moore
1162              
1163              only want to look for forward = Sarah, John, Anna, Pat, Gail
1164              reverse = Westcott, Schloss, Brown, Moore
1165              
1166              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
1167              */
1168             //cout << endl << forwardSeq.getName() << endl;
1169                         for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1170                                 string oligo = it->first;
1171                                 
1172                                 if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1173                                         success = pdiffs + 10;
1174                                         break;
1175                                 }
1176                                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
1177                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1178                                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
1179                                 oligo = alignment->getSeqAAln();
1180                                 string temp = alignment->getSeqBAln();
1181                 
1182                                 int alnLength = oligo.length();
1183                                 
1184                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1185                                 oligo = oligo.substr(0,alnLength);
1186                                 temp = temp.substr(0,alnLength);
1187                 int numDiff = countDiffs(oligo, temp);
1188                                 
1189                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1190                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1191                 
1192                                 if(numDiff < minDiff){
1193                                         minDiff = numDiff;
1194                                         minCount = 1;
1195                                         minFGroup.clear();
1196                     minFGroup.push_back(it->second);
1197                                         int tempminFPos = 0;
1198                     minFPos.clear();
1199                                         for(int i=0;i<alnLength;i++){
1200                                                 if(temp[i] != '-'){
1201                                                         tempminFPos++;
1202                                                 }
1203                                         }
1204                     minFPos.push_back(tempminFPos);
1205                                 }else if(numDiff == minDiff){
1206                                         minFGroup.push_back(it->second);
1207                     int tempminFPos = 0;
1208                                         for(int i=0;i<alnLength;i++){
1209                                                 if(temp[i] != '-'){
1210                                                         tempminFPos++;
1211                                                 }
1212                                         }
1213                     minFPos.push_back(tempminFPos);
1214                                 }
1215                         }
1216             
1217                         //cout << minDiff << '\t' << minCount << '\t' << endl;
1218                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1219                         else{
1220                 //check for reverse match
1221                 if (alignment != NULL) {  delete alignment;  }
1222                 
1223                 if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1));   }
1224                 else{ alignment = NULL; }
1225                 
1226                 //can you find the barcode
1227                 minDiff = 1e6;
1228                 minCount = 1;
1229                 vector< vector<int> > minRGroup;
1230                 vector<int> minRPos;
1231                 
1232                 string rawRSequence = reverseOligo(seq.getUnaligned());
1233                 
1234                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1235                     string oligo = reverseOligo(it->first);
1236                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
1237                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1238                         success = pdiffs + 10;
1239                         break;
1240                     }
1241                     
1242                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1243                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1244                     oligo = alignment->getSeqAAln();
1245                     string temp = alignment->getSeqBAln();
1246                     
1247                     int alnLength = oligo.length();
1248                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1249                     oligo = oligo.substr(0,alnLength);
1250                     temp = temp.substr(0,alnLength);
1251                     int numDiff = countDiffs(oligo, temp);
1252                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1253                     
1254                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1255                     if(numDiff < minDiff){
1256                         minDiff = numDiff;
1257                         minCount = 1;
1258                         minRGroup.clear();
1259                         minRGroup.push_back(it->second);
1260                         int tempminRPos = 0;
1261                         minRPos.clear();
1262                         for(int i=0;i<alnLength;i++){
1263                             if(temp[i] != '-'){
1264                                 tempminRPos++;
1265                             }
1266                         }
1267                         minRPos.push_back(tempminRPos);
1268                     }else if(numDiff == minDiff){
1269                         int tempminRPos = 0;
1270                         for(int i=0;i<alnLength;i++){
1271                             if(temp[i] != '-'){
1272                                 tempminRPos++;
1273                             }
1274                         }
1275                         minRPos.push_back(tempminRPos);
1276                         minRGroup.push_back(it->second);
1277                     }
1278                     
1279                 }
1280                 
1281                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1282                  for (int i = 0; i < minFGroup.size(); i++) {
1283                  cout << i << '\t';
1284                  for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
1285                  cout << endl;
1286                  }
1287                  cout << endl;
1288                  for (int i = 0; i < minRGroup.size(); i++) {
1289                  cout << i << '\t';
1290                  for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
1291                  cout << endl;
1292                  }
1293                  cout << endl;*/
1294                 if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1295                 else  {
1296                     bool foundMatch = false;
1297                     vector<int> matches;
1298                     for (int i = 0; i < minFGroup.size(); i++) {
1299                         for (int j = 0; j < minFGroup[i].size(); j++) {
1300                             for (int k = 0; k < minRGroup.size(); k++) {
1301                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1302                             }
1303                         }
1304                     }
1305                     
1306                     int fStart = 0;
1307                     int rStart = 0;
1308                     if (matches.size() == 1) {
1309                         foundMatch = true;
1310                         group = matches[0];
1311                         fStart = minFPos[0];
1312                         rStart = rawSeq.length() - minRPos[0];
1313                     }
1314                     
1315                     //we have an acceptable match for the forward and reverse, but do they match?
1316                     if (foundMatch) {
1317                         if (!keepForward) {
1318                             string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1319                             seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1320                             if(qual.getName() != ""){
1321                                 qual.trimQScores(-1, rStart);
1322                                 qual.trimQScores(fStart, -1);
1323                             }
1324                         }
1325                         success = minDiff;
1326                         //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
1327                     }else { success = pdiffs + 100;     }
1328                 }
1329                         }
1330                         
1331                         if (alignment != NULL) {  delete alignment;  }
1332                 }
1333                 
1334                 return success;
1335                 
1336         }
1337         catch(exception& e) {
1338                 m->errorOut(e, "TrimOligos", "stripPairedPrimers");
1339                 exit(1);
1340         }
1341         
1342 }
1343
1344 //*******************************************************************/
1345 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
1346         try {
1347                 //look for forward barcode
1348                 string rawFSequence = forwardSeq.getUnaligned();
1349         string rawRSequence = reverseSeq.getUnaligned();
1350                 int success = pdiffs + 1;       //guilty until proven innocent
1351                 
1352                 //can you find the forward barcode
1353                 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1354                         string foligo = it->second.forward;
1355             string roligo = it->second.reverse;
1356             
1357                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
1358                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1359                                 break;  
1360                         }
1361             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
1362                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1363                                 break;  
1364                         }
1365                         
1366                         if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1367                                 group = it->first;
1368                                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1369                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
1370                 forwardQual.trimQScores(foligo.length(), -1);
1371                 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
1372                                 success = 0;
1373                                 break;
1374                         }
1375                 }
1376                 
1377                 //if you found the barcode or if you don't want to allow for diffs
1378                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1379                 else { //try aligning and see if you can find it
1380                         Alignment* alignment;
1381                         if (ifprimers.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
1382                         else{ alignment = NULL; } 
1383                         
1384                         //can you find the barcode
1385                         int minDiff = 1e6;
1386                         int minCount = 1;
1387                         vector< vector<int> > minFGroup;
1388                         vector<int> minFPos; 
1389             
1390             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1391             /*
1392              1   Sarah   Westcott
1393              2   John    Westcott
1394              3   Anna    Westcott
1395              4   Sarah   Schloss
1396              5   Pat     Schloss
1397              6   Gail    Brown
1398              7   Pat     Moore
1399              
1400              only want to look for forward = Sarah, John, Anna, Pat, Gail
1401              reverse = Westcott, Schloss, Brown, Moore
1402              
1403              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
1404              */
1405             //cout << endl << forwardSeq.getName() << endl;         
1406                         for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1407                                 string oligo = it->first;
1408                                 
1409                                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
1410                                         success = pdiffs + 10;
1411                                         break;
1412                                 }
1413                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1414                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1415                                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1416                                 oligo = alignment->getSeqAAln();
1417                                 string temp = alignment->getSeqBAln();
1418                 
1419                                 int alnLength = oligo.length();
1420                                 
1421                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1422                                 oligo = oligo.substr(0,alnLength);
1423                                 temp = temp.substr(0,alnLength);
1424                 int numDiff = countDiffs(oligo, temp);
1425                                 
1426                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1427                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1428                 
1429                                 if(numDiff < minDiff){
1430                                         minDiff = numDiff;
1431                                         minCount = 1;
1432                                         minFGroup.clear();
1433                     minFGroup.push_back(it->second);
1434                                         int tempminFPos = 0;
1435                     minFPos.clear();
1436                                         for(int i=0;i<alnLength;i++){
1437                                                 if(temp[i] != '-'){
1438                                                         tempminFPos++;
1439                                                 }
1440                                         }
1441                     minFPos.push_back(tempminFPos);
1442                                 }else if(numDiff == minDiff){
1443                                         minFGroup.push_back(it->second);
1444                     int tempminFPos = 0;
1445                                         for(int i=0;i<alnLength;i++){
1446                                                 if(temp[i] != '-'){
1447                                                         tempminFPos++;
1448                                                 }
1449                                         }
1450                     minFPos.push_back(tempminFPos);
1451                                 }
1452                         }
1453             
1454                         //cout << minDiff << '\t' << minCount << '\t' << endl;
1455                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1456                         else{   
1457                 //check for reverse match
1458                 if (alignment != NULL) {  delete alignment;  }
1459                 
1460                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1));   }
1461                 else{ alignment = NULL; } 
1462                 
1463                 //can you find the barcode
1464                 minDiff = 1e6;
1465                 minCount = 1;
1466                 vector< vector<int> > minRGroup;
1467                 vector<int> minRPos;
1468                 
1469                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1470                     string oligo = it->first;
1471                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1472                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1473                         success = pdiffs + 10;
1474                         break;
1475                     }
1476                     
1477                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1478                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1479                     oligo = alignment->getSeqAAln();
1480                     string temp = alignment->getSeqBAln();
1481                     
1482                     int alnLength = oligo.length();
1483                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1484                     oligo = oligo.substr(0,alnLength);
1485                     temp = temp.substr(0,alnLength);
1486                     int numDiff = countDiffs(oligo, temp);
1487                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1488                     
1489                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1490                     if(numDiff < minDiff){
1491                         minDiff = numDiff;
1492                         minCount = 1;
1493                         minRGroup.clear();
1494                         minRGroup.push_back(it->second);
1495                         int tempminRPos = 0;
1496                         minRPos.clear();
1497                         for(int i=0;i<alnLength;i++){
1498                             if(temp[i] != '-'){
1499                                 tempminRPos++;
1500                             }
1501                         }
1502                         minRPos.push_back(tempminRPos);                    
1503                     }else if(numDiff == minDiff){
1504                         int tempminRPos = 0;
1505                         for(int i=0;i<alnLength;i++){
1506                             if(temp[i] != '-'){
1507                                 tempminRPos++;
1508                             }
1509                         }
1510                         minRPos.push_back(tempminRPos);  
1511                         minRGroup.push_back(it->second);
1512                     }
1513                     
1514                 }
1515                 
1516                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1517                  for (int i = 0; i < minFGroup.size(); i++) { 
1518                  cout << i << '\t';
1519                  for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
1520                  cout << endl;
1521                  }
1522                  cout << endl;
1523                  for (int i = 0; i < minRGroup.size(); i++) { 
1524                  cout << i << '\t';
1525                  for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
1526                  cout << endl;
1527                  }
1528                  cout << endl;*/
1529                 if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1530                 else  {  
1531                     bool foundMatch = false;
1532                     vector<int> matches;
1533                     for (int i = 0; i < minFGroup.size(); i++) {
1534                         for (int j = 0; j < minFGroup[i].size(); j++) {
1535                             for (int k = 0; k < minRGroup.size(); k++) {
1536                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1537                             }
1538                         }
1539                     }
1540                     
1541                     int fStart = 0;
1542                     int rStart = 0;
1543                     if (matches.size() == 1) { 
1544                         foundMatch = true;
1545                         group = matches[0]; 
1546                         fStart = minFPos[0];
1547                         rStart = minRPos[0];
1548                     }
1549                     
1550                     //we have an acceptable match for the forward and reverse, but do they match?
1551                     if (foundMatch) {
1552                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1553                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1554                         forwardQual.trimQScores(fStart, -1);
1555                         reverseQual.trimQScores(rStart, -1);
1556                         success = minDiff;
1557                     }else { success = pdiffs + 100;     }
1558                 }
1559                         }
1560                         
1561                         if (alignment != NULL) {  delete alignment;  }
1562                 }
1563                 
1564                 return success;
1565                 
1566         }
1567         catch(exception& e) {
1568                 m->errorOut(e, "TrimOligos", "stripIForward");
1569                 exit(1);
1570         }
1571         
1572 }
1573 //*******************************************************************/
1574 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
1575         try {
1576                 //look for forward barcode
1577                 string rawFSequence = forwardSeq.getUnaligned();
1578         string rawRSequence = reverseSeq.getUnaligned();
1579                 int success = pdiffs + 1;       //guilty until proven innocent
1580                 
1581                 //can you find the forward barcode
1582                 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1583                         string foligo = it->second.forward;
1584             string roligo = it->second.reverse;
1585             
1586                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
1587                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1588                                 break;  
1589                         }
1590             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
1591                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1592                                 break;  
1593                         }
1594                         
1595                         if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1596                                 group = it->first;
1597                                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1598                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
1599                                 success = 0;
1600                                 break;
1601                         }
1602                 }
1603                 
1604                 //if you found the barcode or if you don't want to allow for diffs
1605                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1606                 else { //try aligning and see if you can find it
1607                         Alignment* alignment;
1608                         if (ifprimers.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
1609                         else{ alignment = NULL; } 
1610                         
1611                         //can you find the barcode
1612                         int minDiff = 1e6;
1613                         int minCount = 1;
1614                         vector< vector<int> > minFGroup;
1615                         vector<int> minFPos; 
1616             
1617             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1618             /*
1619              1   Sarah   Westcott
1620              2   John    Westcott
1621              3   Anna    Westcott
1622              4   Sarah   Schloss
1623              5   Pat     Schloss
1624              6   Gail    Brown
1625              7   Pat     Moore
1626              
1627              only want to look for forward = Sarah, John, Anna, Pat, Gail
1628              reverse = Westcott, Schloss, Brown, Moore
1629              
1630              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
1631              */
1632             //cout << endl << forwardSeq.getName() << endl;         
1633                         for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1634                                 string oligo = it->first;
1635                                 
1636                                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
1637                                         success = pdiffs + 10;
1638                                         break;
1639                                 }
1640                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1641                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1642                                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1643                                 oligo = alignment->getSeqAAln();
1644                                 string temp = alignment->getSeqBAln();
1645                 
1646                                 int alnLength = oligo.length();
1647                                 
1648                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1649                                 oligo = oligo.substr(0,alnLength);
1650                                 temp = temp.substr(0,alnLength);
1651                 int numDiff = countDiffs(oligo, temp);
1652                                 
1653                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1654                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1655                 
1656                                 if(numDiff < minDiff){
1657                                         minDiff = numDiff;
1658                                         minCount = 1;
1659                                         minFGroup.clear();
1660                     minFGroup.push_back(it->second);
1661                                         int tempminFPos = 0;
1662                     minFPos.clear();
1663                                         for(int i=0;i<alnLength;i++){
1664                                                 if(temp[i] != '-'){
1665                                                         tempminFPos++;
1666                                                 }
1667                                         }
1668                     minFPos.push_back(tempminFPos);
1669                                 }else if(numDiff == minDiff){
1670                                         minFGroup.push_back(it->second);
1671                     int tempminFPos = 0;
1672                                         for(int i=0;i<alnLength;i++){
1673                                                 if(temp[i] != '-'){
1674                                                         tempminFPos++;
1675                                                 }
1676                                         }
1677                     minFPos.push_back(tempminFPos);
1678                                 }
1679                         }
1680             
1681                         //cout << minDiff << '\t' << minCount << '\t' << endl;
1682                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1683                         else{   
1684                 //check for reverse match
1685                 if (alignment != NULL) {  delete alignment;  }
1686                 
1687                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1));   }
1688                 else{ alignment = NULL; } 
1689                 
1690                 //can you find the barcode
1691                 minDiff = 1e6;
1692                 minCount = 1;
1693                 vector< vector<int> > minRGroup;
1694                 vector<int> minRPos;
1695                 
1696                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1697                     string oligo = it->first;
1698                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1699                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1700                         success = pdiffs + 10;
1701                         break;
1702                     }
1703                     
1704                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1705                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1706                     oligo = alignment->getSeqAAln();
1707                     string temp = alignment->getSeqBAln();
1708                     
1709                     int alnLength = oligo.length();
1710                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1711                     oligo = oligo.substr(0,alnLength);
1712                     temp = temp.substr(0,alnLength);
1713                     int numDiff = countDiffs(oligo, temp);
1714                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1715                     
1716                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1717                     if(numDiff < minDiff){
1718                         minDiff = numDiff;
1719                         minCount = 1;
1720                         minRGroup.clear();
1721                         minRGroup.push_back(it->second);
1722                         int tempminRPos = 0;
1723                         minRPos.clear();
1724                         for(int i=0;i<alnLength;i++){
1725                             if(temp[i] != '-'){
1726                                 tempminRPos++;
1727                             }
1728                         }
1729                         minRPos.push_back(tempminRPos);                    
1730                     }else if(numDiff == minDiff){
1731                         int tempminRPos = 0;
1732                         for(int i=0;i<alnLength;i++){
1733                             if(temp[i] != '-'){
1734                                 tempminRPos++;
1735                             }
1736                         }
1737                         minRPos.push_back(tempminRPos);  
1738                         minRGroup.push_back(it->second);
1739                     }
1740                     
1741                 }
1742                 
1743                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1744                  for (int i = 0; i < minFGroup.size(); i++) { 
1745                  cout << i << '\t';
1746                  for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
1747                  cout << endl;
1748                  }
1749                  cout << endl;
1750                  for (int i = 0; i < minRGroup.size(); i++) { 
1751                  cout << i << '\t';
1752                  for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
1753                  cout << endl;
1754                  }
1755                  cout << endl;*/
1756                 if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1757                 else  {  
1758                     bool foundMatch = false;
1759                     vector<int> matches;
1760                     for (int i = 0; i < minFGroup.size(); i++) {
1761                         for (int j = 0; j < minFGroup[i].size(); j++) {
1762                             for (int k = 0; k < minRGroup.size(); k++) {
1763                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1764                             }
1765                         }
1766                     }
1767                     
1768                     int fStart = 0;
1769                     int rStart = 0;
1770                     if (matches.size() == 1) { 
1771                         foundMatch = true;
1772                         group = matches[0]; 
1773                         fStart = minFPos[0];
1774                         rStart = minRPos[0];
1775                     }
1776                     
1777                     //we have an acceptable match for the forward and reverse, but do they match?
1778                     if (foundMatch) {
1779                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1780                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1781                         success = minDiff;
1782                     }else { success = pdiffs + 100;     }
1783                 }
1784                         }
1785                         
1786                         if (alignment != NULL) {  delete alignment;  }
1787                 }
1788                 
1789                 return success;
1790                 
1791         }
1792         catch(exception& e) {
1793                 m->errorOut(e, "TrimOligos", "stripIForward");
1794                 exit(1);
1795         }
1796         
1797 }
1798 //*******************************************************************/
1799 int TrimOligos::stripBarcode(Sequence& seq, int& group){
1800         try {
1801                 
1802                 string rawSequence = seq.getUnaligned();
1803                 int success = bdiffs + 1;       //guilty until proven innocent
1804                 
1805                 //can you find the barcode
1806                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1807                         string oligo = it->first;
1808                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
1809                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1810                                 break;  
1811                         }
1812                         
1813                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1814                                 group = it->second;
1815                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1816                                 
1817                                 success = 0;
1818                                 break;
1819                         }
1820                 }
1821                 
1822                 //if you found the barcode or if you don't want to allow for diffs
1823                 if ((bdiffs == 0) || (success == 0)) { return success;  }
1824                 
1825                 else { //try aligning and see if you can find it
1826             Alignment* alignment;
1827                         if (barcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));  }
1828                         else{ alignment = NULL; } 
1829                         
1830                         //can you find the barcode
1831                         int minDiff = 1e6;
1832                         int minCount = 1;
1833                         int minGroup = -1;
1834                         int minPos = 0;
1835                         
1836                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1837                                 string oligo = it->first;
1838                                 //                              int length = oligo.length();
1839                                 
1840                                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
1841                                         success = bdiffs + 10;
1842                                         break;
1843                                 }
1844                                 
1845                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1846                                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1847                                 oligo = alignment->getSeqAAln();
1848                                 string temp = alignment->getSeqBAln();
1849                                  
1850                                 int alnLength = oligo.length();
1851                                 
1852                                 for(int i=oligo.length()-1;i>=0;i--){
1853                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1854                                 }
1855                                 oligo = oligo.substr(0,alnLength);
1856                                 temp = temp.substr(0,alnLength);
1857                                  
1858                                 int numDiff = countDiffs(oligo, temp);
1859                                 
1860                                 if(numDiff < minDiff){
1861                                         minDiff = numDiff;
1862                                         minCount = 1;
1863                                         minGroup = it->second;
1864                                         minPos = 0;
1865                                         for(int i=0;i<alnLength;i++){
1866                                                 if(temp[i] != '-'){
1867                                                         minPos++;
1868                                                 }
1869                                         }
1870                                 }
1871                                 else if(numDiff == minDiff){
1872                                         minCount++;
1873                                 }
1874                                 
1875                         }
1876                         
1877                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1878                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1879                         else{                                                                                                   //use the best match
1880                                 group = minGroup;
1881                                 seq.setUnaligned(rawSequence.substr(minPos));
1882                                 success = minDiff;
1883                         }
1884                         
1885                         if (alignment != NULL) {  delete alignment;  }
1886                         
1887                 }
1888                 
1889                 return success;
1890                 
1891         }
1892         catch(exception& e) {
1893                 m->errorOut(e, "TrimOligos", "stripBarcode");
1894                 exit(1);
1895         }
1896         
1897 }
1898
1899 /********************************************************************/
1900 int TrimOligos::stripForward(Sequence& seq, int& group){
1901         try {
1902                 
1903                 string rawSequence = seq.getUnaligned();
1904                 int success = pdiffs + 1;       //guilty until proven innocent
1905                 
1906                 //can you find the primer
1907                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1908                         string oligo = it->first;
1909                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1910                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1911                                 break;  
1912                         }
1913                         
1914                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1915                                 group = it->second;
1916                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1917                                 success = 0;
1918                                 break;
1919                         }
1920                 }
1921                 
1922                 //if you found the barcode or if you don't want to allow for diffs
1923                 if ((pdiffs == 0) || (success == 0)) {  return success;  }
1924                 
1925                 else { //try aligning and see if you can find it
1926                         Alignment* alignment;
1927                         if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
1928             else{ alignment = NULL; } 
1929                         
1930                         //can you find the barcode
1931                         int minDiff = 1e6;
1932                         int minCount = 1;
1933                         int minGroup = -1;
1934                         int minPos = 0;
1935                         
1936                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1937                                 string oligo = it->first;
1938                                 //                              int length = oligo.length();
1939                                 
1940                                 if(rawSequence.length() < maxFPrimerLength){    
1941                                         success = pdiffs + 100;
1942                                         break;
1943                                 }
1944                                 
1945                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1946                                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1947                                 oligo = alignment->getSeqAAln();
1948                                 string temp = alignment->getSeqBAln();
1949                                 
1950                                 int alnLength = oligo.length();
1951                                 
1952                                 for(int i=oligo.length()-1;i>=0;i--){
1953                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1954                                 }
1955                                 oligo = oligo.substr(0,alnLength);
1956                                 temp = temp.substr(0,alnLength);
1957                                 
1958                                 int numDiff = countDiffs(oligo, temp);
1959                                 
1960                                 if(numDiff < minDiff){
1961                                         minDiff = numDiff;
1962                                         minCount = 1;
1963                                         minGroup = it->second;
1964                                         minPos = 0;
1965                                         for(int i=0;i<alnLength;i++){
1966                                                 if(temp[i] != '-'){
1967                                                         minPos++;
1968                                                 }
1969                                         }
1970                                 }
1971                                 else if(numDiff == minDiff){
1972                                         minCount++;
1973                                 }
1974                                 
1975                         }
1976                         
1977                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1978                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1979                         else{                                                                                                   //use the best match
1980                                 group = minGroup;
1981                                 seq.setUnaligned(rawSequence.substr(minPos));
1982                                 success = minDiff;
1983                         }
1984                         
1985                         if (alignment != NULL) {  delete alignment;  }
1986                         
1987                 }
1988                 
1989                 return success;
1990                 
1991         }
1992         catch(exception& e) {
1993                 m->errorOut(e, "TrimOligos", "stripForward");
1994                 exit(1);
1995         }
1996 }
1997 //*******************************************************************/
1998 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1999         try {
2000         
2001         if (paired) {  int success = stripPairedPrimers(seq, qual, group, keepForward);  return success; }
2002         
2003                 string rawSequence = seq.getUnaligned();
2004                 int success = pdiffs + 1;       //guilty until proven innocent
2005                 
2006                 //can you find the primer
2007                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
2008                         string oligo = it->first;
2009                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
2010                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
2011                                 break;  
2012                         }
2013                         
2014                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2015                                 group = it->second;
2016                                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
2017                                 if(qual.getName() != ""){
2018                                         if (!keepForward) {  qual.trimQScores(oligo.length(), -1); }
2019                                 }
2020                                 success = 0;
2021                                 break;
2022                         }
2023                 }
2024                 
2025                 //if you found the barcode or if you don't want to allow for diffs
2026                 if ((pdiffs == 0) || (success == 0)) { return success;  }
2027                 
2028                 else { //try aligning and see if you can find it
2029                         Alignment* alignment;
2030                         if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));  }
2031                         else{ alignment = NULL; } 
2032                         
2033                         //can you find the barcode
2034                         int minDiff = 1e6;
2035                         int minCount = 1;
2036                         int minGroup = -1;
2037                         int minPos = 0;
2038                         
2039                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
2040                                 string oligo = it->first;
2041                                 //                              int length = oligo.length();
2042                                 
2043                                 if(rawSequence.length() < maxFPrimerLength){    
2044                                         success = pdiffs + 100;
2045                                         break;
2046                                 }
2047                                 
2048                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2049                                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
2050                                 oligo = alignment->getSeqAAln();
2051                                 string temp = alignment->getSeqBAln();
2052                                 
2053                                 int alnLength = oligo.length();
2054                                 
2055                                 for(int i=oligo.length()-1;i>=0;i--){
2056                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
2057                                 }
2058                                 oligo = oligo.substr(0,alnLength);
2059                                 temp = temp.substr(0,alnLength);
2060                                 
2061                                 int numDiff = countDiffs(oligo, temp);
2062                                 
2063                                 if(numDiff < minDiff){
2064                                         minDiff = numDiff;
2065                                         minCount = 1;
2066                                         minGroup = it->second;
2067                                         minPos = 0;
2068                                         for(int i=0;i<alnLength;i++){
2069                                                 if(temp[i] != '-'){
2070                                                         minPos++;
2071                                                 }
2072                                         }
2073                                 }
2074                                 else if(numDiff == minDiff){
2075                                         minCount++;
2076                                 }
2077                                 
2078                         }
2079                         
2080                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
2081                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
2082                         else{                                                                                                   //use the best match
2083                                 group = minGroup;
2084                                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
2085                                 if(qual.getName() != ""){
2086                                         if (!keepForward) { qual.trimQScores(minPos, -1); }
2087                                 }
2088                                 success = minDiff;
2089                         }
2090                         
2091                         if (alignment != NULL) {  delete alignment;  }
2092                         
2093                 }
2094                 
2095                 return success;
2096                 
2097         }
2098         catch(exception& e) {
2099                 m->errorOut(e, "TrimOligos", "stripForward");
2100                 exit(1);
2101         }
2102 }
2103 //******************************************************************/
2104 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
2105         try {
2106                 string rawSequence = seq.getUnaligned();
2107                 bool success = 0;       //guilty until proven innocent
2108                 
2109                 for(int i=0;i<revPrimer.size();i++){
2110                         string oligo = revPrimer[i];
2111                         
2112                         if(rawSequence.length() < oligo.length()){
2113                                 success = 0;
2114                                 break;
2115                         }
2116                         
2117                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2118                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2119                                 if(qual.getName() != ""){
2120                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
2121                                 }
2122                                 success = 1;
2123                                 break;
2124                         }
2125                 }       
2126                 return success;
2127                 
2128         }
2129         catch(exception& e) {
2130                 m->errorOut(e, "TrimOligos", "stripReverse");
2131                 exit(1);
2132         }
2133 }
2134 //******************************************************************/
2135 bool TrimOligos::stripReverse(Sequence& seq){
2136         try {
2137                 
2138                 string rawSequence = seq.getUnaligned();
2139                 bool success = 0;       //guilty until proven innocent
2140                 
2141                 for(int i=0;i<revPrimer.size();i++){
2142                         string oligo = revPrimer[i];
2143                         
2144                         if(rawSequence.length() < oligo.length()){
2145                                 success = 0;
2146                                 break;
2147                         }
2148                         
2149                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2150                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2151                                 success = 1;
2152                                 break;
2153                         }
2154                 }       
2155                 
2156                 return success;
2157                 
2158         }
2159         catch(exception& e) {
2160                 m->errorOut(e, "TrimOligos", "stripReverse");
2161                 exit(1);
2162         }
2163 }
2164 //******************************************************************/
2165 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
2166         try {
2167                 string rawSequence = seq.getUnaligned();
2168                 bool success = ldiffs + 1;      //guilty until proven innocent
2169                 
2170                 for(int i=0;i<linker.size();i++){
2171                         string oligo = linker[i];
2172                         
2173                         if(rawSequence.length() < oligo.length()){
2174                                 success = ldiffs + 10;
2175                                 break;
2176                         }
2177                         
2178                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2179                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2180                                 if(qual.getName() != ""){
2181                                         qual.trimQScores(oligo.length(), -1);
2182                                 }
2183                                 success = 0;
2184                                 break;
2185                         }
2186                 }
2187         
2188         //if you found the linker or if you don't want to allow for diffs
2189                 if ((ldiffs == 0) || (success == 0)) { return success;  }
2190                 
2191                 else { //try aligning and see if you can find it
2192                         Alignment* alignment;
2193                         if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1));   }     
2194                         else{ alignment = NULL; } 
2195                         
2196                         //can you find the barcode
2197                         int minDiff = 1e6;
2198                         int minCount = 1;
2199                         int minPos = 0;
2200                         
2201                         for(int i = 0; i < linker.size(); i++){
2202                                 string oligo = linker[i];
2203                                 //                              int length = oligo.length();
2204                                 
2205                                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
2206                                         success = ldiffs + 10;
2207                                         break;
2208                                 }
2209                                 
2210                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2211                                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2212                                 oligo = alignment->getSeqAAln();
2213                                 string temp = alignment->getSeqBAln();
2214                                 
2215                                 int alnLength = oligo.length();
2216                                 
2217                                 for(int i=oligo.length()-1;i>=0;i--){
2218                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
2219                                 }
2220                                 oligo = oligo.substr(0,alnLength);
2221                                 temp = temp.substr(0,alnLength);
2222                                 
2223                                 int numDiff = countDiffs(oligo, temp);
2224                                 
2225                                 if(numDiff < minDiff){
2226                                         minDiff = numDiff;
2227                                         minCount = 1;
2228                                         minPos = 0;
2229                                         for(int i=0;i<alnLength;i++){
2230                                                 if(temp[i] != '-'){
2231                                                         minPos++;
2232                                                 }
2233                                         }
2234                                 }
2235                                 else if(numDiff == minDiff){
2236                                         minCount++;
2237                                 }
2238                                 
2239                         }
2240                         
2241                         if(minDiff > ldiffs)    {       success = minDiff;              }       //no good matches
2242                         else if(minCount > 1)   {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
2243                         else{                                                                                                   //use the best match
2244                                 seq.setUnaligned(rawSequence.substr(minPos));
2245                                 
2246                                 if(qual.getName() != ""){
2247                                         qual.trimQScores(minPos, -1);
2248                                 }
2249                                 success = minDiff;
2250                         }
2251                         
2252                         if (alignment != NULL) {  delete alignment;  }
2253                         
2254                 }
2255
2256        
2257                 return success;
2258                 
2259         }
2260         catch(exception& e) {
2261                 m->errorOut(e, "TrimOligos", "stripLinker");
2262                 exit(1);
2263         }
2264 }
2265 //******************************************************************/
2266 bool TrimOligos::stripLinker(Sequence& seq){
2267         try {
2268                 
2269                 string rawSequence = seq.getUnaligned();
2270                 bool success = ldiffs +1;       //guilty until proven innocent
2271                 
2272                 for(int i=0;i<linker.size();i++){
2273                         string oligo = linker[i];
2274                         
2275                         if(rawSequence.length() < oligo.length()){
2276                                 success = ldiffs +10;
2277                                 break;
2278                         }
2279                         
2280                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2281                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2282                                 success = 0;
2283                                 break;
2284                         }
2285                 }       
2286                 
2287         //if you found the linker or if you don't want to allow for diffs
2288                 if ((ldiffs == 0) || (success == 0)) { return success;  }
2289                 
2290                 else { //try aligning and see if you can find it
2291                         Alignment* alignment;
2292                         if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1));  }
2293             else{ alignment = NULL; } 
2294                         
2295                         //can you find the barcode
2296                         int minDiff = 1e6;
2297                         int minCount = 1;
2298                         int minPos = 0;
2299                         
2300                         for(int i = 0; i < linker.size(); i++){
2301                                 string oligo = linker[i];
2302                                 //                              int length = oligo.length();
2303                                 
2304                                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
2305                                         success = ldiffs + 10;
2306                                         break;
2307                                 }
2308                                 
2309                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2310                                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2311                                 oligo = alignment->getSeqAAln();
2312                                 string temp = alignment->getSeqBAln();
2313                                 
2314                                 int alnLength = oligo.length();
2315                                 
2316                                 for(int i=oligo.length()-1;i>=0;i--){
2317                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
2318                                 }
2319                                 oligo = oligo.substr(0,alnLength);
2320                                 temp = temp.substr(0,alnLength);
2321                                 
2322                                 int numDiff = countDiffs(oligo, temp);
2323                                 
2324                                 if(numDiff < minDiff){
2325                                         minDiff = numDiff;
2326                                         minCount = 1;
2327                                         minPos = 0;
2328                                         for(int i=0;i<alnLength;i++){
2329                                                 if(temp[i] != '-'){
2330                                                         minPos++;
2331                                                 }
2332                                         }
2333                                 }
2334                                 else if(numDiff == minDiff){
2335                                         minCount++;
2336                                 }
2337                                 
2338                         }
2339                         
2340                         if(minDiff > ldiffs)    {       success = minDiff;              }       //no good matches
2341                         else if(minCount > 1)   {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
2342                         else{                                                                                                   //use the best match
2343                                 seq.setUnaligned(rawSequence.substr(minPos));
2344                                 success = minDiff;
2345                         }
2346                         
2347                         if (alignment != NULL) {  delete alignment;  }
2348                         
2349                 }
2350
2351                 return success;
2352                 
2353         }
2354         catch(exception& e) {
2355                 m->errorOut(e, "TrimOligos", "stripLinker");
2356                 exit(1);
2357         }
2358 }
2359
2360 //******************************************************************/
2361 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
2362         try {
2363                 string rawSequence = seq.getUnaligned();
2364                 bool success = sdiffs+1;        //guilty until proven innocent
2365                 
2366                 for(int i=0;i<spacer.size();i++){
2367                         string oligo = spacer[i];
2368                         
2369                         if(rawSequence.length() < oligo.length()){
2370                                 success = sdiffs+10;
2371                                 break;
2372                         }
2373                         
2374                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2375                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2376                                 if(qual.getName() != ""){
2377                                         qual.trimQScores(oligo.length(), -1);
2378                                 }
2379                                 success = 0;
2380                                 break;
2381                         }
2382                 }
2383         
2384         //if you found the spacer or if you don't want to allow for diffs
2385                 if ((sdiffs == 0) || (success == 0)) { return success;  }
2386                 
2387                 else { //try aligning and see if you can find it
2388                         Alignment* alignment;
2389                         if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1));  }
2390             else{ alignment = NULL; } 
2391                         
2392                         //can you find the barcode
2393                         int minDiff = 1e6;
2394                         int minCount = 1;
2395                         int minPos = 0;
2396                         
2397                         for(int i = 0; i < spacer.size(); i++){
2398                                 string oligo = spacer[i];
2399                                 //                              int length = oligo.length();
2400                                 
2401                                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
2402                                         success = sdiffs + 10;
2403                                         break;
2404                                 }
2405                                 
2406                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2407                                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2408                                 oligo = alignment->getSeqAAln();
2409                                 string temp = alignment->getSeqBAln();
2410                                 
2411                                 int alnLength = oligo.length();
2412                                 
2413                                 for(int i=oligo.length()-1;i>=0;i--){
2414                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
2415                                 }
2416                                 oligo = oligo.substr(0,alnLength);
2417                                 temp = temp.substr(0,alnLength);
2418                                 
2419                                 int numDiff = countDiffs(oligo, temp);
2420                                 
2421                                 if(numDiff < minDiff){
2422                                         minDiff = numDiff;
2423                                         minCount = 1;
2424                                         minPos = 0;
2425                                         for(int i=0;i<alnLength;i++){
2426                                                 if(temp[i] != '-'){
2427                                                         minPos++;
2428                                                 }
2429                                         }
2430                                 }
2431                                 else if(numDiff == minDiff){
2432                                         minCount++;
2433                                 }
2434                                 
2435                         }
2436                         
2437                         if(minDiff > sdiffs)    {       success = minDiff;              }       //no good matches
2438                         else if(minCount > 1)   {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
2439                         else{                                                                                                   //use the best match
2440                                 seq.setUnaligned(rawSequence.substr(minPos));
2441                                 
2442                                 if(qual.getName() != ""){
2443                                         qual.trimQScores(minPos, -1);
2444                                 }
2445                                 success = minDiff;
2446                         }
2447                         
2448                         if (alignment != NULL) {  delete alignment;  }
2449                         
2450                 }
2451         
2452
2453                 return success;
2454                 
2455         }
2456         catch(exception& e) {
2457                 m->errorOut(e, "TrimOligos", "stripSpacer");
2458                 exit(1);
2459         }
2460 }
2461 //******************************************************************/
2462 bool TrimOligos::stripSpacer(Sequence& seq){
2463         try {
2464                 
2465                 string rawSequence = seq.getUnaligned();
2466                 bool success = sdiffs+1;        //guilty until proven innocent
2467                 
2468                 for(int i=0;i<spacer.size();i++){
2469                         string oligo = spacer[i];
2470                         
2471                         if(rawSequence.length() < oligo.length()){
2472                                 success = sdiffs+10;
2473                                 break;
2474                         }
2475                         
2476                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2477                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2478                                 success = 0;
2479                                 break;
2480                         }
2481                 }       
2482                 
2483         //if you found the spacer or if you don't want to allow for diffs
2484                 if ((sdiffs == 0) || (success == 0)) { return success;  }
2485                 
2486                 else { //try aligning and see if you can find it
2487                         Alignment* alignment;
2488                         if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1));  }
2489             else{ alignment = NULL; } 
2490                         
2491                         //can you find the barcode
2492                         int minDiff = 1e6;
2493                         int minCount = 1;
2494                         int minPos = 0;
2495                         
2496                         for(int i = 0; i < spacer.size(); i++){
2497                                 string oligo = spacer[i];
2498                                 //                              int length = oligo.length();
2499                                 
2500                                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
2501                                         success = sdiffs + 10;
2502                                         break;
2503                                 }
2504                                 
2505                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2506                                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2507                                 oligo = alignment->getSeqAAln();
2508                                 string temp = alignment->getSeqBAln();
2509                                 
2510                                 int alnLength = oligo.length();
2511                                 
2512                                 for(int i=oligo.length()-1;i>=0;i--){
2513                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
2514                                 }
2515                                 oligo = oligo.substr(0,alnLength);
2516                                 temp = temp.substr(0,alnLength);
2517                                 
2518                                 int numDiff = countDiffs(oligo, temp);
2519                                 
2520                                 if(numDiff < minDiff){
2521                                         minDiff = numDiff;
2522                                         minCount = 1;
2523                                         minPos = 0;
2524                                         for(int i=0;i<alnLength;i++){
2525                                                 if(temp[i] != '-'){
2526                                                         minPos++;
2527                                                 }
2528                                         }
2529                                 }
2530                                 else if(numDiff == minDiff){
2531                                         minCount++;
2532                                 }
2533                                 
2534                         }
2535                         
2536                         if(minDiff > sdiffs)    {       success = minDiff;              }       //no good matches
2537                         else if(minCount > 1)   {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
2538                         else{                                                                                                   //use the best match
2539                                 seq.setUnaligned(rawSequence.substr(minPos));
2540                                 success = minDiff;
2541                         }
2542                         
2543                         if (alignment != NULL) {  delete alignment;  }
2544                         
2545                 }
2546
2547                 return success;
2548                 
2549         }
2550         catch(exception& e) {
2551                 m->errorOut(e, "TrimOligos", "stripSpacer");
2552                 exit(1);
2553         }
2554 }
2555
2556 //******************************************************************/
2557 bool TrimOligos::compareDNASeq(string oligo, string seq){
2558         try {
2559                 bool success = 1;
2560                 int length = oligo.length();
2561                 
2562                 for(int i=0;i<length;i++){
2563                         
2564                         if(oligo[i] != seq[i]){
2565                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
2566                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
2567                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
2568                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
2569                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
2570                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
2571                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
2572                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
2573                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2574                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2575                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
2576                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
2577                                 
2578                                 if(success == 0)        {       break;   }
2579                         }
2580                         else{
2581                                 success = 1;
2582                         }
2583                 }
2584                 
2585                 return success;
2586         }
2587         catch(exception& e) {
2588                 m->errorOut(e, "TrimOligos", "compareDNASeq");
2589                 exit(1);
2590         }
2591         
2592 }
2593 //********************************************************************/
2594 int TrimOligos::countDiffs(string oligo, string seq){
2595         try {
2596                 
2597                 int length = oligo.length();
2598                 int countDiffs = 0;
2599                 
2600                 for(int i=0;i<length;i++){
2601                         
2602                         if(oligo[i] != seq[i]){
2603                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
2604                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
2605                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
2606                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
2607                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
2608                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
2609                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
2610                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
2611                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2612                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2613                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
2614                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
2615                         }
2616                         
2617                 }
2618                 
2619                 return countDiffs;
2620         }
2621         catch(exception& e) {
2622                 m->errorOut(e, "TrimOligos", "countDiffs");
2623                 exit(1);
2624         }
2625 }
2626 //********************************************************************/
2627 string TrimOligos::reverseOligo(string oligo){
2628         try {
2629         string reverse = "";
2630         
2631         for(int i=oligo.length()-1;i>=0;i--){
2632             
2633             if(oligo[i] == 'A')         {       reverse += 'T'; }
2634             else if(oligo[i] == 'T'){   reverse += 'A'; }
2635             else if(oligo[i] == 'U'){   reverse += 'A'; }
2636             
2637             else if(oligo[i] == 'G'){   reverse += 'C'; }
2638             else if(oligo[i] == 'C'){   reverse += 'G'; }
2639             
2640             else if(oligo[i] == 'R'){   reverse += 'Y'; }
2641             else if(oligo[i] == 'Y'){   reverse += 'R'; }
2642             
2643             else if(oligo[i] == 'M'){   reverse += 'K'; }
2644             else if(oligo[i] == 'K'){   reverse += 'M'; }
2645             
2646             else if(oligo[i] == 'W'){   reverse += 'W'; }
2647             else if(oligo[i] == 'S'){   reverse += 'S'; }
2648             
2649             else if(oligo[i] == 'B'){   reverse += 'V'; }
2650             else if(oligo[i] == 'V'){   reverse += 'B'; }
2651             
2652             else if(oligo[i] == 'D'){   reverse += 'H'; }
2653             else if(oligo[i] == 'H'){   reverse += 'D'; }
2654             
2655             else if(oligo[i] == '-'){   reverse += '-'; } 
2656             else                                                {       reverse += 'N'; }
2657         }
2658         
2659         
2660         return reverse;
2661     }
2662         catch(exception& e) {
2663                 m->errorOut(e, "TrimOligos", "reverseOligo");
2664                 exit(1);
2665         }
2666 }
2667
2668 /********************************************************************/
2669
2670
2671