]> git.donarmstrong.com Git - mothur.git/blob - trimoligos.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / trimoligos.cpp
1 /*
2  *  trimoligos.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/1/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
13
14
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
18         try {
19                 m = MothurOut::getInstance();
20                 
21                 pdiffs = p;
22                 bdiffs = b;
23         ldiffs = l;
24         sdiffs = s;
25                 
26                 barcodes = br;
27                 primers = pr;
28                 revPrimer = r;
29         linker = lk;
30         spacer = sp;
31         maxFBarcodeLength = 0;
32         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
33             if(it->first.length() > maxFBarcodeLength){
34                 maxFBarcodeLength = it->first.length();
35             }
36         }
37         maxFPrimerLength = 0;
38         map<string,int>::iterator it; 
39         for(it=primers.begin();it!=primers.end();it++){
40             if(it->first.length() > maxFPrimerLength){
41                 maxFPrimerLength = it->first.length();
42             }
43         }
44         
45         maxLinkerLength = 0;
46         for(int i = 0; i < linker.size(); i++){
47             if(linker[i].length() > maxLinkerLength){
48                 maxLinkerLength = linker[i].length();
49             }
50         }
51         
52         maxSpacerLength = 0;
53         for(int i = 0; i < spacer.size(); i++){
54             if(spacer[i].length() > maxSpacerLength){
55                 maxSpacerLength = spacer[i].length();
56             }
57         }
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "TrimOligos", "TrimOligos");
61                 exit(1);
62         }
63 }
64 /********************************************************************/
65 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
66 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
67         try {
68                 m = MothurOut::getInstance();
69                 
70                 pdiffs = p;
71                 bdiffs = b;
72         ldiffs = l;
73         sdiffs = s;
74                 
75         maxFBarcodeLength = 0;
76         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
77             string forward = it->second.forward;
78             map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
79             
80             if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length();  }
81             
82             if (itForward == ifbarcodes.end()) {
83                 vector<int> temp; temp.push_back(it->first);
84                 ifbarcodes[forward] = temp;
85             }else { itForward->second.push_back(it->first); }
86         }
87
88         maxFPrimerLength = 0;
89         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
90             string forward = it->second.forward;
91             map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
92             
93             if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length();  }
94             
95             if (itForward == ifprimers.end()) {
96                 vector<int> temp; temp.push_back(it->first);
97                 ifprimers[forward] = temp;
98             }else { itForward->second.push_back(it->first); }
99         }
100         
101         maxRBarcodeLength = 0;
102         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
103             string forward = it->second.reverse;
104             map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
105             
106             if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length();  }
107             
108             if (itForward == irbarcodes.end()) {
109                 vector<int> temp; temp.push_back(it->first);
110                 irbarcodes[forward] = temp;
111             }else { itForward->second.push_back(it->first); }
112         }
113         
114         maxRPrimerLength = 0;
115         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
116             string forward = it->second.reverse;
117             map<string, vector<int> >::iterator itForward = irprimers.find(forward);
118             
119             if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length();  }
120             
121             if (itForward == irprimers.end()) {
122                 vector<int> temp; temp.push_back(it->first);
123                 irprimers[forward] = temp;
124             }else { itForward->second.push_back(it->first); }
125         }
126
127         ipbarcodes = br;
128         ipprimers = pr;
129         }
130         catch(exception& e) {
131                 m->errorOut(e, "TrimOligos", "TrimOligos");
132                 exit(1);
133         }
134 }
135 /********************************************************************/
136 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
137 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
138         try {
139                 m = MothurOut::getInstance();
140                 
141                 pdiffs = p;
142                 bdiffs = b;
143                 
144                 barcodes = br;
145                 primers = pr;
146                 revPrimer = r;
147         }
148         catch(exception& e) {
149                 m->errorOut(e, "TrimOligos", "TrimOligos");
150                 exit(1);
151         }
152 }
153 /********************************************************************/
154 TrimOligos::~TrimOligos() {}
155 //*******************************************************************/
156 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
157         try {
158                 
159                 string rawSequence = seq.getUnaligned();
160                 int success = bdiffs + 1;       //guilty until proven innocent
161                 
162                 //can you find the barcode
163                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
164                         string oligo = it->first;
165                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
166                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
167                                 break;  
168                         }
169                         
170                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
171                                 group = it->second;
172                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
173                                 
174                                 if(qual.getName() != ""){
175                                         qual.trimQScores(oligo.length(), -1);
176                                 }
177                                 
178                                 success = 0;
179                                 break;
180                         }
181                 }
182                 
183                 //if you found the barcode or if you don't want to allow for diffs
184                 if ((bdiffs == 0) || (success == 0)) { return success;  }
185                 
186                 else { //try aligning and see if you can find it
187                         Alignment* alignment;
188                         if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
189                         else{ alignment = NULL; } 
190                         
191                         //can you find the barcode
192                         int minDiff = 1e6;
193                         int minCount = 1;
194                         int minGroup = -1;
195                         int minPos = 0;
196                         
197                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
198                                 string oligo = it->first;
199                                 //                              int length = oligo.length();
200                                 
201                                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
202                                         success = bdiffs + 10;
203                                         break;
204                                 }
205                                 
206                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
207                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
208                                 oligo = alignment->getSeqAAln();
209                                 string temp = alignment->getSeqBAln();
210                                 
211                                 int alnLength = oligo.length();
212                                 
213                                 for(int i=oligo.length()-1;i>=0;i--){
214                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
215                                 }
216                                 oligo = oligo.substr(0,alnLength);
217                                 temp = temp.substr(0,alnLength);
218                 int numDiff = countDiffs(oligo, temp);
219                                 
220                                 if(numDiff < minDiff){
221                                         minDiff = numDiff;
222                                         minCount = 1;
223                                         minGroup = it->second;
224                                         minPos = 0;
225                                         for(int i=0;i<alnLength;i++){
226                                                 if(temp[i] != '-'){
227                                                         minPos++;
228                                                 }
229                                         }
230                                 }
231                                 else if(numDiff == minDiff){
232                                         minCount++;
233                                 }
234                                 
235                         }
236                         
237                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
238                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
239                         else{                                                                                                   //use the best match
240                                 group = minGroup;
241                                 seq.setUnaligned(rawSequence.substr(minPos));
242     
243                                 if(qual.getName() != ""){
244                                         qual.trimQScores(minPos, -1);
245                                 }
246                                 success = minDiff;
247                         }
248                         
249                         if (alignment != NULL) {  delete alignment;  }
250                         
251                 }
252                 
253                 return success;
254                 
255         }
256         catch(exception& e) {
257                 m->errorOut(e, "TrimOligos", "stripBarcode");
258                 exit(1);
259         }
260 }
261 //*******************************************************************/
262 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
263         try {
264                 //look for forward barcode
265                 string rawFSequence = forwardSeq.getUnaligned();
266         string rawRSequence = reverseSeq.getUnaligned();
267                 int success = bdiffs + 1;       //guilty until proven innocent
268                 
269                 //can you find the forward barcode
270                 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
271                         string foligo = it->second.forward;
272             string roligo = it->second.reverse;
273             
274                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
275                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
276                                 break;  
277                         }
278             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
279                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
280                                 break;  
281                         }
282                         
283                         if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
284                                 group = it->first;
285                                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
286                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
287                                 success = 0;
288                                 break;
289                         }
290                 }
291                 
292                 //if you found the barcode or if you don't want to allow for diffs
293                 if ((bdiffs == 0) || (success == 0)) { return success;  }
294                 else { //try aligning and see if you can find it
295                         Alignment* alignment;
296                         if (ifbarcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
297                         else{ alignment = NULL; } 
298                         
299                         //can you find the barcode
300                         int minDiff = 1e6;
301                         int minCount = 1;
302                         vector< vector<int> > minFGroup;
303                         vector<int> minFPos; 
304             
305             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
306             /*
307                 1   Sarah   Westcott
308                 2   John    Westcott
309                 3   Anna    Westcott
310                 4   Sarah   Schloss
311                 5   Pat     Schloss
312                 6   Gail    Brown
313                 7   Pat     Moore
314              
315                 only want to look for forward = Sarah, John, Anna, Pat, Gail
316                                       reverse = Westcott, Schloss, Brown, Moore
317              
318                 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
319              */
320             //cout << endl << forwardSeq.getName() << endl;         
321                         for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
322                                 string oligo = it->first;
323                                 
324                                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
325                                         success = bdiffs + 10;
326                                         break;
327                                 }
328                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
329                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
330                                 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
331                                 oligo = alignment->getSeqAAln();
332                                 string temp = alignment->getSeqBAln();
333                
334                                 int alnLength = oligo.length();
335                                 
336                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
337                                 oligo = oligo.substr(0,alnLength);
338                                 temp = temp.substr(0,alnLength);
339                 int numDiff = countDiffs(oligo, temp);
340                                 
341                 if (alnLength == 0) { numDiff = bdiffs + 100; }
342                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
343                 
344                                 if(numDiff < minDiff){
345                                         minDiff = numDiff;
346                                         minCount = 1;
347                                         minFGroup.clear();
348                     minFGroup.push_back(it->second);
349                                         int tempminFPos = 0;
350                     minFPos.clear();
351                                         for(int i=0;i<alnLength;i++){
352                                                 if(temp[i] != '-'){
353                                                         tempminFPos++;
354                                                 }
355                                         }
356                     minFPos.push_back(tempminFPos);
357                                 }else if(numDiff == minDiff){
358                                         minFGroup.push_back(it->second);
359                     int tempminFPos = 0;
360                                         for(int i=0;i<alnLength;i++){
361                                                 if(temp[i] != '-'){
362                                                         tempminFPos++;
363                                                 }
364                                         }
365                     minFPos.push_back(tempminFPos);
366                                 }
367                         }
368             
369                         //cout << minDiff << '\t' << minCount << '\t' << endl;
370                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
371                         else{   
372                 //check for reverse match
373                 if (alignment != NULL) {  delete alignment;  }
374                 
375                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1));   }
376                 else{ alignment = NULL; } 
377                 
378                 //can you find the barcode
379                 minDiff = 1e6;
380                 minCount = 1;
381                 vector< vector<int> > minRGroup;
382                 vector<int> minRPos;
383                 
384                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
385                     string oligo = it->first;
386                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
387                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
388                         success = bdiffs + 10;
389                         break;
390                     }
391                     
392                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
393                     alignment->align(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
394                     oligo = alignment->getSeqAAln();
395                     string temp = alignment->getSeqBAln();
396                     
397                     int alnLength = oligo.length();
398                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
399                     oligo = oligo.substr(0,alnLength);
400                     temp = temp.substr(0,alnLength);
401                     int numDiff = countDiffs(oligo, temp);
402                     if (alnLength == 0) { numDiff = bdiffs + 100; }
403                     
404                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
405                     if(numDiff < minDiff){
406                         minDiff = numDiff;
407                         minCount = 1;
408                         minRGroup.clear();
409                         minRGroup.push_back(it->second);
410                         int tempminRPos = 0;
411                         minRPos.clear();
412                         for(int i=0;i<alnLength;i++){
413                             if(temp[i] != '-'){
414                                 tempminRPos++;
415                             }
416                         }
417                         minRPos.push_back(tempminRPos);                    
418                     }else if(numDiff == minDiff){
419                         int tempminRPos = 0;
420                         for(int i=0;i<alnLength;i++){
421                             if(temp[i] != '-'){
422                                 tempminRPos++;
423                             }
424                         }
425                         minRPos.push_back(tempminRPos);  
426                         minRGroup.push_back(it->second);
427                     }
428                     
429                 }
430                 
431                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
432                 for (int i = 0; i < minFGroup.size(); i++) { 
433                     cout << i << '\t';
434                     for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
435                     cout << endl;
436                 }
437                 cout << endl;
438                 for (int i = 0; i < minRGroup.size(); i++) { 
439                     cout << i << '\t';
440                     for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
441                     cout << endl;
442                 }
443                 cout << endl;*/
444                 if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
445                 else  {  
446                     bool foundMatch = false;
447                     vector<int> matches;
448                     for (int i = 0; i < minFGroup.size(); i++) {
449                         for (int j = 0; j < minFGroup[i].size(); j++) {
450                             for (int k = 0; k < minRGroup.size(); k++) {
451                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
452                             }
453                         }
454                     }
455                     
456                     int fStart = 0;
457                     int rStart = 0;
458                     if (matches.size() == 1) { 
459                         foundMatch = true;
460                         group = matches[0]; 
461                         fStart = minFPos[0];
462                         rStart = minRPos[0];
463                     }
464                     
465                     //we have an acceptable match for the forward and reverse, but do they match?
466                     if (foundMatch) {
467                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
468                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
469                         success = minDiff;
470                     }else { success = bdiffs + 100;     }
471                 }
472                         }
473                         
474                         if (alignment != NULL) {  delete alignment;  }
475                 }
476                 
477                 return success;
478                 
479         }
480         catch(exception& e) {
481                 m->errorOut(e, "TrimOligos", "stripIBarcode");
482                 exit(1);
483         }
484         
485 }
486 //*******************************************************************/
487 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
488         try {
489                 //look for forward barcode
490                 string rawFSequence = forwardSeq.getUnaligned();
491         string rawRSequence = reverseSeq.getUnaligned();
492                 int success = bdiffs + 1;       //guilty until proven innocent
493                 
494                 //can you find the forward barcode
495                 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
496                         string foligo = it->second.forward;
497             string roligo = it->second.reverse;
498             
499                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
500                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
501                                 break;  
502                         }
503             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
504                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
505                                 break;  
506                         }
507                         
508                         if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
509                                 group = it->first;
510                                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
511                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
512                 forwardQual.trimQScores(foligo.length(), -1);
513                 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
514                                 success = 0;
515                                 break;
516                         }
517                 }
518                 
519                 //if you found the barcode or if you don't want to allow for diffs
520                 if ((bdiffs == 0) || (success == 0)) { return success;  }
521                 else { //try aligning and see if you can find it
522                         Alignment* alignment;
523                         if (ifbarcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
524                         else{ alignment = NULL; } 
525                         
526                         //can you find the barcode
527                         int minDiff = 1e6;
528                         int minCount = 1;
529                         vector< vector<int> > minFGroup;
530                         vector<int> minFPos; 
531             
532             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
533             /*
534              1   Sarah   Westcott
535              2   John    Westcott
536              3   Anna    Westcott
537              4   Sarah   Schloss
538              5   Pat     Schloss
539              6   Gail    Brown
540              7   Pat     Moore
541              
542              only want to look for forward = Sarah, John, Anna, Pat, Gail
543              reverse = Westcott, Schloss, Brown, Moore
544              
545              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
546              */
547             //cout << endl << forwardSeq.getName() << endl;         
548                         for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
549                                 string oligo = it->first;
550                                 
551                                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
552                                         success = bdiffs + 10;
553                                         break;
554                                 }
555                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
556                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
557                                 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
558                                 oligo = alignment->getSeqAAln();
559                                 string temp = alignment->getSeqBAln();
560                 
561                                 int alnLength = oligo.length();
562                                 
563                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
564                                 oligo = oligo.substr(0,alnLength);
565                                 temp = temp.substr(0,alnLength);
566                 int numDiff = countDiffs(oligo, temp);
567                                 
568                 if (alnLength == 0) { numDiff = bdiffs + 100; }
569                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
570                 
571                                 if(numDiff < minDiff){
572                                         minDiff = numDiff;
573                                         minCount = 1;
574                                         minFGroup.clear();
575                     minFGroup.push_back(it->second);
576                                         int tempminFPos = 0;
577                     minFPos.clear();
578                                         for(int i=0;i<alnLength;i++){
579                                                 if(temp[i] != '-'){
580                                                         tempminFPos++;
581                                                 }
582                                         }
583                     minFPos.push_back(tempminFPos);
584                                 }else if(numDiff == minDiff){
585                                         minFGroup.push_back(it->second);
586                     int tempminFPos = 0;
587                                         for(int i=0;i<alnLength;i++){
588                                                 if(temp[i] != '-'){
589                                                         tempminFPos++;
590                                                 }
591                                         }
592                     minFPos.push_back(tempminFPos);
593                                 }
594                         }
595             
596                         //cout << minDiff << '\t' << minCount << '\t' << endl;
597                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
598                         else{   
599                 //check for reverse match
600                 if (alignment != NULL) {  delete alignment;  }
601                 
602                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1));   }
603                 else{ alignment = NULL; } 
604                 
605                 //can you find the barcode
606                 minDiff = 1e6;
607                 minCount = 1;
608                 vector< vector<int> > minRGroup;
609                 vector<int> minRPos;
610                 
611                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
612                     string oligo = it->first;
613                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
614                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
615                         success = bdiffs + 10;
616                         break;
617                     }
618                     
619                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
620                     alignment->align(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
621                     oligo = alignment->getSeqAAln();
622                     string temp = alignment->getSeqBAln();
623                     
624                     int alnLength = oligo.length();
625                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
626                     oligo = oligo.substr(0,alnLength);
627                     temp = temp.substr(0,alnLength);
628                     int numDiff = countDiffs(oligo, temp);
629                     if (alnLength == 0) { numDiff = bdiffs + 100; }
630                     
631                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
632                     if(numDiff < minDiff){
633                         minDiff = numDiff;
634                         minCount = 1;
635                         minRGroup.clear();
636                         minRGroup.push_back(it->second);
637                         int tempminRPos = 0;
638                         minRPos.clear();
639                         for(int i=0;i<alnLength;i++){
640                             if(temp[i] != '-'){
641                                 tempminRPos++;
642                             }
643                         }
644                         minRPos.push_back(tempminRPos);                    
645                     }else if(numDiff == minDiff){
646                         int tempminRPos = 0;
647                         for(int i=0;i<alnLength;i++){
648                             if(temp[i] != '-'){
649                                 tempminRPos++;
650                             }
651                         }
652                         minRPos.push_back(tempminRPos);  
653                         minRGroup.push_back(it->second);
654                     }
655                     
656                 }
657                 
658                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
659                  for (int i = 0; i < minFGroup.size(); i++) { 
660                  cout << i << '\t';
661                  for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
662                  cout << endl;
663                  }
664                  cout << endl;
665                  for (int i = 0; i < minRGroup.size(); i++) { 
666                  cout << i << '\t';
667                  for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
668                  cout << endl;
669                  }
670                  cout << endl;*/
671                 if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
672                 else  {  
673                     bool foundMatch = false;
674                     vector<int> matches;
675                     for (int i = 0; i < minFGroup.size(); i++) {
676                         for (int j = 0; j < minFGroup[i].size(); j++) {
677                             for (int k = 0; k < minRGroup.size(); k++) {
678                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
679                             }
680                         }
681                     }
682                     
683                     int fStart = 0;
684                     int rStart = 0;
685                     if (matches.size() == 1) { 
686                         foundMatch = true;
687                         group = matches[0]; 
688                         fStart = minFPos[0];
689                         rStart = minRPos[0];
690                     }
691                     
692                     //we have an acceptable match for the forward and reverse, but do they match?
693                     if (foundMatch) {
694                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
695                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
696                         forwardQual.trimQScores(fStart, -1);
697                         reverseQual.trimQScores(rStart, -1);
698                         success = minDiff;
699                     }else { success = bdiffs + 100;     }
700                 }
701                         }
702                         
703                         if (alignment != NULL) {  delete alignment;  }
704                 }
705                 
706                 return success;
707                 
708         }
709         catch(exception& e) {
710                 m->errorOut(e, "TrimOligos", "stripIBarcode");
711                 exit(1);
712         }
713         
714 }
715 //*******************************************************************/
716 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
717         try {
718                 //look for forward barcode
719                 string rawFSequence = forwardSeq.getUnaligned();
720         string rawRSequence = reverseSeq.getUnaligned();
721                 int success = pdiffs + 1;       //guilty until proven innocent
722                 
723                 //can you find the forward barcode
724                 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
725                         string foligo = it->second.forward;
726             string roligo = it->second.reverse;
727             
728                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
729                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
730                                 break;  
731                         }
732             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
733                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
734                                 break;  
735                         }
736                         
737                         if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
738                                 group = it->first;
739                                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
740                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
741                 forwardQual.trimQScores(foligo.length(), -1);
742                 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
743                                 success = 0;
744                                 break;
745                         }
746                 }
747                 
748                 //if you found the barcode or if you don't want to allow for diffs
749                 if ((pdiffs == 0) || (success == 0)) { return success;  }
750                 else { //try aligning and see if you can find it
751                         Alignment* alignment;
752                         if (ifprimers.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
753                         else{ alignment = NULL; } 
754                         
755                         //can you find the barcode
756                         int minDiff = 1e6;
757                         int minCount = 1;
758                         vector< vector<int> > minFGroup;
759                         vector<int> minFPos; 
760             
761             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
762             /*
763              1   Sarah   Westcott
764              2   John    Westcott
765              3   Anna    Westcott
766              4   Sarah   Schloss
767              5   Pat     Schloss
768              6   Gail    Brown
769              7   Pat     Moore
770              
771              only want to look for forward = Sarah, John, Anna, Pat, Gail
772              reverse = Westcott, Schloss, Brown, Moore
773              
774              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
775              */
776             //cout << endl << forwardSeq.getName() << endl;         
777                         for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
778                                 string oligo = it->first;
779                                 
780                                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
781                                         success = pdiffs + 10;
782                                         break;
783                                 }
784                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
785                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
786                                 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
787                                 oligo = alignment->getSeqAAln();
788                                 string temp = alignment->getSeqBAln();
789                 
790                                 int alnLength = oligo.length();
791                                 
792                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
793                                 oligo = oligo.substr(0,alnLength);
794                                 temp = temp.substr(0,alnLength);
795                 int numDiff = countDiffs(oligo, temp);
796                                 
797                 if (alnLength == 0) { numDiff = pdiffs + 100; }
798                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
799                 
800                                 if(numDiff < minDiff){
801                                         minDiff = numDiff;
802                                         minCount = 1;
803                                         minFGroup.clear();
804                     minFGroup.push_back(it->second);
805                                         int tempminFPos = 0;
806                     minFPos.clear();
807                                         for(int i=0;i<alnLength;i++){
808                                                 if(temp[i] != '-'){
809                                                         tempminFPos++;
810                                                 }
811                                         }
812                     minFPos.push_back(tempminFPos);
813                                 }else if(numDiff == minDiff){
814                                         minFGroup.push_back(it->second);
815                     int tempminFPos = 0;
816                                         for(int i=0;i<alnLength;i++){
817                                                 if(temp[i] != '-'){
818                                                         tempminFPos++;
819                                                 }
820                                         }
821                     minFPos.push_back(tempminFPos);
822                                 }
823                         }
824             
825                         //cout << minDiff << '\t' << minCount << '\t' << endl;
826                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
827                         else{   
828                 //check for reverse match
829                 if (alignment != NULL) {  delete alignment;  }
830                 
831                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1));   }
832                 else{ alignment = NULL; } 
833                 
834                 //can you find the barcode
835                 minDiff = 1e6;
836                 minCount = 1;
837                 vector< vector<int> > minRGroup;
838                 vector<int> minRPos;
839                 
840                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
841                     string oligo = it->first;
842                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
843                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
844                         success = pdiffs + 10;
845                         break;
846                     }
847                     
848                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
849                     alignment->align(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
850                     oligo = alignment->getSeqAAln();
851                     string temp = alignment->getSeqBAln();
852                     
853                     int alnLength = oligo.length();
854                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
855                     oligo = oligo.substr(0,alnLength);
856                     temp = temp.substr(0,alnLength);
857                     int numDiff = countDiffs(oligo, temp);
858                     if (alnLength == 0) { numDiff = pdiffs + 100; }
859                     
860                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
861                     if(numDiff < minDiff){
862                         minDiff = numDiff;
863                         minCount = 1;
864                         minRGroup.clear();
865                         minRGroup.push_back(it->second);
866                         int tempminRPos = 0;
867                         minRPos.clear();
868                         for(int i=0;i<alnLength;i++){
869                             if(temp[i] != '-'){
870                                 tempminRPos++;
871                             }
872                         }
873                         minRPos.push_back(tempminRPos);                    
874                     }else if(numDiff == minDiff){
875                         int tempminRPos = 0;
876                         for(int i=0;i<alnLength;i++){
877                             if(temp[i] != '-'){
878                                 tempminRPos++;
879                             }
880                         }
881                         minRPos.push_back(tempminRPos);  
882                         minRGroup.push_back(it->second);
883                     }
884                     
885                 }
886                 
887                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
888                  for (int i = 0; i < minFGroup.size(); i++) { 
889                  cout << i << '\t';
890                  for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
891                  cout << endl;
892                  }
893                  cout << endl;
894                  for (int i = 0; i < minRGroup.size(); i++) { 
895                  cout << i << '\t';
896                  for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
897                  cout << endl;
898                  }
899                  cout << endl;*/
900                 if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
901                 else  {  
902                     bool foundMatch = false;
903                     vector<int> matches;
904                     for (int i = 0; i < minFGroup.size(); i++) {
905                         for (int j = 0; j < minFGroup[i].size(); j++) {
906                             for (int k = 0; k < minRGroup.size(); k++) {
907                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
908                             }
909                         }
910                     }
911                     
912                     int fStart = 0;
913                     int rStart = 0;
914                     if (matches.size() == 1) { 
915                         foundMatch = true;
916                         group = matches[0]; 
917                         fStart = minFPos[0];
918                         rStart = minRPos[0];
919                     }
920                     
921                     //we have an acceptable match for the forward and reverse, but do they match?
922                     if (foundMatch) {
923                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
924                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
925                         forwardQual.trimQScores(fStart, -1);
926                         reverseQual.trimQScores(rStart, -1);
927                         success = minDiff;
928                     }else { success = pdiffs + 100;     }
929                 }
930                         }
931                         
932                         if (alignment != NULL) {  delete alignment;  }
933                 }
934                 
935                 return success;
936                 
937         }
938         catch(exception& e) {
939                 m->errorOut(e, "TrimOligos", "stripIForward");
940                 exit(1);
941         }
942         
943 }
944 //*******************************************************************/
945 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
946         try {
947                 //look for forward barcode
948                 string rawFSequence = forwardSeq.getUnaligned();
949         string rawRSequence = reverseSeq.getUnaligned();
950                 int success = pdiffs + 1;       //guilty until proven innocent
951                 
952                 //can you find the forward barcode
953                 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
954                         string foligo = it->second.forward;
955             string roligo = it->second.reverse;
956             
957                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
958                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
959                                 break;  
960                         }
961             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
962                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
963                                 break;  
964                         }
965                         
966                         if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
967                                 group = it->first;
968                                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
969                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
970                                 success = 0;
971                                 break;
972                         }
973                 }
974                 
975                 //if you found the barcode or if you don't want to allow for diffs
976                 if ((pdiffs == 0) || (success == 0)) { return success;  }
977                 else { //try aligning and see if you can find it
978                         Alignment* alignment;
979                         if (ifprimers.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
980                         else{ alignment = NULL; } 
981                         
982                         //can you find the barcode
983                         int minDiff = 1e6;
984                         int minCount = 1;
985                         vector< vector<int> > minFGroup;
986                         vector<int> minFPos; 
987             
988             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
989             /*
990              1   Sarah   Westcott
991              2   John    Westcott
992              3   Anna    Westcott
993              4   Sarah   Schloss
994              5   Pat     Schloss
995              6   Gail    Brown
996              7   Pat     Moore
997              
998              only want to look for forward = Sarah, John, Anna, Pat, Gail
999              reverse = Westcott, Schloss, Brown, Moore
1000              
1001              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
1002              */
1003             //cout << endl << forwardSeq.getName() << endl;         
1004                         for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1005                                 string oligo = it->first;
1006                                 
1007                                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
1008                                         success = pdiffs + 10;
1009                                         break;
1010                                 }
1011                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1012                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1013                                 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1014                                 oligo = alignment->getSeqAAln();
1015                                 string temp = alignment->getSeqBAln();
1016                 
1017                                 int alnLength = oligo.length();
1018                                 
1019                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1020                                 oligo = oligo.substr(0,alnLength);
1021                                 temp = temp.substr(0,alnLength);
1022                 int numDiff = countDiffs(oligo, temp);
1023                                 
1024                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1025                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1026                 
1027                                 if(numDiff < minDiff){
1028                                         minDiff = numDiff;
1029                                         minCount = 1;
1030                                         minFGroup.clear();
1031                     minFGroup.push_back(it->second);
1032                                         int tempminFPos = 0;
1033                     minFPos.clear();
1034                                         for(int i=0;i<alnLength;i++){
1035                                                 if(temp[i] != '-'){
1036                                                         tempminFPos++;
1037                                                 }
1038                                         }
1039                     minFPos.push_back(tempminFPos);
1040                                 }else if(numDiff == minDiff){
1041                                         minFGroup.push_back(it->second);
1042                     int tempminFPos = 0;
1043                                         for(int i=0;i<alnLength;i++){
1044                                                 if(temp[i] != '-'){
1045                                                         tempminFPos++;
1046                                                 }
1047                                         }
1048                     minFPos.push_back(tempminFPos);
1049                                 }
1050                         }
1051             
1052                         //cout << minDiff << '\t' << minCount << '\t' << endl;
1053                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1054                         else{   
1055                 //check for reverse match
1056                 if (alignment != NULL) {  delete alignment;  }
1057                 
1058                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1));   }
1059                 else{ alignment = NULL; } 
1060                 
1061                 //can you find the barcode
1062                 minDiff = 1e6;
1063                 minCount = 1;
1064                 vector< vector<int> > minRGroup;
1065                 vector<int> minRPos;
1066                 
1067                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1068                     string oligo = it->first;
1069                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1070                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1071                         success = pdiffs + 10;
1072                         break;
1073                     }
1074                     
1075                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1076                     alignment->align(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1077                     oligo = alignment->getSeqAAln();
1078                     string temp = alignment->getSeqBAln();
1079                     
1080                     int alnLength = oligo.length();
1081                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1082                     oligo = oligo.substr(0,alnLength);
1083                     temp = temp.substr(0,alnLength);
1084                     int numDiff = countDiffs(oligo, temp);
1085                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1086                     
1087                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1088                     if(numDiff < minDiff){
1089                         minDiff = numDiff;
1090                         minCount = 1;
1091                         minRGroup.clear();
1092                         minRGroup.push_back(it->second);
1093                         int tempminRPos = 0;
1094                         minRPos.clear();
1095                         for(int i=0;i<alnLength;i++){
1096                             if(temp[i] != '-'){
1097                                 tempminRPos++;
1098                             }
1099                         }
1100                         minRPos.push_back(tempminRPos);                    
1101                     }else if(numDiff == minDiff){
1102                         int tempminRPos = 0;
1103                         for(int i=0;i<alnLength;i++){
1104                             if(temp[i] != '-'){
1105                                 tempminRPos++;
1106                             }
1107                         }
1108                         minRPos.push_back(tempminRPos);  
1109                         minRGroup.push_back(it->second);
1110                     }
1111                     
1112                 }
1113                 
1114                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1115                  for (int i = 0; i < minFGroup.size(); i++) { 
1116                  cout << i << '\t';
1117                  for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
1118                  cout << endl;
1119                  }
1120                  cout << endl;
1121                  for (int i = 0; i < minRGroup.size(); i++) { 
1122                  cout << i << '\t';
1123                  for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
1124                  cout << endl;
1125                  }
1126                  cout << endl;*/
1127                 if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1128                 else  {  
1129                     bool foundMatch = false;
1130                     vector<int> matches;
1131                     for (int i = 0; i < minFGroup.size(); i++) {
1132                         for (int j = 0; j < minFGroup[i].size(); j++) {
1133                             for (int k = 0; k < minRGroup.size(); k++) {
1134                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1135                             }
1136                         }
1137                     }
1138                     
1139                     int fStart = 0;
1140                     int rStart = 0;
1141                     if (matches.size() == 1) { 
1142                         foundMatch = true;
1143                         group = matches[0]; 
1144                         fStart = minFPos[0];
1145                         rStart = minRPos[0];
1146                     }
1147                     
1148                     //we have an acceptable match for the forward and reverse, but do they match?
1149                     if (foundMatch) {
1150                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1151                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1152                         success = minDiff;
1153                     }else { success = pdiffs + 100;     }
1154                 }
1155                         }
1156                         
1157                         if (alignment != NULL) {  delete alignment;  }
1158                 }
1159                 
1160                 return success;
1161                 
1162         }
1163         catch(exception& e) {
1164                 m->errorOut(e, "TrimOligos", "stripIForward");
1165                 exit(1);
1166         }
1167         
1168 }
1169 //*******************************************************************/
1170 int TrimOligos::stripBarcode(Sequence& seq, int& group){
1171         try {
1172                 
1173                 string rawSequence = seq.getUnaligned();
1174                 int success = bdiffs + 1;       //guilty until proven innocent
1175                 
1176                 //can you find the barcode
1177                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1178                         string oligo = it->first;
1179                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
1180                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1181                                 break;  
1182                         }
1183                         
1184                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1185                                 group = it->second;
1186                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1187                                 
1188                                 success = 0;
1189                                 break;
1190                         }
1191                 }
1192                 
1193                 //if you found the barcode or if you don't want to allow for diffs
1194                 if ((bdiffs == 0) || (success == 0)) { return success;  }
1195                 
1196                 else { //try aligning and see if you can find it
1197             Alignment* alignment;
1198                         if (barcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));  }
1199                         else{ alignment = NULL; } 
1200                         
1201                         //can you find the barcode
1202                         int minDiff = 1e6;
1203                         int minCount = 1;
1204                         int minGroup = -1;
1205                         int minPos = 0;
1206                         
1207                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1208                                 string oligo = it->first;
1209                                 //                              int length = oligo.length();
1210                                 
1211                                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
1212                                         success = bdiffs + 10;
1213                                         break;
1214                                 }
1215                                 
1216                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1217                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1218                                 oligo = alignment->getSeqAAln();
1219                                 string temp = alignment->getSeqBAln();
1220                                  
1221                                 int alnLength = oligo.length();
1222                                 
1223                                 for(int i=oligo.length()-1;i>=0;i--){
1224                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1225                                 }
1226                                 oligo = oligo.substr(0,alnLength);
1227                                 temp = temp.substr(0,alnLength);
1228                                  
1229                                 int numDiff = countDiffs(oligo, temp);
1230                                 
1231                                 if(numDiff < minDiff){
1232                                         minDiff = numDiff;
1233                                         minCount = 1;
1234                                         minGroup = it->second;
1235                                         minPos = 0;
1236                                         for(int i=0;i<alnLength;i++){
1237                                                 if(temp[i] != '-'){
1238                                                         minPos++;
1239                                                 }
1240                                         }
1241                                 }
1242                                 else if(numDiff == minDiff){
1243                                         minCount++;
1244                                 }
1245                                 
1246                         }
1247                         
1248                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1249                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1250                         else{                                                                                                   //use the best match
1251                                 group = minGroup;
1252                                 seq.setUnaligned(rawSequence.substr(minPos));
1253                                 success = minDiff;
1254                         }
1255                         
1256                         if (alignment != NULL) {  delete alignment;  }
1257                         
1258                 }
1259                 
1260                 return success;
1261                 
1262         }
1263         catch(exception& e) {
1264                 m->errorOut(e, "TrimOligos", "stripBarcode");
1265                 exit(1);
1266         }
1267         
1268 }
1269
1270 /********************************************************************/
1271 int TrimOligos::stripForward(Sequence& seq, int& group){
1272         try {
1273                 
1274                 string rawSequence = seq.getUnaligned();
1275                 int success = pdiffs + 1;       //guilty until proven innocent
1276                 
1277                 //can you find the primer
1278                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1279                         string oligo = it->first;
1280                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1281                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1282                                 break;  
1283                         }
1284                         
1285                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1286                                 group = it->second;
1287                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1288                                 success = 0;
1289                                 break;
1290                         }
1291                 }
1292                 
1293                 //if you found the barcode or if you don't want to allow for diffs
1294                 if ((pdiffs == 0) || (success == 0)) {  return success;  }
1295                 
1296                 else { //try aligning and see if you can find it
1297                         Alignment* alignment;
1298                         if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
1299             else{ alignment = NULL; } 
1300                         
1301                         //can you find the barcode
1302                         int minDiff = 1e6;
1303                         int minCount = 1;
1304                         int minGroup = -1;
1305                         int minPos = 0;
1306                         
1307                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1308                                 string oligo = it->first;
1309                                 //                              int length = oligo.length();
1310                                 
1311                                 if(rawSequence.length() < maxFPrimerLength){    
1312                                         success = pdiffs + 100;
1313                                         break;
1314                                 }
1315                                 
1316                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1317                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1318                                 oligo = alignment->getSeqAAln();
1319                                 string temp = alignment->getSeqBAln();
1320                                 
1321                                 int alnLength = oligo.length();
1322                                 
1323                                 for(int i=oligo.length()-1;i>=0;i--){
1324                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1325                                 }
1326                                 oligo = oligo.substr(0,alnLength);
1327                                 temp = temp.substr(0,alnLength);
1328                                 
1329                                 int numDiff = countDiffs(oligo, temp);
1330                                 
1331                                 if(numDiff < minDiff){
1332                                         minDiff = numDiff;
1333                                         minCount = 1;
1334                                         minGroup = it->second;
1335                                         minPos = 0;
1336                                         for(int i=0;i<alnLength;i++){
1337                                                 if(temp[i] != '-'){
1338                                                         minPos++;
1339                                                 }
1340                                         }
1341                                 }
1342                                 else if(numDiff == minDiff){
1343                                         minCount++;
1344                                 }
1345                                 
1346                         }
1347                         
1348                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1349                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1350                         else{                                                                                                   //use the best match
1351                                 group = minGroup;
1352                                 seq.setUnaligned(rawSequence.substr(minPos));
1353                                 success = minDiff;
1354                         }
1355                         
1356                         if (alignment != NULL) {  delete alignment;  }
1357                         
1358                 }
1359                 
1360                 return success;
1361                 
1362         }
1363         catch(exception& e) {
1364                 m->errorOut(e, "TrimOligos", "stripForward");
1365                 exit(1);
1366         }
1367 }
1368 //*******************************************************************/
1369 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1370         try {
1371                 string rawSequence = seq.getUnaligned();
1372                 int success = pdiffs + 1;       //guilty until proven innocent
1373                 
1374                 //can you find the primer
1375                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1376                         string oligo = it->first;
1377                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1378                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1379                                 break;  
1380                         }
1381                         
1382                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1383                                 group = it->second;
1384                                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
1385                                 if(qual.getName() != ""){
1386                                         if (!keepForward) {  qual.trimQScores(oligo.length(), -1); }
1387                                 }
1388                                 success = 0;
1389                                 break;
1390                         }
1391                 }
1392                 
1393                 //if you found the barcode or if you don't want to allow for diffs
1394                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1395                 
1396                 else { //try aligning and see if you can find it
1397                         Alignment* alignment;
1398                         if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));  }
1399                         else{ alignment = NULL; } 
1400                         
1401                         //can you find the barcode
1402                         int minDiff = 1e6;
1403                         int minCount = 1;
1404                         int minGroup = -1;
1405                         int minPos = 0;
1406                         
1407                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1408                                 string oligo = it->first;
1409                                 //                              int length = oligo.length();
1410                                 
1411                                 if(rawSequence.length() < maxFPrimerLength){    
1412                                         success = pdiffs + 100;
1413                                         break;
1414                                 }
1415                                 
1416                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1417                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1418                                 oligo = alignment->getSeqAAln();
1419                                 string temp = alignment->getSeqBAln();
1420                                 
1421                                 int alnLength = oligo.length();
1422                                 
1423                                 for(int i=oligo.length()-1;i>=0;i--){
1424                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1425                                 }
1426                                 oligo = oligo.substr(0,alnLength);
1427                                 temp = temp.substr(0,alnLength);
1428                                 
1429                                 int numDiff = countDiffs(oligo, temp);
1430                                 
1431                                 if(numDiff < minDiff){
1432                                         minDiff = numDiff;
1433                                         minCount = 1;
1434                                         minGroup = it->second;
1435                                         minPos = 0;
1436                                         for(int i=0;i<alnLength;i++){
1437                                                 if(temp[i] != '-'){
1438                                                         minPos++;
1439                                                 }
1440                                         }
1441                                 }
1442                                 else if(numDiff == minDiff){
1443                                         minCount++;
1444                                 }
1445                                 
1446                         }
1447                         
1448                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1449                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1450                         else{                                                                                                   //use the best match
1451                                 group = minGroup;
1452                                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
1453                                 if(qual.getName() != ""){
1454                                         if (!keepForward) { qual.trimQScores(minPos, -1); }
1455                                 }
1456                                 success = minDiff;
1457                         }
1458                         
1459                         if (alignment != NULL) {  delete alignment;  }
1460                         
1461                 }
1462                 
1463                 return success;
1464                 
1465         }
1466         catch(exception& e) {
1467                 m->errorOut(e, "TrimOligos", "stripForward");
1468                 exit(1);
1469         }
1470 }
1471 //******************************************************************/
1472 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
1473         try {
1474                 string rawSequence = seq.getUnaligned();
1475                 bool success = 0;       //guilty until proven innocent
1476                 
1477                 for(int i=0;i<revPrimer.size();i++){
1478                         string oligo = revPrimer[i];
1479                         
1480                         if(rawSequence.length() < oligo.length()){
1481                                 success = 0;
1482                                 break;
1483                         }
1484                         
1485                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1486                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1487                                 if(qual.getName() != ""){
1488                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1489                                 }
1490                                 success = 1;
1491                                 break;
1492                         }
1493                 }       
1494                 return success;
1495                 
1496         }
1497         catch(exception& e) {
1498                 m->errorOut(e, "TrimOligos", "stripReverse");
1499                 exit(1);
1500         }
1501 }
1502 //******************************************************************/
1503 bool TrimOligos::stripReverse(Sequence& seq){
1504         try {
1505                 
1506                 string rawSequence = seq.getUnaligned();
1507                 bool success = 0;       //guilty until proven innocent
1508                 
1509                 for(int i=0;i<revPrimer.size();i++){
1510                         string oligo = revPrimer[i];
1511                         
1512                         if(rawSequence.length() < oligo.length()){
1513                                 success = 0;
1514                                 break;
1515                         }
1516                         
1517                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1518                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1519                                 success = 1;
1520                                 break;
1521                         }
1522                 }       
1523                 
1524                 return success;
1525                 
1526         }
1527         catch(exception& e) {
1528                 m->errorOut(e, "TrimOligos", "stripReverse");
1529                 exit(1);
1530         }
1531 }
1532 //******************************************************************/
1533 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
1534         try {
1535                 string rawSequence = seq.getUnaligned();
1536                 bool success = ldiffs + 1;      //guilty until proven innocent
1537                 
1538                 for(int i=0;i<linker.size();i++){
1539                         string oligo = linker[i];
1540                         
1541                         if(rawSequence.length() < oligo.length()){
1542                                 success = ldiffs + 10;
1543                                 break;
1544                         }
1545                         
1546                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1547                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1548                                 if(qual.getName() != ""){
1549                                         qual.trimQScores(oligo.length(), -1);
1550                                 }
1551                                 success = 0;
1552                                 break;
1553                         }
1554                 }
1555         
1556         //if you found the linker or if you don't want to allow for diffs
1557                 if ((ldiffs == 0) || (success == 0)) { return success;  }
1558                 
1559                 else { //try aligning and see if you can find it
1560                         Alignment* alignment;
1561                         if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1));   }     
1562                         else{ alignment = NULL; } 
1563                         
1564                         //can you find the barcode
1565                         int minDiff = 1e6;
1566                         int minCount = 1;
1567                         int minPos = 0;
1568                         
1569                         for(int i = 0; i < linker.size(); i++){
1570                                 string oligo = linker[i];
1571                                 //                              int length = oligo.length();
1572                                 
1573                                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
1574                                         success = ldiffs + 10;
1575                                         break;
1576                                 }
1577                                 
1578                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1579                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
1580                                 oligo = alignment->getSeqAAln();
1581                                 string temp = alignment->getSeqBAln();
1582                                 
1583                                 int alnLength = oligo.length();
1584                                 
1585                                 for(int i=oligo.length()-1;i>=0;i--){
1586                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1587                                 }
1588                                 oligo = oligo.substr(0,alnLength);
1589                                 temp = temp.substr(0,alnLength);
1590                                 
1591                                 int numDiff = countDiffs(oligo, temp);
1592                                 
1593                                 if(numDiff < minDiff){
1594                                         minDiff = numDiff;
1595                                         minCount = 1;
1596                                         minPos = 0;
1597                                         for(int i=0;i<alnLength;i++){
1598                                                 if(temp[i] != '-'){
1599                                                         minPos++;
1600                                                 }
1601                                         }
1602                                 }
1603                                 else if(numDiff == minDiff){
1604                                         minCount++;
1605                                 }
1606                                 
1607                         }
1608                         
1609                         if(minDiff > ldiffs)    {       success = minDiff;              }       //no good matches
1610                         else if(minCount > 1)   {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
1611                         else{                                                                                                   //use the best match
1612                                 seq.setUnaligned(rawSequence.substr(minPos));
1613                                 
1614                                 if(qual.getName() != ""){
1615                                         qual.trimQScores(minPos, -1);
1616                                 }
1617                                 success = minDiff;
1618                         }
1619                         
1620                         if (alignment != NULL) {  delete alignment;  }
1621                         
1622                 }
1623
1624        
1625                 return success;
1626                 
1627         }
1628         catch(exception& e) {
1629                 m->errorOut(e, "TrimOligos", "stripLinker");
1630                 exit(1);
1631         }
1632 }
1633 //******************************************************************/
1634 bool TrimOligos::stripLinker(Sequence& seq){
1635         try {
1636                 
1637                 string rawSequence = seq.getUnaligned();
1638                 bool success = ldiffs +1;       //guilty until proven innocent
1639                 
1640                 for(int i=0;i<linker.size();i++){
1641                         string oligo = linker[i];
1642                         
1643                         if(rawSequence.length() < oligo.length()){
1644                                 success = ldiffs +10;
1645                                 break;
1646                         }
1647                         
1648                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1649                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1650                                 success = 0;
1651                                 break;
1652                         }
1653                 }       
1654                 
1655         //if you found the linker or if you don't want to allow for diffs
1656                 if ((ldiffs == 0) || (success == 0)) { return success;  }
1657                 
1658                 else { //try aligning and see if you can find it
1659                         Alignment* alignment;
1660                         if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1));  }
1661             else{ alignment = NULL; } 
1662                         
1663                         //can you find the barcode
1664                         int minDiff = 1e6;
1665                         int minCount = 1;
1666                         int minPos = 0;
1667                         
1668                         for(int i = 0; i < linker.size(); i++){
1669                                 string oligo = linker[i];
1670                                 //                              int length = oligo.length();
1671                                 
1672                                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
1673                                         success = ldiffs + 10;
1674                                         break;
1675                                 }
1676                                 
1677                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1678                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
1679                                 oligo = alignment->getSeqAAln();
1680                                 string temp = alignment->getSeqBAln();
1681                                 
1682                                 int alnLength = oligo.length();
1683                                 
1684                                 for(int i=oligo.length()-1;i>=0;i--){
1685                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1686                                 }
1687                                 oligo = oligo.substr(0,alnLength);
1688                                 temp = temp.substr(0,alnLength);
1689                                 
1690                                 int numDiff = countDiffs(oligo, temp);
1691                                 
1692                                 if(numDiff < minDiff){
1693                                         minDiff = numDiff;
1694                                         minCount = 1;
1695                                         minPos = 0;
1696                                         for(int i=0;i<alnLength;i++){
1697                                                 if(temp[i] != '-'){
1698                                                         minPos++;
1699                                                 }
1700                                         }
1701                                 }
1702                                 else if(numDiff == minDiff){
1703                                         minCount++;
1704                                 }
1705                                 
1706                         }
1707                         
1708                         if(minDiff > ldiffs)    {       success = minDiff;              }       //no good matches
1709                         else if(minCount > 1)   {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
1710                         else{                                                                                                   //use the best match
1711                                 seq.setUnaligned(rawSequence.substr(minPos));
1712                                 success = minDiff;
1713                         }
1714                         
1715                         if (alignment != NULL) {  delete alignment;  }
1716                         
1717                 }
1718
1719                 return success;
1720                 
1721         }
1722         catch(exception& e) {
1723                 m->errorOut(e, "TrimOligos", "stripLinker");
1724                 exit(1);
1725         }
1726 }
1727
1728 //******************************************************************/
1729 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
1730         try {
1731                 string rawSequence = seq.getUnaligned();
1732                 bool success = sdiffs+1;        //guilty until proven innocent
1733                 
1734                 for(int i=0;i<spacer.size();i++){
1735                         string oligo = spacer[i];
1736                         
1737                         if(rawSequence.length() < oligo.length()){
1738                                 success = sdiffs+10;
1739                                 break;
1740                         }
1741                         
1742                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1743                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1744                                 if(qual.getName() != ""){
1745                                         qual.trimQScores(oligo.length(), -1);
1746                                 }
1747                                 success = 0;
1748                                 break;
1749                         }
1750                 }
1751         
1752         //if you found the spacer or if you don't want to allow for diffs
1753                 if ((sdiffs == 0) || (success == 0)) { return success;  }
1754                 
1755                 else { //try aligning and see if you can find it
1756                         Alignment* alignment;
1757                         if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1));  }
1758             else{ alignment = NULL; } 
1759                         
1760                         //can you find the barcode
1761                         int minDiff = 1e6;
1762                         int minCount = 1;
1763                         int minPos = 0;
1764                         
1765                         for(int i = 0; i < spacer.size(); i++){
1766                                 string oligo = spacer[i];
1767                                 //                              int length = oligo.length();
1768                                 
1769                                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
1770                                         success = sdiffs + 10;
1771                                         break;
1772                                 }
1773                                 
1774                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1775                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
1776                                 oligo = alignment->getSeqAAln();
1777                                 string temp = alignment->getSeqBAln();
1778                                 
1779                                 int alnLength = oligo.length();
1780                                 
1781                                 for(int i=oligo.length()-1;i>=0;i--){
1782                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1783                                 }
1784                                 oligo = oligo.substr(0,alnLength);
1785                                 temp = temp.substr(0,alnLength);
1786                                 
1787                                 int numDiff = countDiffs(oligo, temp);
1788                                 
1789                                 if(numDiff < minDiff){
1790                                         minDiff = numDiff;
1791                                         minCount = 1;
1792                                         minPos = 0;
1793                                         for(int i=0;i<alnLength;i++){
1794                                                 if(temp[i] != '-'){
1795                                                         minPos++;
1796                                                 }
1797                                         }
1798                                 }
1799                                 else if(numDiff == minDiff){
1800                                         minCount++;
1801                                 }
1802                                 
1803                         }
1804                         
1805                         if(minDiff > sdiffs)    {       success = minDiff;              }       //no good matches
1806                         else if(minCount > 1)   {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
1807                         else{                                                                                                   //use the best match
1808                                 seq.setUnaligned(rawSequence.substr(minPos));
1809                                 
1810                                 if(qual.getName() != ""){
1811                                         qual.trimQScores(minPos, -1);
1812                                 }
1813                                 success = minDiff;
1814                         }
1815                         
1816                         if (alignment != NULL) {  delete alignment;  }
1817                         
1818                 }
1819         
1820
1821                 return success;
1822                 
1823         }
1824         catch(exception& e) {
1825                 m->errorOut(e, "TrimOligos", "stripSpacer");
1826                 exit(1);
1827         }
1828 }
1829 //******************************************************************/
1830 bool TrimOligos::stripSpacer(Sequence& seq){
1831         try {
1832                 
1833                 string rawSequence = seq.getUnaligned();
1834                 bool success = sdiffs+1;        //guilty until proven innocent
1835                 
1836                 for(int i=0;i<spacer.size();i++){
1837                         string oligo = spacer[i];
1838                         
1839                         if(rawSequence.length() < oligo.length()){
1840                                 success = sdiffs+10;
1841                                 break;
1842                         }
1843                         
1844                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1845                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1846                                 success = 0;
1847                                 break;
1848                         }
1849                 }       
1850                 
1851         //if you found the spacer or if you don't want to allow for diffs
1852                 if ((sdiffs == 0) || (success == 0)) { return success;  }
1853                 
1854                 else { //try aligning and see if you can find it
1855                         Alignment* alignment;
1856                         if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1));  }
1857             else{ alignment = NULL; } 
1858                         
1859                         //can you find the barcode
1860                         int minDiff = 1e6;
1861                         int minCount = 1;
1862                         int minPos = 0;
1863                         
1864                         for(int i = 0; i < spacer.size(); i++){
1865                                 string oligo = spacer[i];
1866                                 //                              int length = oligo.length();
1867                                 
1868                                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
1869                                         success = sdiffs + 10;
1870                                         break;
1871                                 }
1872                                 
1873                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1874                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
1875                                 oligo = alignment->getSeqAAln();
1876                                 string temp = alignment->getSeqBAln();
1877                                 
1878                                 int alnLength = oligo.length();
1879                                 
1880                                 for(int i=oligo.length()-1;i>=0;i--){
1881                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1882                                 }
1883                                 oligo = oligo.substr(0,alnLength);
1884                                 temp = temp.substr(0,alnLength);
1885                                 
1886                                 int numDiff = countDiffs(oligo, temp);
1887                                 
1888                                 if(numDiff < minDiff){
1889                                         minDiff = numDiff;
1890                                         minCount = 1;
1891                                         minPos = 0;
1892                                         for(int i=0;i<alnLength;i++){
1893                                                 if(temp[i] != '-'){
1894                                                         minPos++;
1895                                                 }
1896                                         }
1897                                 }
1898                                 else if(numDiff == minDiff){
1899                                         minCount++;
1900                                 }
1901                                 
1902                         }
1903                         
1904                         if(minDiff > sdiffs)    {       success = minDiff;              }       //no good matches
1905                         else if(minCount > 1)   {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
1906                         else{                                                                                                   //use the best match
1907                                 seq.setUnaligned(rawSequence.substr(minPos));
1908                                 success = minDiff;
1909                         }
1910                         
1911                         if (alignment != NULL) {  delete alignment;  }
1912                         
1913                 }
1914
1915                 return success;
1916                 
1917         }
1918         catch(exception& e) {
1919                 m->errorOut(e, "TrimOligos", "stripSpacer");
1920                 exit(1);
1921         }
1922 }
1923
1924 //******************************************************************/
1925 bool TrimOligos::compareDNASeq(string oligo, string seq){
1926         try {
1927                 bool success = 1;
1928                 int length = oligo.length();
1929                 
1930                 for(int i=0;i<length;i++){
1931                         
1932                         if(oligo[i] != seq[i]){
1933                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1934                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1935                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1936                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1937                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1938                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1939                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1940                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1941                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1942                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1943                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1944                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1945                                 
1946                                 if(success == 0)        {       break;   }
1947                         }
1948                         else{
1949                                 success = 1;
1950                         }
1951                 }
1952                 
1953                 return success;
1954         }
1955         catch(exception& e) {
1956                 m->errorOut(e, "TrimOligos", "compareDNASeq");
1957                 exit(1);
1958         }
1959         
1960 }
1961 //********************************************************************/
1962 int TrimOligos::countDiffs(string oligo, string seq){
1963         try {
1964                 
1965                 int length = oligo.length();
1966                 int countDiffs = 0;
1967                 
1968                 for(int i=0;i<length;i++){
1969                         
1970                         if(oligo[i] != seq[i]){
1971                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1972                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1973                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1974                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1975                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1976                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1977                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1978                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1979                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1980                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1981                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1982                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1983                         }
1984                         
1985                 }
1986                 
1987                 return countDiffs;
1988         }
1989         catch(exception& e) {
1990                 m->errorOut(e, "TrimOligos", "countDiffs");
1991                 exit(1);
1992         }
1993 }
1994 /********************************************************************/
1995
1996
1997