]> git.donarmstrong.com Git - mothur.git/blob - trimoligos.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / trimoligos.cpp
1 /*
2  *  trimoligos.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/1/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
13
14
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
18         try {
19                 m = MothurOut::getInstance();
20                 
21                 pdiffs = p;
22                 bdiffs = b;
23         ldiffs = l;
24         sdiffs = s;
25                 
26                 barcodes = br;
27                 primers = pr;
28                 revPrimer = r;
29         linker = lk;
30         spacer = sp;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "TrimOligos", "TrimOligos");
34                 exit(1);
35         }
36 }
37 /********************************************************************/
38 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
39 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br, vector<string> lk, vector<string> sp){
40         try {
41                 m = MothurOut::getInstance();
42                 
43                 pdiffs = p;
44                 bdiffs = b;
45         ldiffs = l;
46         sdiffs = s;
47                 
48                 ibarcodes = br;
49                 iprimers = pr;
50         linker = lk;
51         spacer = sp;
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "TrimOligos", "TrimOligos");
55                 exit(1);
56         }
57 }
58 /********************************************************************/
59 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
60 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
61         try {
62                 m = MothurOut::getInstance();
63                 
64                 pdiffs = p;
65                 bdiffs = b;
66                 
67                 barcodes = br;
68                 primers = pr;
69                 revPrimer = r;
70         }
71         catch(exception& e) {
72                 m->errorOut(e, "TrimOligos", "TrimOligos");
73                 exit(1);
74         }
75 }
76 /********************************************************************/
77 TrimOligos::~TrimOligos() {}
78 //*******************************************************************/
79 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
80         try {
81                 
82                 string rawSequence = seq.getUnaligned();
83                 int success = bdiffs + 1;       //guilty until proven innocent
84                 
85                 //can you find the barcode
86                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
87                         string oligo = it->first;
88                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
89                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
90                                 break;  
91                         }
92                         
93                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
94                                 group = it->second;
95                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
96                                 
97                                 if(qual.getName() != ""){
98                                         qual.trimQScores(oligo.length(), -1);
99                                 }
100                                 
101                                 success = 0;
102                                 break;
103                         }
104                 }
105                 
106                 //if you found the barcode or if you don't want to allow for diffs
107                 if ((bdiffs == 0) || (success == 0)) { return success;  }
108                 
109                 else { //try aligning and see if you can find it
110                         
111                         int maxLength = 0;
112                         
113                         Alignment* alignment;
114                         if (barcodes.size() > 0) {
115                                 map<string,int>::iterator it; 
116                                 
117                                 for(it=barcodes.begin();it!=barcodes.end();it++){
118                                         if(it->first.length() > maxLength){
119                                                 maxLength = it->first.length();
120                                         }
121                                 }
122                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
123                                 
124                         }else{ alignment = NULL; } 
125                         
126                         //can you find the barcode
127                         int minDiff = 1e6;
128                         int minCount = 1;
129                         int minGroup = -1;
130                         int minPos = 0;
131                         
132                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
133                                 string oligo = it->first;
134                                 //                              int length = oligo.length();
135                                 
136                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
137                                         success = bdiffs + 10;
138                                         break;
139                                 }
140                                 
141                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
142                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
143                                 oligo = alignment->getSeqAAln();
144                                 string temp = alignment->getSeqBAln();
145                                 
146                                 int alnLength = oligo.length();
147                                 
148                                 for(int i=oligo.length()-1;i>=0;i--){
149                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
150                                 }
151                                 oligo = oligo.substr(0,alnLength);
152                                 temp = temp.substr(0,alnLength);
153                 int numDiff = countDiffs(oligo, temp);
154                                 
155                                 if(numDiff < minDiff){
156                                         minDiff = numDiff;
157                                         minCount = 1;
158                                         minGroup = it->second;
159                                         minPos = 0;
160                                         for(int i=0;i<alnLength;i++){
161                                                 if(temp[i] != '-'){
162                                                         minPos++;
163                                                 }
164                                         }
165                                 }
166                                 else if(numDiff == minDiff){
167                                         minCount++;
168                                 }
169                                 
170                         }
171                         
172                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
173                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
174                         else{                                                                                                   //use the best match
175                                 group = minGroup;
176                                 seq.setUnaligned(rawSequence.substr(minPos));
177     
178                                 if(qual.getName() != ""){
179                                         qual.trimQScores(minPos, -1);
180                                 }
181                                 success = minDiff;
182                         }
183                         
184                         if (alignment != NULL) {  delete alignment;  }
185                         
186                 }
187                 
188                 return success;
189                 
190         }
191         catch(exception& e) {
192                 m->errorOut(e, "TrimOligos", "stripBarcode");
193                 exit(1);
194         }
195 }
196 //*******************************************************************/
197 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
198         try {
199                 //look for forward barcode
200                 string rawFSequence = forwardSeq.getUnaligned();
201         string rawRSequence = reverseSeq.getUnaligned();
202                 int success = bdiffs + 1;       //guilty until proven innocent
203                 
204                 //can you find the forward barcode
205                 for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
206                         string foligo = it->second.forward;
207             string roligo = it->second.reverse;
208             
209                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
210                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
211                                 break;  
212                         }
213             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
214                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
215                                 break;  
216                         }
217                         
218                         if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
219                                 group = it->first;
220                                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
221                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
222                 forwardQual.trimQScores(foligo.length(), -1);
223                 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
224                                 success = 0;
225                                 break;
226                         }
227                 }
228                 
229                 //if you found the barcode or if you don't want to allow for diffs
230                 if ((bdiffs == 0) || (success == 0)) { return success;  }
231                 else { //try aligning and see if you can find it
232                         
233             //look for forward
234                         int maxLength = 0;
235                         
236                         Alignment* alignment;
237                         if (ibarcodes.size() > 0) {
238                                 for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
239                                         if(it->second.forward.length() > maxLength){ maxLength = it->second.forward.length();  }
240                                 }
241                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
242                         }else{ alignment = NULL; } 
243                         
244                         //can you find the barcode
245                         int minDiff = 1e6;
246                         int minCount = 1;
247                         int minFGroup = -1;
248                         int minFPos = 0;
249                         
250                         for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
251                                 string oligo = it->second.forward;
252                                 
253                                 if(rawFSequence.length() < maxLength){  //let's just assume that the barcodes are the same length
254                                         success = bdiffs + 10;
255                                         break;
256                                 }
257                                 
258                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
259                                 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
260                                 oligo = alignment->getSeqAAln();
261                                 string temp = alignment->getSeqBAln();
262                                 
263                                 int alnLength = oligo.length();
264                                 
265                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
266                                 oligo = oligo.substr(0,alnLength);
267                                 temp = temp.substr(0,alnLength);
268                 int numDiff = countDiffs(oligo, temp);
269                                 
270                                 if(numDiff < minDiff){
271                                         minDiff = numDiff;
272                                         minCount = 1;
273                                         minFGroup = it->first;
274                                         minFPos = 0;
275                                         for(int i=0;i<alnLength;i++){
276                                                 if(temp[i] != '-'){
277                                                         minFPos++;
278                                                 }
279                                         }
280                                 }else if(numDiff == minDiff){
281                                         minCount++;
282                                 }
283                                 
284                         }
285                         
286                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
287                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
288                         else{   
289                 //check for reverse match
290                 if (alignment != NULL) {  delete alignment;  }
291                 maxLength = 0;
292                 
293                 if (ibarcodes.size() > 0) {
294                     for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
295                         if(it->second.reverse.length() > maxLength){ maxLength = it->second.reverse.length();  }
296                     }
297                     alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
298                 }else{ alignment = NULL; } 
299                 
300                 //can you find the barcode
301                 minDiff = 1e6;
302                 minCount = 1;
303                 int minRGroup = -1;
304                 int minRPos = 0;
305                 
306                 for(map<int,oligosPair>::iterator it=ibarcodes.begin();it!=ibarcodes.end();it++){
307                     string oligo = it->second.reverse;
308                     
309                     if(rawRSequence.length() < maxLength){      //let's just assume that the barcodes are the same length
310                         success = bdiffs + 10;
311                         break;
312                     }
313                     
314                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
315                     alignment->align(oligo, rawRSequence.substr((rawRSequence.length()-(oligo.length()+bdiffs)),oligo.length()+bdiffs));
316                     oligo = alignment->getSeqAAln();
317                     string temp = alignment->getSeqBAln();
318                     
319                     int alnLength = oligo.length();
320                     for(int i=0;i<alnLength;i++){ if(oligo[i] != '-'){  alnLength = i;  break;  } }
321                     oligo = oligo.substr(0,alnLength);
322                     temp = temp.substr(0,alnLength);
323                     int numDiff = countDiffs(oligo, temp);
324                     
325                     if(numDiff < minDiff){
326                         minDiff = numDiff;
327                         minCount = 1;
328                         minRGroup = it->first;
329                         minRPos = 0;
330                         for(int i=0;i<alnLength;i++){
331                             if(temp[i] != '-'){
332                                 minRPos++;
333                             }
334                         }
335                     }else if(numDiff == minDiff){
336                         minCount++;
337                     }
338                     
339                 }
340
341                 if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
342                 else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
343                 else{
344                     //we have an acceptable match for the forward and reverse, but do they match?
345                     if (minFGroup == minRGroup) {
346                         group = minFGroup;
347                         forwardSeq.setUnaligned(rawFSequence.substr(minFPos));
348                         reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-minRPos)));
349                         forwardQual.trimQScores(minFPos, -1);
350                         reverseQual.trimQScores(-1, rawRSequence.length()-minRPos);
351                         success = minDiff;
352                     }else { success = bdiffs + 100;     }
353                 }
354                         }
355                         
356                         if (alignment != NULL) {  delete alignment;  }
357                 }
358                 
359                 return success;
360                 
361         }
362         catch(exception& e) {
363                 m->errorOut(e, "TrimOligos", "stripIBarcode");
364                 exit(1);
365         }
366         
367 }
368 //*******************************************************************/
369 int TrimOligos::stripBarcode(Sequence& seq, int& group){
370         try {
371                 
372                 string rawSequence = seq.getUnaligned();
373                 int success = bdiffs + 1;       //guilty until proven innocent
374                 
375                 //can you find the barcode
376                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
377                         string oligo = it->first;
378                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
379                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
380                                 break;  
381                         }
382                         
383                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
384                                 group = it->second;
385                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
386                                 
387                                 success = 0;
388                                 break;
389                         }
390                 }
391                 
392                 //if you found the barcode or if you don't want to allow for diffs
393                 if ((bdiffs == 0) || (success == 0)) { return success;  }
394                 
395                 else { //try aligning and see if you can find it
396                         
397                         int maxLength = 0;
398                         
399                         Alignment* alignment;
400                         if (barcodes.size() > 0) {
401                                 map<string,int>::iterator it=barcodes.begin();
402                                 
403                                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
404                                         if(it->first.length() > maxLength){
405                                                 maxLength = it->first.length();
406                                         }
407                                 }
408                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
409                                 
410                         }else{ alignment = NULL; } 
411                         
412                         //can you find the barcode
413                         int minDiff = 1e6;
414                         int minCount = 1;
415                         int minGroup = -1;
416                         int minPos = 0;
417                         
418                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
419                                 string oligo = it->first;
420                                 //                              int length = oligo.length();
421                                 
422                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
423                                         success = bdiffs + 10;
424                                         break;
425                                 }
426                                 
427                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
428                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
429                                 oligo = alignment->getSeqAAln();
430                                 string temp = alignment->getSeqBAln();
431                                  
432                                 int alnLength = oligo.length();
433                                 
434                                 for(int i=oligo.length()-1;i>=0;i--){
435                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
436                                 }
437                                 oligo = oligo.substr(0,alnLength);
438                                 temp = temp.substr(0,alnLength);
439                                  
440                                 int numDiff = countDiffs(oligo, temp);
441                                 
442                                 if(numDiff < minDiff){
443                                         minDiff = numDiff;
444                                         minCount = 1;
445                                         minGroup = it->second;
446                                         minPos = 0;
447                                         for(int i=0;i<alnLength;i++){
448                                                 if(temp[i] != '-'){
449                                                         minPos++;
450                                                 }
451                                         }
452                                 }
453                                 else if(numDiff == minDiff){
454                                         minCount++;
455                                 }
456                                 
457                         }
458                         
459                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
460                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
461                         else{                                                                                                   //use the best match
462                                 group = minGroup;
463                                 seq.setUnaligned(rawSequence.substr(minPos));
464                                 success = minDiff;
465                         }
466                         
467                         if (alignment != NULL) {  delete alignment;  }
468                         
469                 }
470                 
471                 return success;
472                 
473         }
474         catch(exception& e) {
475                 m->errorOut(e, "TrimOligos", "stripBarcode");
476                 exit(1);
477         }
478         
479 }
480 /*******************************************************************
481 int TrimOligos::stripRBarcode(Sequence& seq, QualityScores& qual, int& group){
482         try {
483                 
484                 string rawSequence = seq.getUnaligned();
485                 int success = bdiffs + 1;       //guilty until proven innocent
486                 
487                 //can you find the barcode
488                 for(map<string,int>::iterator it=rbarcodes.begin();it!=rbarcodes.end();it++){
489                         string oligo = it->first;
490                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
491                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
492                                 break;  
493                         }
494             
495                         if(compareDNASeq(oligo, rawSequence.substr((rawSequence.length()-oligo.length()),oligo.length()))){
496                                 group = it->second;
497                                 seq.setUnaligned(rawSequence.substr(0,(rawSequence.length()-oligo.length())));
498                                 
499                                 if(qual.getName() != ""){
500                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
501                                 }
502                                 
503                                 success = 0;
504                                 break;
505                         }
506                 }
507                 
508                 //if you found the barcode or if you don't want to allow for diffs
509                 if ((bdiffs == 0) || (success == 0)) { return success;  }
510                 
511                 else { //try aligning and see if you can find it
512                         
513                         int maxLength = 0;
514                         
515                         Alignment* alignment;
516                         if (rbarcodes.size() > 0) {
517                                 map<string,int>::iterator it; 
518                                 
519                                 for(it=rbarcodes.begin();it!=rbarcodes.end();it++){
520                                         if(it->first.length() > maxLength){
521                                                 maxLength = it->first.length();
522                                         }
523                                 }
524                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
525                                 
526                         }else{ alignment = NULL; } 
527                         
528                         //can you find the barcode
529                         int minDiff = 1e6;
530                         int minCount = 1;
531                         int minGroup = -1;
532                         int minPos = 0;
533                         
534                         for(map<string,int>::iterator it=rbarcodes.begin();it!=rbarcodes.end();it++){
535                                 string oligo = it->first;
536                                 //                              int length = oligo.length();
537                                 
538                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
539                                         success = bdiffs + 10;
540                                         break;
541                                 }
542                                 
543                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
544                                 alignment->align(oligo, rawSequence.substr((rawSequence.length()-(oligo.length()+bdiffs)),oligo.length()+bdiffs));
545                                 oligo = alignment->getSeqAAln();
546                                 string temp = alignment->getSeqBAln();
547      
548                                 int alnLength = oligo.length();
549                                 
550                                 for(int i=0;i<alnLength;i++){
551                                         if(oligo[i] != '-'){    alnLength = i;  break;  }
552                                 }
553                                 oligo = oligo.substr(alnLength);
554                                 temp = temp.substr(alnLength);
555                                 
556                                 int numDiff = countDiffs(oligo, temp);
557                                 
558                                 if(numDiff < minDiff){
559                                         minDiff = numDiff;
560                                         minCount = 1;
561                                         minGroup = it->second;
562                                         minPos = 0;
563                                         for(int i=alnLength-1;i>=0;i--){
564                                                 if(temp[i] != '-'){
565                                                         minPos++;
566                                                 }
567                                         }
568                                 }
569                                 else if(numDiff == minDiff){
570                                         minCount++;
571                                 }
572                                 
573                         }
574                         
575                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
576                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
577                         else{                                                                                                   //use the best match
578                                 group = minGroup;
579                                 seq.setUnaligned(rawSequence.substr(0, (rawSequence.length()-minPos)));
580                 
581                                 if(qual.getName() != ""){
582                                         qual.trimQScores(-1, (rawSequence.length()-minPos));
583                                 }
584                                 success = minDiff;
585                         }
586                         
587                         if (alignment != NULL) {  delete alignment;  }
588                         
589                 }
590                 
591                 return success;
592                 
593         }
594         catch(exception& e) {
595                 m->errorOut(e, "TrimOligos", "stripRBarcode");
596                 exit(1);
597         }
598         
599 }
600 /*******************************************************************
601 int TrimOligos::stripRBarcode(Sequence& seq, int& group){
602         try {
603                 
604                 string rawSequence = seq.getUnaligned();
605                 int success = bdiffs + 1;       //guilty until proven innocent
606                 
607                 //can you find the barcode
608                 for(map<string,int>::iterator it=rbarcodes.begin();it!=rbarcodes.end();it++){
609                         string oligo = it->first;
610                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
611                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
612                                 break;  
613                         }
614             
615                         if(compareDNASeq(oligo, rawSequence.substr((rawSequence.length()-oligo.length()),oligo.length()))){
616                                 group = it->second;
617                                 seq.setUnaligned(rawSequence.substr(0,(rawSequence.length()-oligo.length())));
618                                 
619                                 success = 0;
620                                 break;
621                         }
622                 }
623                 
624                 //if you found the barcode or if you don't want to allow for diffs
625                 if ((bdiffs == 0) || (success == 0)) { return success;  }
626                 
627                 else { //try aligning and see if you can find it
628                         
629                         int maxLength = 0;
630                         
631                         Alignment* alignment;
632                         if (rbarcodes.size() > 0) {
633                                 map<string,int>::iterator it; 
634                                 
635                                 for(it=rbarcodes.begin();it!=rbarcodes.end();it++){
636                                         if(it->first.length() > maxLength){
637                                                 maxLength = it->first.length();
638                                         }
639                                 }
640                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
641                                 
642                         }else{ alignment = NULL; } 
643                         
644                         //can you find the barcode
645                         int minDiff = 1e6;
646                         int minCount = 1;
647                         int minGroup = -1;
648                         int minPos = 0;
649                         
650                         for(map<string,int>::iterator it=rbarcodes.begin();it!=rbarcodes.end();it++){
651                                 string oligo = it->first;
652                                 //                              int length = oligo.length();
653                                 
654                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
655                                         success = bdiffs + 10;
656                                         break;
657                                 }
658                                 
659                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
660                                 alignment->align(oligo, rawSequence.substr((rawSequence.length()-(oligo.length()+bdiffs)),oligo.length()+bdiffs));
661                                 oligo = alignment->getSeqAAln();
662                                 string temp = alignment->getSeqBAln();
663                 
664                                 int alnLength = oligo.length();
665                                 
666                                 for(int i=0;i<alnLength;i++){
667                                         if(oligo[i] != '-'){    alnLength = i;  break;  }
668                                 }
669                                 oligo = oligo.substr(alnLength);
670                                 temp = temp.substr(alnLength);
671                                 
672                                 int numDiff = countDiffs(oligo, temp);
673                                 
674                                 if(numDiff < minDiff){
675                                         minDiff = numDiff;
676                                         minCount = 1;
677                                         minGroup = it->second;
678                                         minPos = 0;
679                                         for(int i=alnLength-1;i>=0;i--){
680                                                 if(temp[i] != '-'){
681                                                         minPos++;
682                                                 }
683                                         }
684                                 }
685                                 else if(numDiff == minDiff){
686                                         minCount++;
687                                 }
688                                 
689                         }
690                         
691                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
692                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
693                         else{                                                                                                   //use the best match
694                                 group = minGroup;
695                                 seq.setUnaligned(rawSequence.substr(0, (rawSequence.length()-minPos)));
696                 
697                                 success = minDiff;
698                         }
699                         
700                         if (alignment != NULL) {  delete alignment;  }
701                         
702                 }
703                 
704                 return success;
705                 
706         }
707         catch(exception& e) {
708                 m->errorOut(e, "TrimOligos", "stripRBarcode");
709                 exit(1);
710         }
711         
712 }
713 //********************************************************************/
714 int TrimOligos::stripForward(Sequence& seq, int& group){
715         try {
716                 
717                 string rawSequence = seq.getUnaligned();
718                 int success = pdiffs + 1;       //guilty until proven innocent
719                 
720                 //can you find the primer
721                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
722                         string oligo = it->first;
723                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
724                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
725                                 break;  
726                         }
727                         
728                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
729                                 group = it->second;
730                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
731                                 success = 0;
732                                 break;
733                         }
734                 }
735                 
736                 //if you found the barcode or if you don't want to allow for diffs
737                 if ((pdiffs == 0) || (success == 0)) {  return success;  }
738                 
739                 else { //try aligning and see if you can find it
740                         
741                         int maxLength = 0;
742                         
743                         Alignment* alignment;
744                         if (primers.size() > 0) {
745                                 map<string,int>::iterator it; 
746                                 
747                                 for(it=primers.begin();it!=primers.end();it++){
748                                         if(it->first.length() > maxLength){
749                                                 maxLength = it->first.length();
750                                         }
751                                 }
752                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
753                                 
754                         }else{ alignment = NULL; } 
755                         
756                         //can you find the barcode
757                         int minDiff = 1e6;
758                         int minCount = 1;
759                         int minGroup = -1;
760                         int minPos = 0;
761                         
762                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
763                                 string oligo = it->first;
764                                 //                              int length = oligo.length();
765                                 
766                                 if(rawSequence.length() < maxLength){   
767                                         success = pdiffs + 100;
768                                         break;
769                                 }
770                                 
771                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
772                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
773                                 oligo = alignment->getSeqAAln();
774                                 string temp = alignment->getSeqBAln();
775                                 
776                                 int alnLength = oligo.length();
777                                 
778                                 for(int i=oligo.length()-1;i>=0;i--){
779                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
780                                 }
781                                 oligo = oligo.substr(0,alnLength);
782                                 temp = temp.substr(0,alnLength);
783                                 
784                                 int numDiff = countDiffs(oligo, temp);
785                                 
786                                 if(numDiff < minDiff){
787                                         minDiff = numDiff;
788                                         minCount = 1;
789                                         minGroup = it->second;
790                                         minPos = 0;
791                                         for(int i=0;i<alnLength;i++){
792                                                 if(temp[i] != '-'){
793                                                         minPos++;
794                                                 }
795                                         }
796                                 }
797                                 else if(numDiff == minDiff){
798                                         minCount++;
799                                 }
800                                 
801                         }
802                         
803                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
804                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
805                         else{                                                                                                   //use the best match
806                                 group = minGroup;
807                                 seq.setUnaligned(rawSequence.substr(minPos));
808                                 success = minDiff;
809                         }
810                         
811                         if (alignment != NULL) {  delete alignment;  }
812                         
813                 }
814                 
815                 return success;
816                 
817         }
818         catch(exception& e) {
819                 m->errorOut(e, "TrimOligos", "stripForward");
820                 exit(1);
821         }
822 }
823 //*******************************************************************/
824 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
825         try {
826                 string rawSequence = seq.getUnaligned();
827                 int success = pdiffs + 1;       //guilty until proven innocent
828                 
829                 //can you find the primer
830                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
831                         string oligo = it->first;
832                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
833                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
834                                 break;  
835                         }
836                         
837                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
838                                 group = it->second;
839                                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
840                                 if(qual.getName() != ""){
841                                         if (!keepForward) {  qual.trimQScores(oligo.length(), -1); }
842                                 }
843                                 success = 0;
844                                 break;
845                         }
846                 }
847                 
848                 //if you found the barcode or if you don't want to allow for diffs
849                 if ((pdiffs == 0) || (success == 0)) { return success;  }
850                 
851                 else { //try aligning and see if you can find it
852                         
853                         int maxLength = 0;
854                         
855                         Alignment* alignment;
856                         if (primers.size() > 0) {
857                                 map<string,int>::iterator it; 
858                                 
859                                 for(it=primers.begin();it!=primers.end();it++){
860                                         if(it->first.length() > maxLength){
861                                                 maxLength = it->first.length();
862                                         }
863                                 }
864                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
865                                 
866                         }else{ alignment = NULL; } 
867                         
868                         //can you find the barcode
869                         int minDiff = 1e6;
870                         int minCount = 1;
871                         int minGroup = -1;
872                         int minPos = 0;
873                         
874                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
875                                 string oligo = it->first;
876                                 //                              int length = oligo.length();
877                                 
878                                 if(rawSequence.length() < maxLength){   
879                                         success = pdiffs + 100;
880                                         break;
881                                 }
882                                 
883                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
884                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
885                                 oligo = alignment->getSeqAAln();
886                                 string temp = alignment->getSeqBAln();
887                                 
888                                 int alnLength = oligo.length();
889                                 
890                                 for(int i=oligo.length()-1;i>=0;i--){
891                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
892                                 }
893                                 oligo = oligo.substr(0,alnLength);
894                                 temp = temp.substr(0,alnLength);
895                                 
896                                 int numDiff = countDiffs(oligo, temp);
897                                 
898                                 if(numDiff < minDiff){
899                                         minDiff = numDiff;
900                                         minCount = 1;
901                                         minGroup = it->second;
902                                         minPos = 0;
903                                         for(int i=0;i<alnLength;i++){
904                                                 if(temp[i] != '-'){
905                                                         minPos++;
906                                                 }
907                                         }
908                                 }
909                                 else if(numDiff == minDiff){
910                                         minCount++;
911                                 }
912                                 
913                         }
914                         
915                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
916                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
917                         else{                                                                                                   //use the best match
918                                 group = minGroup;
919                                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
920                                 if(qual.getName() != ""){
921                                         if (!keepForward) { qual.trimQScores(minPos, -1); }
922                                 }
923                                 success = minDiff;
924                         }
925                         
926                         if (alignment != NULL) {  delete alignment;  }
927                         
928                 }
929                 
930                 return success;
931                 
932         }
933         catch(exception& e) {
934                 m->errorOut(e, "TrimOligos", "stripForward");
935                 exit(1);
936         }
937 }
938 //******************************************************************/
939 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
940         try {
941                 string rawSequence = seq.getUnaligned();
942                 bool success = 0;       //guilty until proven innocent
943                 
944                 for(int i=0;i<revPrimer.size();i++){
945                         string oligo = revPrimer[i];
946                         
947                         if(rawSequence.length() < oligo.length()){
948                                 success = 0;
949                                 break;
950                         }
951                         
952                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
953                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
954                                 if(qual.getName() != ""){
955                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
956                                 }
957                                 success = 1;
958                                 break;
959                         }
960                 }       
961                 return success;
962                 
963         }
964         catch(exception& e) {
965                 m->errorOut(e, "TrimOligos", "stripReverse");
966                 exit(1);
967         }
968 }
969 //******************************************************************/
970 bool TrimOligos::stripReverse(Sequence& seq){
971         try {
972                 
973                 string rawSequence = seq.getUnaligned();
974                 bool success = 0;       //guilty until proven innocent
975                 
976                 for(int i=0;i<revPrimer.size();i++){
977                         string oligo = revPrimer[i];
978                         
979                         if(rawSequence.length() < oligo.length()){
980                                 success = 0;
981                                 break;
982                         }
983                         
984                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
985                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
986                                 success = 1;
987                                 break;
988                         }
989                 }       
990                 
991                 return success;
992                 
993         }
994         catch(exception& e) {
995                 m->errorOut(e, "TrimOligos", "stripReverse");
996                 exit(1);
997         }
998 }
999 //******************************************************************/
1000 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
1001         try {
1002                 string rawSequence = seq.getUnaligned();
1003                 bool success = ldiffs + 1;      //guilty until proven innocent
1004                 
1005                 for(int i=0;i<linker.size();i++){
1006                         string oligo = linker[i];
1007                         
1008                         if(rawSequence.length() < oligo.length()){
1009                                 success = ldiffs + 10;
1010                                 break;
1011                         }
1012                         
1013                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1014                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1015                                 if(qual.getName() != ""){
1016                                         qual.trimQScores(oligo.length(), -1);
1017                                 }
1018                                 success = 0;
1019                                 break;
1020                         }
1021                 }
1022         
1023         //if you found the linker or if you don't want to allow for diffs
1024                 if ((ldiffs == 0) || (success == 0)) { return success;  }
1025                 
1026                 else { //try aligning and see if you can find it
1027                         
1028                         int maxLength = 0;
1029                         
1030                         Alignment* alignment;
1031                         if (linker.size() > 0) {
1032                                 for(int i = 0; i < linker.size(); i++){
1033                                         if(linker[i].length() > maxLength){
1034                                                 maxLength = linker[i].length();
1035                                         }
1036                                 }
1037                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+ldiffs+1));  
1038                                 
1039                         }else{ alignment = NULL; } 
1040                         
1041                         //can you find the barcode
1042                         int minDiff = 1e6;
1043                         int minCount = 1;
1044                         int minPos = 0;
1045                         
1046                         for(int i = 0; i < linker.size(); i++){
1047                                 string oligo = linker[i];
1048                                 //                              int length = oligo.length();
1049                                 
1050                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1051                                         success = ldiffs + 10;
1052                                         break;
1053                                 }
1054                                 
1055                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1056                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
1057                                 oligo = alignment->getSeqAAln();
1058                                 string temp = alignment->getSeqBAln();
1059                                 
1060                                 int alnLength = oligo.length();
1061                                 
1062                                 for(int i=oligo.length()-1;i>=0;i--){
1063                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1064                                 }
1065                                 oligo = oligo.substr(0,alnLength);
1066                                 temp = temp.substr(0,alnLength);
1067                                 
1068                                 int numDiff = countDiffs(oligo, temp);
1069                                 
1070                                 if(numDiff < minDiff){
1071                                         minDiff = numDiff;
1072                                         minCount = 1;
1073                                         minPos = 0;
1074                                         for(int i=0;i<alnLength;i++){
1075                                                 if(temp[i] != '-'){
1076                                                         minPos++;
1077                                                 }
1078                                         }
1079                                 }
1080                                 else if(numDiff == minDiff){
1081                                         minCount++;
1082                                 }
1083                                 
1084                         }
1085                         
1086                         if(minDiff > ldiffs)    {       success = minDiff;              }       //no good matches
1087                         else if(minCount > 1)   {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
1088                         else{                                                                                                   //use the best match
1089                                 seq.setUnaligned(rawSequence.substr(minPos));
1090                                 
1091                                 if(qual.getName() != ""){
1092                                         qual.trimQScores(minPos, -1);
1093                                 }
1094                                 success = minDiff;
1095                         }
1096                         
1097                         if (alignment != NULL) {  delete alignment;  }
1098                         
1099                 }
1100
1101        
1102                 return success;
1103                 
1104         }
1105         catch(exception& e) {
1106                 m->errorOut(e, "TrimOligos", "stripLinker");
1107                 exit(1);
1108         }
1109 }
1110 //******************************************************************/
1111 bool TrimOligos::stripLinker(Sequence& seq){
1112         try {
1113                 
1114                 string rawSequence = seq.getUnaligned();
1115                 bool success = ldiffs +1;       //guilty until proven innocent
1116                 
1117                 for(int i=0;i<linker.size();i++){
1118                         string oligo = linker[i];
1119                         
1120                         if(rawSequence.length() < oligo.length()){
1121                                 success = ldiffs +10;
1122                                 break;
1123                         }
1124                         
1125                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1126                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1127                                 success = 0;
1128                                 break;
1129                         }
1130                 }       
1131                 
1132         //if you found the linker or if you don't want to allow for diffs
1133                 if ((ldiffs == 0) || (success == 0)) { return success;  }
1134                 
1135                 else { //try aligning and see if you can find it
1136                         
1137                         int maxLength = 0;
1138                         
1139                         Alignment* alignment;
1140                         if (linker.size() > 0) {
1141                                 for(int i = 0; i < linker.size(); i++){
1142                                         if(linker[i].length() > maxLength){
1143                                                 maxLength = linker[i].length();
1144                                         }
1145                                 }
1146                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+ldiffs+1));  
1147                                 
1148                         }else{ alignment = NULL; } 
1149                         
1150                         //can you find the barcode
1151                         int minDiff = 1e6;
1152                         int minCount = 1;
1153                         int minPos = 0;
1154                         
1155                         for(int i = 0; i < linker.size(); i++){
1156                                 string oligo = linker[i];
1157                                 //                              int length = oligo.length();
1158                                 
1159                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1160                                         success = ldiffs + 10;
1161                                         break;
1162                                 }
1163                                 
1164                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1165                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
1166                                 oligo = alignment->getSeqAAln();
1167                                 string temp = alignment->getSeqBAln();
1168                                 
1169                                 int alnLength = oligo.length();
1170                                 
1171                                 for(int i=oligo.length()-1;i>=0;i--){
1172                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1173                                 }
1174                                 oligo = oligo.substr(0,alnLength);
1175                                 temp = temp.substr(0,alnLength);
1176                                 
1177                                 int numDiff = countDiffs(oligo, temp);
1178                                 
1179                                 if(numDiff < minDiff){
1180                                         minDiff = numDiff;
1181                                         minCount = 1;
1182                                         minPos = 0;
1183                                         for(int i=0;i<alnLength;i++){
1184                                                 if(temp[i] != '-'){
1185                                                         minPos++;
1186                                                 }
1187                                         }
1188                                 }
1189                                 else if(numDiff == minDiff){
1190                                         minCount++;
1191                                 }
1192                                 
1193                         }
1194                         
1195                         if(minDiff > ldiffs)    {       success = minDiff;              }       //no good matches
1196                         else if(minCount > 1)   {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
1197                         else{                                                                                                   //use the best match
1198                                 seq.setUnaligned(rawSequence.substr(minPos));
1199                                 success = minDiff;
1200                         }
1201                         
1202                         if (alignment != NULL) {  delete alignment;  }
1203                         
1204                 }
1205
1206                 return success;
1207                 
1208         }
1209         catch(exception& e) {
1210                 m->errorOut(e, "TrimOligos", "stripLinker");
1211                 exit(1);
1212         }
1213 }
1214
1215 //******************************************************************/
1216 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
1217         try {
1218                 string rawSequence = seq.getUnaligned();
1219                 bool success = sdiffs+1;        //guilty until proven innocent
1220                 
1221                 for(int i=0;i<spacer.size();i++){
1222                         string oligo = spacer[i];
1223                         
1224                         if(rawSequence.length() < oligo.length()){
1225                                 success = sdiffs+10;
1226                                 break;
1227                         }
1228                         
1229                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1230                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1231                                 if(qual.getName() != ""){
1232                                         qual.trimQScores(oligo.length(), -1);
1233                                 }
1234                                 success = 0;
1235                                 break;
1236                         }
1237                 }
1238         
1239         //if you found the spacer or if you don't want to allow for diffs
1240                 if ((sdiffs == 0) || (success == 0)) { return success;  }
1241                 
1242                 else { //try aligning and see if you can find it
1243                         
1244                         int maxLength = 0;
1245                         
1246                         Alignment* alignment;
1247                         if (spacer.size() > 0) {
1248                                 for(int i = 0; i < spacer.size(); i++){
1249                                         if(spacer[i].length() > maxLength){
1250                                                 maxLength = spacer[i].length();
1251                                         }
1252                                 }
1253                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+sdiffs+1));  
1254                                 
1255                         }else{ alignment = NULL; } 
1256                         
1257                         //can you find the barcode
1258                         int minDiff = 1e6;
1259                         int minCount = 1;
1260                         int minPos = 0;
1261                         
1262                         for(int i = 0; i < spacer.size(); i++){
1263                                 string oligo = spacer[i];
1264                                 //                              int length = oligo.length();
1265                                 
1266                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1267                                         success = sdiffs + 10;
1268                                         break;
1269                                 }
1270                                 
1271                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1272                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
1273                                 oligo = alignment->getSeqAAln();
1274                                 string temp = alignment->getSeqBAln();
1275                                 
1276                                 int alnLength = oligo.length();
1277                                 
1278                                 for(int i=oligo.length()-1;i>=0;i--){
1279                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1280                                 }
1281                                 oligo = oligo.substr(0,alnLength);
1282                                 temp = temp.substr(0,alnLength);
1283                                 
1284                                 int numDiff = countDiffs(oligo, temp);
1285                                 
1286                                 if(numDiff < minDiff){
1287                                         minDiff = numDiff;
1288                                         minCount = 1;
1289                                         minPos = 0;
1290                                         for(int i=0;i<alnLength;i++){
1291                                                 if(temp[i] != '-'){
1292                                                         minPos++;
1293                                                 }
1294                                         }
1295                                 }
1296                                 else if(numDiff == minDiff){
1297                                         minCount++;
1298                                 }
1299                                 
1300                         }
1301                         
1302                         if(minDiff > sdiffs)    {       success = minDiff;              }       //no good matches
1303                         else if(minCount > 1)   {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
1304                         else{                                                                                                   //use the best match
1305                                 seq.setUnaligned(rawSequence.substr(minPos));
1306                                 
1307                                 if(qual.getName() != ""){
1308                                         qual.trimQScores(minPos, -1);
1309                                 }
1310                                 success = minDiff;
1311                         }
1312                         
1313                         if (alignment != NULL) {  delete alignment;  }
1314                         
1315                 }
1316         
1317
1318                 return success;
1319                 
1320         }
1321         catch(exception& e) {
1322                 m->errorOut(e, "TrimOligos", "stripSpacer");
1323                 exit(1);
1324         }
1325 }
1326 //******************************************************************/
1327 bool TrimOligos::stripSpacer(Sequence& seq){
1328         try {
1329                 
1330                 string rawSequence = seq.getUnaligned();
1331                 bool success = sdiffs+1;        //guilty until proven innocent
1332                 
1333                 for(int i=0;i<spacer.size();i++){
1334                         string oligo = spacer[i];
1335                         
1336                         if(rawSequence.length() < oligo.length()){
1337                                 success = sdiffs+10;
1338                                 break;
1339                         }
1340                         
1341                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1342                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1343                                 success = 0;
1344                                 break;
1345                         }
1346                 }       
1347                 
1348         //if you found the spacer or if you don't want to allow for diffs
1349                 if ((sdiffs == 0) || (success == 0)) { return success;  }
1350                 
1351                 else { //try aligning and see if you can find it
1352                         
1353                         int maxLength = 0;
1354                         
1355                         Alignment* alignment;
1356                         if (spacer.size() > 0) {
1357                                 for(int i = 0; i < spacer.size(); i++){
1358                                         if(spacer[i].length() > maxLength){
1359                                                 maxLength = spacer[i].length();
1360                                         }
1361                                 }
1362                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+sdiffs+1));  
1363                                 
1364                         }else{ alignment = NULL; } 
1365                         
1366                         //can you find the barcode
1367                         int minDiff = 1e6;
1368                         int minCount = 1;
1369                         int minPos = 0;
1370                         
1371                         for(int i = 0; i < spacer.size(); i++){
1372                                 string oligo = spacer[i];
1373                                 //                              int length = oligo.length();
1374                                 
1375                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1376                                         success = sdiffs + 10;
1377                                         break;
1378                                 }
1379                                 
1380                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1381                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
1382                                 oligo = alignment->getSeqAAln();
1383                                 string temp = alignment->getSeqBAln();
1384                                 
1385                                 int alnLength = oligo.length();
1386                                 
1387                                 for(int i=oligo.length()-1;i>=0;i--){
1388                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1389                                 }
1390                                 oligo = oligo.substr(0,alnLength);
1391                                 temp = temp.substr(0,alnLength);
1392                                 
1393                                 int numDiff = countDiffs(oligo, temp);
1394                                 
1395                                 if(numDiff < minDiff){
1396                                         minDiff = numDiff;
1397                                         minCount = 1;
1398                                         minPos = 0;
1399                                         for(int i=0;i<alnLength;i++){
1400                                                 if(temp[i] != '-'){
1401                                                         minPos++;
1402                                                 }
1403                                         }
1404                                 }
1405                                 else if(numDiff == minDiff){
1406                                         minCount++;
1407                                 }
1408                                 
1409                         }
1410                         
1411                         if(minDiff > sdiffs)    {       success = minDiff;              }       //no good matches
1412                         else if(minCount > 1)   {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
1413                         else{                                                                                                   //use the best match
1414                                 seq.setUnaligned(rawSequence.substr(minPos));
1415                                 success = minDiff;
1416                         }
1417                         
1418                         if (alignment != NULL) {  delete alignment;  }
1419                         
1420                 }
1421
1422                 return success;
1423                 
1424         }
1425         catch(exception& e) {
1426                 m->errorOut(e, "TrimOligos", "stripSpacer");
1427                 exit(1);
1428         }
1429 }
1430
1431 //******************************************************************/
1432 bool TrimOligos::compareDNASeq(string oligo, string seq){
1433         try {
1434                 bool success = 1;
1435                 int length = oligo.length();
1436                 
1437                 for(int i=0;i<length;i++){
1438                         
1439                         if(oligo[i] != seq[i]){
1440                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1441                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1442                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1443                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1444                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1445                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1446                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1447                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1448                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1449                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1450                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1451                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1452                                 
1453                                 if(success == 0)        {       break;   }
1454                         }
1455                         else{
1456                                 success = 1;
1457                         }
1458                 }
1459                 
1460                 return success;
1461         }
1462         catch(exception& e) {
1463                 m->errorOut(e, "TrimOligos", "compareDNASeq");
1464                 exit(1);
1465         }
1466         
1467 }
1468 //********************************************************************/
1469 int TrimOligos::countDiffs(string oligo, string seq){
1470         try {
1471                 
1472                 int length = oligo.length();
1473                 int countDiffs = 0;
1474                 
1475                 for(int i=0;i<length;i++){
1476                         
1477                         if(oligo[i] != seq[i]){
1478                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1479                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1480                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1481                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1482                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1483                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1484                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1485                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1486                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1487                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1488                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1489                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1490                         }
1491                         
1492                 }
1493                 
1494                 return countDiffs;
1495         }
1496         catch(exception& e) {
1497                 m->errorOut(e, "TrimOligos", "countDiffs");
1498                 exit(1);
1499         }
1500 }
1501 /********************************************************************/
1502
1503
1504