]> git.donarmstrong.com Git - mothur.git/blob - trimoligos.cpp
Merge remote-tracking branch 'origin/master'
[mothur.git] / trimoligos.cpp
1 /*
2  *  trimoligos.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/1/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
13
14
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
18         try {
19                 m = MothurOut::getInstance();
20                 
21                 pdiffs = p;
22                 bdiffs = b;
23         ldiffs = l;
24         sdiffs = s;
25                 
26                 barcodes = br;
27                 primers = pr;
28                 revPrimer = r;
29         linker = lk;
30         spacer = sp;
31         maxFBarcodeLength = 0;
32         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
33             if(it->first.length() > maxFBarcodeLength){
34                 maxFBarcodeLength = it->first.length();
35             }
36         }
37         maxFPrimerLength = 0;
38         map<string,int>::iterator it; 
39         for(it=primers.begin();it!=primers.end();it++){
40             if(it->first.length() > maxFPrimerLength){
41                 maxFPrimerLength = it->first.length();
42             }
43         }
44         
45         maxLinkerLength = 0;
46         for(int i = 0; i < linker.size(); i++){
47             if(linker[i].length() > maxLinkerLength){
48                 maxLinkerLength = linker[i].length();
49             }
50         }
51         
52         maxSpacerLength = 0;
53         for(int i = 0; i < spacer.size(); i++){
54             if(spacer[i].length() > maxSpacerLength){
55                 maxSpacerLength = spacer[i].length();
56             }
57         }
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "TrimOligos", "TrimOligos");
61                 exit(1);
62         }
63 }
64 /********************************************************************/
65 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
66 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
67         try {
68                 m = MothurOut::getInstance();
69                 
70                 pdiffs = p;
71                 bdiffs = b;
72         ldiffs = l;
73         sdiffs = s;
74                 
75         maxFBarcodeLength = 0;
76         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
77             string forward = it->second.forward;
78             map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
79             
80             if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length();  }
81             
82             if (itForward == ifbarcodes.end()) {
83                 vector<int> temp; temp.push_back(it->first);
84                 ifbarcodes[forward] = temp;
85             }else { itForward->second.push_back(it->first); }
86         }
87
88         maxFPrimerLength = 0;
89         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
90             string forward = it->second.forward;
91             map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
92             
93             if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length();  }
94             
95             if (itForward == ifprimers.end()) {
96                 vector<int> temp; temp.push_back(it->first);
97                 ifprimers[forward] = temp;
98             }else { itForward->second.push_back(it->first); }
99         }
100         
101         maxRBarcodeLength = 0;
102         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
103             string forward = it->second.reverse;
104             map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
105             
106             if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length();  }
107             
108             if (itForward == irbarcodes.end()) {
109                 vector<int> temp; temp.push_back(it->first);
110                 irbarcodes[forward] = temp;
111             }else { itForward->second.push_back(it->first); }
112         }
113         
114         maxRPrimerLength = 0;
115         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
116             string forward = it->second.reverse;
117             map<string, vector<int> >::iterator itForward = irprimers.find(forward);
118             
119             if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length();  }
120             
121             if (itForward == irprimers.end()) {
122                 vector<int> temp; temp.push_back(it->first);
123                 irprimers[forward] = temp;
124             }else { itForward->second.push_back(it->first); }
125         }
126
127         ipbarcodes = br;
128         ipprimers = pr;
129         }
130         catch(exception& e) {
131                 m->errorOut(e, "TrimOligos", "TrimOligos");
132                 exit(1);
133         }
134 }
135 /********************************************************************/
136 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
137 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
138         try {
139                 m = MothurOut::getInstance();
140                 
141                 pdiffs = p;
142                 bdiffs = b;
143                 
144                 barcodes = br;
145                 primers = pr;
146                 revPrimer = r;
147         }
148         catch(exception& e) {
149                 m->errorOut(e, "TrimOligos", "TrimOligos");
150                 exit(1);
151         }
152 }
153 /********************************************************************/
154 TrimOligos::~TrimOligos() {}
155 //*******************************************************************/
156 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
157         try {
158                 
159                 string rawSequence = seq.getUnaligned();
160                 int success = bdiffs + 1;       //guilty until proven innocent
161                 
162                 //can you find the barcode
163                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
164                         string oligo = it->first;
165                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
166                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
167                                 break;  
168                         }
169                         
170                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
171                                 group = it->second;
172                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
173                                 
174                                 if(qual.getName() != ""){
175                                         qual.trimQScores(oligo.length(), -1);
176                                 }
177                                 
178                                 success = 0;
179                                 break;
180                         }
181                 }
182                 
183                 //if you found the barcode or if you don't want to allow for diffs
184                 if ((bdiffs == 0) || (success == 0)) { return success;  }
185                 
186                 else { //try aligning and see if you can find it
187                         Alignment* alignment;
188                         if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
189                         else{ alignment = NULL; } 
190                         
191                         //can you find the barcode
192                         int minDiff = 1e6;
193                         int minCount = 1;
194                         int minGroup = -1;
195                         int minPos = 0;
196                         
197                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
198                                 string oligo = it->first;
199                                 //                              int length = oligo.length();
200                                 
201                                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
202                                         success = bdiffs + 10;
203                                         break;
204                                 }
205                                 
206                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
207                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
208                                 oligo = alignment->getSeqAAln();
209                                 string temp = alignment->getSeqBAln();
210                                 
211                                 int alnLength = oligo.length();
212                                 
213                                 for(int i=oligo.length()-1;i>=0;i--){
214                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
215                                 }
216                                 oligo = oligo.substr(0,alnLength);
217                                 temp = temp.substr(0,alnLength);
218                 int numDiff = countDiffs(oligo, temp);
219                                 
220                                 if(numDiff < minDiff){
221                                         minDiff = numDiff;
222                                         minCount = 1;
223                                         minGroup = it->second;
224                                         minPos = 0;
225                                         for(int i=0;i<alnLength;i++){
226                                                 if(temp[i] != '-'){
227                                                         minPos++;
228                                                 }
229                                         }
230                                 }
231                                 else if(numDiff == minDiff){
232                                         minCount++;
233                                 }
234                                 
235                         }
236                         
237                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
238                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
239                         else{                                                                                                   //use the best match
240                                 group = minGroup;
241                                 seq.setUnaligned(rawSequence.substr(minPos));
242     
243                                 if(qual.getName() != ""){
244                                         qual.trimQScores(minPos, -1);
245                                 }
246                                 success = minDiff;
247                         }
248                         
249                         if (alignment != NULL) {  delete alignment;  }
250                         
251                 }
252                 
253                 return success;
254                 
255         }
256         catch(exception& e) {
257                 m->errorOut(e, "TrimOligos", "stripBarcode");
258                 exit(1);
259         }
260 }
261 //*******************************************************************/
262 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
263         try {
264                 //look for forward barcode
265                 string rawFSequence = forwardSeq.getUnaligned();
266         string rawRSequence = reverseSeq.getUnaligned();
267                 int success = bdiffs + 1;       //guilty until proven innocent
268                 
269                 //can you find the forward barcode
270                 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
271                         string foligo = it->second.forward;
272             string roligo = it->second.reverse;
273             
274                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
275                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
276                                 break;  
277                         }
278             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
279                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
280                                 break;  
281                         }
282                         
283                         if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
284                                 group = it->first;
285                                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
286                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
287                 forwardQual.trimQScores(foligo.length(), -1);
288                 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
289                                 success = 0;
290                                 break;
291                         }
292                 }
293                 
294                 //if you found the barcode or if you don't want to allow for diffs
295                 if ((bdiffs == 0) || (success == 0)) { return success;  }
296                 else { //try aligning and see if you can find it
297                         Alignment* alignment;
298                         if (ifbarcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
299                         else{ alignment = NULL; } 
300                         
301                         //can you find the barcode
302                         int minDiff = 1e6;
303                         int minCount = 1;
304                         vector< vector<int> > minFGroup;
305                         vector<int> minFPos; 
306             
307             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
308             /*
309                 1   Sarah   Westcott
310                 2   John    Westcott
311                 3   Anna    Westcott
312                 4   Sarah   Schloss
313                 5   Pat     Schloss
314                 6   Gail    Brown
315                 7   Pat     Moore
316              
317                 only want to look for forward = Sarah, John, Anna, Pat, Gail
318                                       reverse = Westcott, Schloss, Brown, Moore
319              
320                 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
321              */
322             //cout << endl << forwardSeq.getName() << endl;         
323                         for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
324                                 string oligo = it->first;
325                                 
326                                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
327                                         success = bdiffs + 10;
328                                         break;
329                                 }
330                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
331                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
332                                 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
333                                 oligo = alignment->getSeqAAln();
334                                 string temp = alignment->getSeqBAln();
335                
336                                 int alnLength = oligo.length();
337                                 
338                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
339                                 oligo = oligo.substr(0,alnLength);
340                                 temp = temp.substr(0,alnLength);
341                 int numDiff = countDiffs(oligo, temp);
342                                 
343                 if (alnLength == 0) { numDiff = bdiffs + 100; }
344                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
345                 
346                                 if(numDiff < minDiff){
347                                         minDiff = numDiff;
348                                         minCount = 1;
349                                         minFGroup.clear();
350                     minFGroup.push_back(it->second);
351                                         int tempminFPos = 0;
352                     minFPos.clear();
353                                         for(int i=0;i<alnLength;i++){
354                                                 if(temp[i] != '-'){
355                                                         tempminFPos++;
356                                                 }
357                                         }
358                     minFPos.push_back(tempminFPos);
359                                 }else if(numDiff == minDiff){
360                                         minFGroup.push_back(it->second);
361                     int tempminFPos = 0;
362                                         for(int i=0;i<alnLength;i++){
363                                                 if(temp[i] != '-'){
364                                                         tempminFPos++;
365                                                 }
366                                         }
367                     minFPos.push_back(tempminFPos);
368                                 }
369                         }
370             
371                         //cout << minDiff << '\t' << minCount << '\t' << endl;
372                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
373                         else{   
374                 //check for reverse match
375                 if (alignment != NULL) {  delete alignment;  }
376                 
377                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1));   }
378                 else{ alignment = NULL; } 
379                 
380                 //can you find the barcode
381                 minDiff = 1e6;
382                 minCount = 1;
383                 vector< vector<int> > minRGroup;
384                 vector<int> minRPos;
385                 
386                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
387                     string oligo = it->first;
388                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
389                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
390                         success = bdiffs + 10;
391                         break;
392                     }
393                     
394                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
395                     alignment->align(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
396                     oligo = alignment->getSeqAAln();
397                     string temp = alignment->getSeqBAln();
398                     
399                     int alnLength = oligo.length();
400                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
401                     oligo = oligo.substr(0,alnLength);
402                     temp = temp.substr(0,alnLength);
403                     int numDiff = countDiffs(oligo, temp);
404                     if (alnLength == 0) { numDiff = bdiffs + 100; }
405                     
406                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
407                     if(numDiff < minDiff){
408                         minDiff = numDiff;
409                         minCount = 1;
410                         minRGroup.clear();
411                         minRGroup.push_back(it->second);
412                         int tempminRPos = 0;
413                         minRPos.clear();
414                         for(int i=0;i<alnLength;i++){
415                             if(temp[i] != '-'){
416                                 tempminRPos++;
417                             }
418                         }
419                         minRPos.push_back(tempminRPos);                    
420                     }else if(numDiff == minDiff){
421                         int tempminRPos = 0;
422                         for(int i=0;i<alnLength;i++){
423                             if(temp[i] != '-'){
424                                 tempminRPos++;
425                             }
426                         }
427                         minRPos.push_back(tempminRPos);  
428                         minRGroup.push_back(it->second);
429                     }
430                     
431                 }
432                 
433                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
434                 for (int i = 0; i < minFGroup.size(); i++) { 
435                     cout << i << '\t';
436                     for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
437                     cout << endl;
438                 }
439                 cout << endl;
440                 for (int i = 0; i < minRGroup.size(); i++) { 
441                     cout << i << '\t';
442                     for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
443                     cout << endl;
444                 }
445                 cout << endl;*/
446                 if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
447                 else  {  
448                     bool foundMatch = false;
449                     vector<int> matches;
450                     for (int i = 0; i < minFGroup.size(); i++) {
451                         for (int j = 0; j < minFGroup[i].size(); j++) {
452                             for (int k = 0; k < minRGroup.size(); k++) {
453                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
454                             }
455                         }
456                     }
457                     
458                     int fStart = 0;
459                     int rStart = 0;
460                     if (matches.size() == 1) { 
461                         foundMatch = true;
462                         group = matches[0]; 
463                         fStart = minFPos[0];
464                         rStart = minRPos[0];
465                     }
466                     
467                     //we have an acceptable match for the forward and reverse, but do they match?
468                     if (foundMatch) {
469                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
470                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
471                         forwardQual.trimQScores(fStart, -1);
472                         reverseQual.trimQScores(rStart, -1);
473                         success = minDiff;
474                     }else { success = bdiffs + 100;     }
475                 }
476                         }
477                         
478                         if (alignment != NULL) {  delete alignment;  }
479                 }
480                 
481                 return success;
482                 
483         }
484         catch(exception& e) {
485                 m->errorOut(e, "TrimOligos", "stripIBarcode");
486                 exit(1);
487         }
488         
489 }
490 //*******************************************************************/
491 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
492         try {
493                 //look for forward barcode
494                 string rawFSequence = forwardSeq.getUnaligned();
495         string rawRSequence = reverseSeq.getUnaligned();
496                 int success = pdiffs + 1;       //guilty until proven innocent
497                 
498                 //can you find the forward barcode
499                 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
500                         string foligo = it->second.forward;
501             string roligo = it->second.reverse;
502             
503                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
504                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
505                                 break;  
506                         }
507             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
508                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
509                                 break;  
510                         }
511                         
512                         if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
513                                 group = it->first;
514                                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
515                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
516                 forwardQual.trimQScores(foligo.length(), -1);
517                 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
518                                 success = 0;
519                                 break;
520                         }
521                 }
522                 
523                 //if you found the barcode or if you don't want to allow for diffs
524                 if ((pdiffs == 0) || (success == 0)) { return success;  }
525                 else { //try aligning and see if you can find it
526                         Alignment* alignment;
527                         if (ifprimers.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
528                         else{ alignment = NULL; } 
529                         
530                         //can you find the barcode
531                         int minDiff = 1e6;
532                         int minCount = 1;
533                         vector< vector<int> > minFGroup;
534                         vector<int> minFPos; 
535             
536             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
537             /*
538              1   Sarah   Westcott
539              2   John    Westcott
540              3   Anna    Westcott
541              4   Sarah   Schloss
542              5   Pat     Schloss
543              6   Gail    Brown
544              7   Pat     Moore
545              
546              only want to look for forward = Sarah, John, Anna, Pat, Gail
547              reverse = Westcott, Schloss, Brown, Moore
548              
549              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
550              */
551             //cout << endl << forwardSeq.getName() << endl;         
552                         for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
553                                 string oligo = it->first;
554                                 
555                                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
556                                         success = pdiffs + 10;
557                                         break;
558                                 }
559                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
560                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
561                                 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
562                                 oligo = alignment->getSeqAAln();
563                                 string temp = alignment->getSeqBAln();
564                 
565                                 int alnLength = oligo.length();
566                                 
567                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
568                                 oligo = oligo.substr(0,alnLength);
569                                 temp = temp.substr(0,alnLength);
570                 int numDiff = countDiffs(oligo, temp);
571                                 
572                 if (alnLength == 0) { numDiff = pdiffs + 100; }
573                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
574                 
575                                 if(numDiff < minDiff){
576                                         minDiff = numDiff;
577                                         minCount = 1;
578                                         minFGroup.clear();
579                     minFGroup.push_back(it->second);
580                                         int tempminFPos = 0;
581                     minFPos.clear();
582                                         for(int i=0;i<alnLength;i++){
583                                                 if(temp[i] != '-'){
584                                                         tempminFPos++;
585                                                 }
586                                         }
587                     minFPos.push_back(tempminFPos);
588                                 }else if(numDiff == minDiff){
589                                         minFGroup.push_back(it->second);
590                     int tempminFPos = 0;
591                                         for(int i=0;i<alnLength;i++){
592                                                 if(temp[i] != '-'){
593                                                         tempminFPos++;
594                                                 }
595                                         }
596                     minFPos.push_back(tempminFPos);
597                                 }
598                         }
599             
600                         //cout << minDiff << '\t' << minCount << '\t' << endl;
601                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
602                         else{   
603                 //check for reverse match
604                 if (alignment != NULL) {  delete alignment;  }
605                 
606                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1));   }
607                 else{ alignment = NULL; } 
608                 
609                 //can you find the barcode
610                 minDiff = 1e6;
611                 minCount = 1;
612                 vector< vector<int> > minRGroup;
613                 vector<int> minRPos;
614                 
615                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
616                     string oligo = it->first;
617                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
618                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
619                         success = pdiffs + 10;
620                         break;
621                     }
622                     
623                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
624                     alignment->align(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
625                     oligo = alignment->getSeqAAln();
626                     string temp = alignment->getSeqBAln();
627                     
628                     int alnLength = oligo.length();
629                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
630                     oligo = oligo.substr(0,alnLength);
631                     temp = temp.substr(0,alnLength);
632                     int numDiff = countDiffs(oligo, temp);
633                     if (alnLength == 0) { numDiff = pdiffs + 100; }
634                     
635                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
636                     if(numDiff < minDiff){
637                         minDiff = numDiff;
638                         minCount = 1;
639                         minRGroup.clear();
640                         minRGroup.push_back(it->second);
641                         int tempminRPos = 0;
642                         minRPos.clear();
643                         for(int i=0;i<alnLength;i++){
644                             if(temp[i] != '-'){
645                                 tempminRPos++;
646                             }
647                         }
648                         minRPos.push_back(tempminRPos);                    
649                     }else if(numDiff == minDiff){
650                         int tempminRPos = 0;
651                         for(int i=0;i<alnLength;i++){
652                             if(temp[i] != '-'){
653                                 tempminRPos++;
654                             }
655                         }
656                         minRPos.push_back(tempminRPos);  
657                         minRGroup.push_back(it->second);
658                     }
659                     
660                 }
661                 
662                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
663                  for (int i = 0; i < minFGroup.size(); i++) { 
664                  cout << i << '\t';
665                  for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
666                  cout << endl;
667                  }
668                  cout << endl;
669                  for (int i = 0; i < minRGroup.size(); i++) { 
670                  cout << i << '\t';
671                  for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
672                  cout << endl;
673                  }
674                  cout << endl;*/
675                 if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
676                 else  {  
677                     bool foundMatch = false;
678                     vector<int> matches;
679                     for (int i = 0; i < minFGroup.size(); i++) {
680                         for (int j = 0; j < minFGroup[i].size(); j++) {
681                             for (int k = 0; k < minRGroup.size(); k++) {
682                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
683                             }
684                         }
685                     }
686                     
687                     int fStart = 0;
688                     int rStart = 0;
689                     if (matches.size() == 1) { 
690                         foundMatch = true;
691                         group = matches[0]; 
692                         fStart = minFPos[0];
693                         rStart = minRPos[0];
694                     }
695                     
696                     //we have an acceptable match for the forward and reverse, but do they match?
697                     if (foundMatch) {
698                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
699                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
700                         forwardQual.trimQScores(fStart, -1);
701                         reverseQual.trimQScores(rStart, -1);
702                         success = minDiff;
703                     }else { success = pdiffs + 100;     }
704                 }
705                         }
706                         
707                         if (alignment != NULL) {  delete alignment;  }
708                 }
709                 
710                 return success;
711                 
712         }
713         catch(exception& e) {
714                 m->errorOut(e, "TrimOligos", "stripIForward");
715                 exit(1);
716         }
717         
718 }
719 //*******************************************************************/
720 int TrimOligos::stripBarcode(Sequence& seq, int& group){
721         try {
722                 
723                 string rawSequence = seq.getUnaligned();
724                 int success = bdiffs + 1;       //guilty until proven innocent
725                 
726                 //can you find the barcode
727                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
728                         string oligo = it->first;
729                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
730                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
731                                 break;  
732                         }
733                         
734                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
735                                 group = it->second;
736                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
737                                 
738                                 success = 0;
739                                 break;
740                         }
741                 }
742                 
743                 //if you found the barcode or if you don't want to allow for diffs
744                 if ((bdiffs == 0) || (success == 0)) { return success;  }
745                 
746                 else { //try aligning and see if you can find it
747             Alignment* alignment;
748                         if (barcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));  }
749                         else{ alignment = NULL; } 
750                         
751                         //can you find the barcode
752                         int minDiff = 1e6;
753                         int minCount = 1;
754                         int minGroup = -1;
755                         int minPos = 0;
756                         
757                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
758                                 string oligo = it->first;
759                                 //                              int length = oligo.length();
760                                 
761                                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
762                                         success = bdiffs + 10;
763                                         break;
764                                 }
765                                 
766                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
767                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
768                                 oligo = alignment->getSeqAAln();
769                                 string temp = alignment->getSeqBAln();
770                                  
771                                 int alnLength = oligo.length();
772                                 
773                                 for(int i=oligo.length()-1;i>=0;i--){
774                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
775                                 }
776                                 oligo = oligo.substr(0,alnLength);
777                                 temp = temp.substr(0,alnLength);
778                                  
779                                 int numDiff = countDiffs(oligo, temp);
780                                 
781                                 if(numDiff < minDiff){
782                                         minDiff = numDiff;
783                                         minCount = 1;
784                                         minGroup = it->second;
785                                         minPos = 0;
786                                         for(int i=0;i<alnLength;i++){
787                                                 if(temp[i] != '-'){
788                                                         minPos++;
789                                                 }
790                                         }
791                                 }
792                                 else if(numDiff == minDiff){
793                                         minCount++;
794                                 }
795                                 
796                         }
797                         
798                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
799                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
800                         else{                                                                                                   //use the best match
801                                 group = minGroup;
802                                 seq.setUnaligned(rawSequence.substr(minPos));
803                                 success = minDiff;
804                         }
805                         
806                         if (alignment != NULL) {  delete alignment;  }
807                         
808                 }
809                 
810                 return success;
811                 
812         }
813         catch(exception& e) {
814                 m->errorOut(e, "TrimOligos", "stripBarcode");
815                 exit(1);
816         }
817         
818 }
819
820 /********************************************************************/
821 int TrimOligos::stripForward(Sequence& seq, int& group){
822         try {
823                 
824                 string rawSequence = seq.getUnaligned();
825                 int success = pdiffs + 1;       //guilty until proven innocent
826                 
827                 //can you find the primer
828                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
829                         string oligo = it->first;
830                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
831                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
832                                 break;  
833                         }
834                         
835                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
836                                 group = it->second;
837                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
838                                 success = 0;
839                                 break;
840                         }
841                 }
842                 
843                 //if you found the barcode or if you don't want to allow for diffs
844                 if ((pdiffs == 0) || (success == 0)) {  return success;  }
845                 
846                 else { //try aligning and see if you can find it
847                         Alignment* alignment;
848                         if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
849             else{ alignment = NULL; } 
850                         
851                         //can you find the barcode
852                         int minDiff = 1e6;
853                         int minCount = 1;
854                         int minGroup = -1;
855                         int minPos = 0;
856                         
857                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
858                                 string oligo = it->first;
859                                 //                              int length = oligo.length();
860                                 
861                                 if(rawSequence.length() < maxFPrimerLength){    
862                                         success = pdiffs + 100;
863                                         break;
864                                 }
865                                 
866                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
867                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
868                                 oligo = alignment->getSeqAAln();
869                                 string temp = alignment->getSeqBAln();
870                                 
871                                 int alnLength = oligo.length();
872                                 
873                                 for(int i=oligo.length()-1;i>=0;i--){
874                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
875                                 }
876                                 oligo = oligo.substr(0,alnLength);
877                                 temp = temp.substr(0,alnLength);
878                                 
879                                 int numDiff = countDiffs(oligo, temp);
880                                 
881                                 if(numDiff < minDiff){
882                                         minDiff = numDiff;
883                                         minCount = 1;
884                                         minGroup = it->second;
885                                         minPos = 0;
886                                         for(int i=0;i<alnLength;i++){
887                                                 if(temp[i] != '-'){
888                                                         minPos++;
889                                                 }
890                                         }
891                                 }
892                                 else if(numDiff == minDiff){
893                                         minCount++;
894                                 }
895                                 
896                         }
897                         
898                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
899                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
900                         else{                                                                                                   //use the best match
901                                 group = minGroup;
902                                 seq.setUnaligned(rawSequence.substr(minPos));
903                                 success = minDiff;
904                         }
905                         
906                         if (alignment != NULL) {  delete alignment;  }
907                         
908                 }
909                 
910                 return success;
911                 
912         }
913         catch(exception& e) {
914                 m->errorOut(e, "TrimOligos", "stripForward");
915                 exit(1);
916         }
917 }
918 //*******************************************************************/
919 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
920         try {
921                 string rawSequence = seq.getUnaligned();
922                 int success = pdiffs + 1;       //guilty until proven innocent
923                 
924                 //can you find the primer
925                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
926                         string oligo = it->first;
927                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
928                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
929                                 break;  
930                         }
931                         
932                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
933                                 group = it->second;
934                                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
935                                 if(qual.getName() != ""){
936                                         if (!keepForward) {  qual.trimQScores(oligo.length(), -1); }
937                                 }
938                                 success = 0;
939                                 break;
940                         }
941                 }
942                 
943                 //if you found the barcode or if you don't want to allow for diffs
944                 if ((pdiffs == 0) || (success == 0)) { return success;  }
945                 
946                 else { //try aligning and see if you can find it
947                         Alignment* alignment;
948                         if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));  }
949                         else{ alignment = NULL; } 
950                         
951                         //can you find the barcode
952                         int minDiff = 1e6;
953                         int minCount = 1;
954                         int minGroup = -1;
955                         int minPos = 0;
956                         
957                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
958                                 string oligo = it->first;
959                                 //                              int length = oligo.length();
960                                 
961                                 if(rawSequence.length() < maxFPrimerLength){    
962                                         success = pdiffs + 100;
963                                         break;
964                                 }
965                                 
966                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
967                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
968                                 oligo = alignment->getSeqAAln();
969                                 string temp = alignment->getSeqBAln();
970                                 
971                                 int alnLength = oligo.length();
972                                 
973                                 for(int i=oligo.length()-1;i>=0;i--){
974                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
975                                 }
976                                 oligo = oligo.substr(0,alnLength);
977                                 temp = temp.substr(0,alnLength);
978                                 
979                                 int numDiff = countDiffs(oligo, temp);
980                                 
981                                 if(numDiff < minDiff){
982                                         minDiff = numDiff;
983                                         minCount = 1;
984                                         minGroup = it->second;
985                                         minPos = 0;
986                                         for(int i=0;i<alnLength;i++){
987                                                 if(temp[i] != '-'){
988                                                         minPos++;
989                                                 }
990                                         }
991                                 }
992                                 else if(numDiff == minDiff){
993                                         minCount++;
994                                 }
995                                 
996                         }
997                         
998                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
999                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1000                         else{                                                                                                   //use the best match
1001                                 group = minGroup;
1002                                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
1003                                 if(qual.getName() != ""){
1004                                         if (!keepForward) { qual.trimQScores(minPos, -1); }
1005                                 }
1006                                 success = minDiff;
1007                         }
1008                         
1009                         if (alignment != NULL) {  delete alignment;  }
1010                         
1011                 }
1012                 
1013                 return success;
1014                 
1015         }
1016         catch(exception& e) {
1017                 m->errorOut(e, "TrimOligos", "stripForward");
1018                 exit(1);
1019         }
1020 }
1021 //******************************************************************/
1022 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
1023         try {
1024                 string rawSequence = seq.getUnaligned();
1025                 bool success = 0;       //guilty until proven innocent
1026                 
1027                 for(int i=0;i<revPrimer.size();i++){
1028                         string oligo = revPrimer[i];
1029                         
1030                         if(rawSequence.length() < oligo.length()){
1031                                 success = 0;
1032                                 break;
1033                         }
1034                         
1035                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1036                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1037                                 if(qual.getName() != ""){
1038                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1039                                 }
1040                                 success = 1;
1041                                 break;
1042                         }
1043                 }       
1044                 return success;
1045                 
1046         }
1047         catch(exception& e) {
1048                 m->errorOut(e, "TrimOligos", "stripReverse");
1049                 exit(1);
1050         }
1051 }
1052 //******************************************************************/
1053 bool TrimOligos::stripReverse(Sequence& seq){
1054         try {
1055                 
1056                 string rawSequence = seq.getUnaligned();
1057                 bool success = 0;       //guilty until proven innocent
1058                 
1059                 for(int i=0;i<revPrimer.size();i++){
1060                         string oligo = revPrimer[i];
1061                         
1062                         if(rawSequence.length() < oligo.length()){
1063                                 success = 0;
1064                                 break;
1065                         }
1066                         
1067                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1068                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1069                                 success = 1;
1070                                 break;
1071                         }
1072                 }       
1073                 
1074                 return success;
1075                 
1076         }
1077         catch(exception& e) {
1078                 m->errorOut(e, "TrimOligos", "stripReverse");
1079                 exit(1);
1080         }
1081 }
1082 //******************************************************************/
1083 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
1084         try {
1085                 string rawSequence = seq.getUnaligned();
1086                 bool success = ldiffs + 1;      //guilty until proven innocent
1087                 
1088                 for(int i=0;i<linker.size();i++){
1089                         string oligo = linker[i];
1090                         
1091                         if(rawSequence.length() < oligo.length()){
1092                                 success = ldiffs + 10;
1093                                 break;
1094                         }
1095                         
1096                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1097                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1098                                 if(qual.getName() != ""){
1099                                         qual.trimQScores(oligo.length(), -1);
1100                                 }
1101                                 success = 0;
1102                                 break;
1103                         }
1104                 }
1105         
1106         //if you found the linker or if you don't want to allow for diffs
1107                 if ((ldiffs == 0) || (success == 0)) { return success;  }
1108                 
1109                 else { //try aligning and see if you can find it
1110                         Alignment* alignment;
1111                         if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1));   }     
1112                         else{ alignment = NULL; } 
1113                         
1114                         //can you find the barcode
1115                         int minDiff = 1e6;
1116                         int minCount = 1;
1117                         int minPos = 0;
1118                         
1119                         for(int i = 0; i < linker.size(); i++){
1120                                 string oligo = linker[i];
1121                                 //                              int length = oligo.length();
1122                                 
1123                                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
1124                                         success = ldiffs + 10;
1125                                         break;
1126                                 }
1127                                 
1128                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1129                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
1130                                 oligo = alignment->getSeqAAln();
1131                                 string temp = alignment->getSeqBAln();
1132                                 
1133                                 int alnLength = oligo.length();
1134                                 
1135                                 for(int i=oligo.length()-1;i>=0;i--){
1136                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1137                                 }
1138                                 oligo = oligo.substr(0,alnLength);
1139                                 temp = temp.substr(0,alnLength);
1140                                 
1141                                 int numDiff = countDiffs(oligo, temp);
1142                                 
1143                                 if(numDiff < minDiff){
1144                                         minDiff = numDiff;
1145                                         minCount = 1;
1146                                         minPos = 0;
1147                                         for(int i=0;i<alnLength;i++){
1148                                                 if(temp[i] != '-'){
1149                                                         minPos++;
1150                                                 }
1151                                         }
1152                                 }
1153                                 else if(numDiff == minDiff){
1154                                         minCount++;
1155                                 }
1156                                 
1157                         }
1158                         
1159                         if(minDiff > ldiffs)    {       success = minDiff;              }       //no good matches
1160                         else if(minCount > 1)   {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
1161                         else{                                                                                                   //use the best match
1162                                 seq.setUnaligned(rawSequence.substr(minPos));
1163                                 
1164                                 if(qual.getName() != ""){
1165                                         qual.trimQScores(minPos, -1);
1166                                 }
1167                                 success = minDiff;
1168                         }
1169                         
1170                         if (alignment != NULL) {  delete alignment;  }
1171                         
1172                 }
1173
1174        
1175                 return success;
1176                 
1177         }
1178         catch(exception& e) {
1179                 m->errorOut(e, "TrimOligos", "stripLinker");
1180                 exit(1);
1181         }
1182 }
1183 //******************************************************************/
1184 bool TrimOligos::stripLinker(Sequence& seq){
1185         try {
1186                 
1187                 string rawSequence = seq.getUnaligned();
1188                 bool success = ldiffs +1;       //guilty until proven innocent
1189                 
1190                 for(int i=0;i<linker.size();i++){
1191                         string oligo = linker[i];
1192                         
1193                         if(rawSequence.length() < oligo.length()){
1194                                 success = ldiffs +10;
1195                                 break;
1196                         }
1197                         
1198                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1199                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1200                                 success = 0;
1201                                 break;
1202                         }
1203                 }       
1204                 
1205         //if you found the linker or if you don't want to allow for diffs
1206                 if ((ldiffs == 0) || (success == 0)) { return success;  }
1207                 
1208                 else { //try aligning and see if you can find it
1209                         Alignment* alignment;
1210                         if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1));  }
1211             else{ alignment = NULL; } 
1212                         
1213                         //can you find the barcode
1214                         int minDiff = 1e6;
1215                         int minCount = 1;
1216                         int minPos = 0;
1217                         
1218                         for(int i = 0; i < linker.size(); i++){
1219                                 string oligo = linker[i];
1220                                 //                              int length = oligo.length();
1221                                 
1222                                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
1223                                         success = ldiffs + 10;
1224                                         break;
1225                                 }
1226                                 
1227                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1228                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
1229                                 oligo = alignment->getSeqAAln();
1230                                 string temp = alignment->getSeqBAln();
1231                                 
1232                                 int alnLength = oligo.length();
1233                                 
1234                                 for(int i=oligo.length()-1;i>=0;i--){
1235                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1236                                 }
1237                                 oligo = oligo.substr(0,alnLength);
1238                                 temp = temp.substr(0,alnLength);
1239                                 
1240                                 int numDiff = countDiffs(oligo, temp);
1241                                 
1242                                 if(numDiff < minDiff){
1243                                         minDiff = numDiff;
1244                                         minCount = 1;
1245                                         minPos = 0;
1246                                         for(int i=0;i<alnLength;i++){
1247                                                 if(temp[i] != '-'){
1248                                                         minPos++;
1249                                                 }
1250                                         }
1251                                 }
1252                                 else if(numDiff == minDiff){
1253                                         minCount++;
1254                                 }
1255                                 
1256                         }
1257                         
1258                         if(minDiff > ldiffs)    {       success = minDiff;              }       //no good matches
1259                         else if(minCount > 1)   {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
1260                         else{                                                                                                   //use the best match
1261                                 seq.setUnaligned(rawSequence.substr(minPos));
1262                                 success = minDiff;
1263                         }
1264                         
1265                         if (alignment != NULL) {  delete alignment;  }
1266                         
1267                 }
1268
1269                 return success;
1270                 
1271         }
1272         catch(exception& e) {
1273                 m->errorOut(e, "TrimOligos", "stripLinker");
1274                 exit(1);
1275         }
1276 }
1277
1278 //******************************************************************/
1279 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
1280         try {
1281                 string rawSequence = seq.getUnaligned();
1282                 bool success = sdiffs+1;        //guilty until proven innocent
1283                 
1284                 for(int i=0;i<spacer.size();i++){
1285                         string oligo = spacer[i];
1286                         
1287                         if(rawSequence.length() < oligo.length()){
1288                                 success = sdiffs+10;
1289                                 break;
1290                         }
1291                         
1292                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1293                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1294                                 if(qual.getName() != ""){
1295                                         qual.trimQScores(oligo.length(), -1);
1296                                 }
1297                                 success = 0;
1298                                 break;
1299                         }
1300                 }
1301         
1302         //if you found the spacer or if you don't want to allow for diffs
1303                 if ((sdiffs == 0) || (success == 0)) { return success;  }
1304                 
1305                 else { //try aligning and see if you can find it
1306                         Alignment* alignment;
1307                         if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1));  }
1308             else{ alignment = NULL; } 
1309                         
1310                         //can you find the barcode
1311                         int minDiff = 1e6;
1312                         int minCount = 1;
1313                         int minPos = 0;
1314                         
1315                         for(int i = 0; i < spacer.size(); i++){
1316                                 string oligo = spacer[i];
1317                                 //                              int length = oligo.length();
1318                                 
1319                                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
1320                                         success = sdiffs + 10;
1321                                         break;
1322                                 }
1323                                 
1324                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1325                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
1326                                 oligo = alignment->getSeqAAln();
1327                                 string temp = alignment->getSeqBAln();
1328                                 
1329                                 int alnLength = oligo.length();
1330                                 
1331                                 for(int i=oligo.length()-1;i>=0;i--){
1332                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1333                                 }
1334                                 oligo = oligo.substr(0,alnLength);
1335                                 temp = temp.substr(0,alnLength);
1336                                 
1337                                 int numDiff = countDiffs(oligo, temp);
1338                                 
1339                                 if(numDiff < minDiff){
1340                                         minDiff = numDiff;
1341                                         minCount = 1;
1342                                         minPos = 0;
1343                                         for(int i=0;i<alnLength;i++){
1344                                                 if(temp[i] != '-'){
1345                                                         minPos++;
1346                                                 }
1347                                         }
1348                                 }
1349                                 else if(numDiff == minDiff){
1350                                         minCount++;
1351                                 }
1352                                 
1353                         }
1354                         
1355                         if(minDiff > sdiffs)    {       success = minDiff;              }       //no good matches
1356                         else if(minCount > 1)   {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
1357                         else{                                                                                                   //use the best match
1358                                 seq.setUnaligned(rawSequence.substr(minPos));
1359                                 
1360                                 if(qual.getName() != ""){
1361                                         qual.trimQScores(minPos, -1);
1362                                 }
1363                                 success = minDiff;
1364                         }
1365                         
1366                         if (alignment != NULL) {  delete alignment;  }
1367                         
1368                 }
1369         
1370
1371                 return success;
1372                 
1373         }
1374         catch(exception& e) {
1375                 m->errorOut(e, "TrimOligos", "stripSpacer");
1376                 exit(1);
1377         }
1378 }
1379 //******************************************************************/
1380 bool TrimOligos::stripSpacer(Sequence& seq){
1381         try {
1382                 
1383                 string rawSequence = seq.getUnaligned();
1384                 bool success = sdiffs+1;        //guilty until proven innocent
1385                 
1386                 for(int i=0;i<spacer.size();i++){
1387                         string oligo = spacer[i];
1388                         
1389                         if(rawSequence.length() < oligo.length()){
1390                                 success = sdiffs+10;
1391                                 break;
1392                         }
1393                         
1394                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1395                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1396                                 success = 0;
1397                                 break;
1398                         }
1399                 }       
1400                 
1401         //if you found the spacer or if you don't want to allow for diffs
1402                 if ((sdiffs == 0) || (success == 0)) { return success;  }
1403                 
1404                 else { //try aligning and see if you can find it
1405                         Alignment* alignment;
1406                         if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1));  }
1407             else{ alignment = NULL; } 
1408                         
1409                         //can you find the barcode
1410                         int minDiff = 1e6;
1411                         int minCount = 1;
1412                         int minPos = 0;
1413                         
1414                         for(int i = 0; i < spacer.size(); i++){
1415                                 string oligo = spacer[i];
1416                                 //                              int length = oligo.length();
1417                                 
1418                                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
1419                                         success = sdiffs + 10;
1420                                         break;
1421                                 }
1422                                 
1423                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1424                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
1425                                 oligo = alignment->getSeqAAln();
1426                                 string temp = alignment->getSeqBAln();
1427                                 
1428                                 int alnLength = oligo.length();
1429                                 
1430                                 for(int i=oligo.length()-1;i>=0;i--){
1431                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1432                                 }
1433                                 oligo = oligo.substr(0,alnLength);
1434                                 temp = temp.substr(0,alnLength);
1435                                 
1436                                 int numDiff = countDiffs(oligo, temp);
1437                                 
1438                                 if(numDiff < minDiff){
1439                                         minDiff = numDiff;
1440                                         minCount = 1;
1441                                         minPos = 0;
1442                                         for(int i=0;i<alnLength;i++){
1443                                                 if(temp[i] != '-'){
1444                                                         minPos++;
1445                                                 }
1446                                         }
1447                                 }
1448                                 else if(numDiff == minDiff){
1449                                         minCount++;
1450                                 }
1451                                 
1452                         }
1453                         
1454                         if(minDiff > sdiffs)    {       success = minDiff;              }       //no good matches
1455                         else if(minCount > 1)   {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
1456                         else{                                                                                                   //use the best match
1457                                 seq.setUnaligned(rawSequence.substr(minPos));
1458                                 success = minDiff;
1459                         }
1460                         
1461                         if (alignment != NULL) {  delete alignment;  }
1462                         
1463                 }
1464
1465                 return success;
1466                 
1467         }
1468         catch(exception& e) {
1469                 m->errorOut(e, "TrimOligos", "stripSpacer");
1470                 exit(1);
1471         }
1472 }
1473
1474 //******************************************************************/
1475 bool TrimOligos::compareDNASeq(string oligo, string seq){
1476         try {
1477                 bool success = 1;
1478                 int length = oligo.length();
1479                 
1480                 for(int i=0;i<length;i++){
1481                         
1482                         if(oligo[i] != seq[i]){
1483                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1484                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1485                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1486                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1487                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1488                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1489                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1490                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1491                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1492                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1493                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1494                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1495                                 
1496                                 if(success == 0)        {       break;   }
1497                         }
1498                         else{
1499                                 success = 1;
1500                         }
1501                 }
1502                 
1503                 return success;
1504         }
1505         catch(exception& e) {
1506                 m->errorOut(e, "TrimOligos", "compareDNASeq");
1507                 exit(1);
1508         }
1509         
1510 }
1511 //********************************************************************/
1512 int TrimOligos::countDiffs(string oligo, string seq){
1513         try {
1514                 
1515                 int length = oligo.length();
1516                 int countDiffs = 0;
1517                 
1518                 for(int i=0;i<length;i++){
1519                         
1520                         if(oligo[i] != seq[i]){
1521                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1522                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1523                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1524                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1525                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1526                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1527                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1528                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1529                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1530                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1531                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1532                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1533                         }
1534                         
1535                 }
1536                 
1537                 return countDiffs;
1538         }
1539         catch(exception& e) {
1540                 m->errorOut(e, "TrimOligos", "countDiffs");
1541                 exit(1);
1542         }
1543 }
1544 /********************************************************************/
1545
1546
1547