]> git.donarmstrong.com Git - mothur.git/blob - trimoligos.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / trimoligos.cpp
1 /*
2  *  trimoligos.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/1/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
13
14
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
18         try {
19                 m = MothurOut::getInstance();
20                 
21                 pdiffs = p;
22                 bdiffs = b;
23         ldiffs = l;
24         sdiffs = s;
25                 
26                 barcodes = br;
27                 primers = pr;
28                 revPrimer = r;
29         linker = lk;
30         spacer = sp;
31         maxFBarcodeLength = 0;
32         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
33             if(it->first.length() > maxFBarcodeLength){
34                 maxFBarcodeLength = it->first.length();
35             }
36         }
37         maxFPrimerLength = 0;
38         map<string,int>::iterator it; 
39         for(it=primers.begin();it!=primers.end();it++){
40             if(it->first.length() > maxFPrimerLength){
41                 maxFPrimerLength = it->first.length();
42             }
43         }
44         
45         maxLinkerLength = 0;
46         for(int i = 0; i < linker.size(); i++){
47             if(linker[i].length() > maxLinkerLength){
48                 maxLinkerLength = linker[i].length();
49             }
50         }
51         
52         maxSpacerLength = 0;
53         for(int i = 0; i < spacer.size(); i++){
54             if(spacer[i].length() > maxSpacerLength){
55                 maxSpacerLength = spacer[i].length();
56             }
57         }
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "TrimOligos", "TrimOligos");
61                 exit(1);
62         }
63 }
64 /********************************************************************/
65 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
66 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
67         try {
68                 m = MothurOut::getInstance();
69                 
70                 pdiffs = p;
71                 bdiffs = b;
72         ldiffs = l;
73         sdiffs = s;
74                 
75         maxFBarcodeLength = 0;
76         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
77             string forward = it->second.forward;
78             map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
79             
80             if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length();  }
81             
82             if (itForward == ifbarcodes.end()) {
83                 vector<int> temp; temp.push_back(it->first);
84                 ifbarcodes[forward] = temp;
85             }else { itForward->second.push_back(it->first); }
86         }
87
88         maxFPrimerLength = 0;
89         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
90             string forward = it->second.forward;
91             map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
92             
93             if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length();  }
94             
95             if (itForward == ifprimers.end()) {
96                 vector<int> temp; temp.push_back(it->first);
97                 ifprimers[forward] = temp;
98             }else { itForward->second.push_back(it->first); }
99         }
100         
101         maxRBarcodeLength = 0;
102         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
103             string forward = it->second.reverse;
104             map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
105             
106             if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length();  }
107             
108             if (itForward == irbarcodes.end()) {
109                 vector<int> temp; temp.push_back(it->first);
110                 irbarcodes[forward] = temp;
111             }else { itForward->second.push_back(it->first); }
112         }
113         
114         maxRPrimerLength = 0;
115         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
116             string forward = it->second.reverse;
117             map<string, vector<int> >::iterator itForward = irprimers.find(forward);
118             
119             if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length();  }
120             
121             if (itForward == irprimers.end()) {
122                 vector<int> temp; temp.push_back(it->first);
123                 irprimers[forward] = temp;
124             }else { itForward->second.push_back(it->first); }
125         }
126
127         ipbarcodes = br;
128         ipprimers = pr;
129         }
130         catch(exception& e) {
131                 m->errorOut(e, "TrimOligos", "TrimOligos");
132                 exit(1);
133         }
134 }
135 /********************************************************************/
136 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
137 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
138         try {
139                 m = MothurOut::getInstance();
140                 
141                 pdiffs = p;
142                 bdiffs = b;
143                 
144                 barcodes = br;
145                 primers = pr;
146                 revPrimer = r;
147         
148         maxFBarcodeLength = 0;
149         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
150             string oligo = it->first;
151             if(oligo.length() > maxFBarcodeLength){
152                 maxFBarcodeLength = oligo.length();
153             }
154         }
155         maxRBarcodeLength = maxFBarcodeLength;
156         
157         maxFPrimerLength = 0;
158         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
159             string oligo = it->first;
160             if(oligo.length() > maxFPrimerLength){
161                 maxFPrimerLength = oligo.length();
162             }
163         }
164         maxRPrimerLength = maxFPrimerLength;
165         }
166         catch(exception& e) {
167                 m->errorOut(e, "TrimOligos", "TrimOligos");
168                 exit(1);
169         }
170 }
171 /********************************************************************/
172 TrimOligos::~TrimOligos() {}
173 //********************************************************************/
174 bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
175         try {
176                 
177                 string rawSequence = seq.getUnaligned();
178                 
179                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
180                         string oligo = it->first;
181                         
182                         if(rawSequence.length() < oligo.length()) {  break;  }
183                         
184                         //search for primer
185             int olength = oligo.length();
186             for (int j = 0; j < rawSequence.length()-olength; j++){
187                 if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
188                 string rawChunk = rawSequence.substr(j, olength);
189                 if(compareDNASeq(oligo, rawChunk)) {
190                     primerStart = j;
191                     primerEnd = primerStart + olength;
192                     return true;
193                 }
194                 
195             }
196         }       
197                 
198         primerStart = 0; primerEnd = 0;
199         //if you don't want to allow for diffs
200                 if (pdiffs == 0) { return false;  }
201                 else { //try aligning and see if you can find it
202                                                 
203                         //can you find the barcode
204                         int minDiff = 1e6;
205                         int minCount = 1;
206                         
207             Alignment* alignment;
208             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));  }
209             else{ alignment = NULL; } 
210             
211                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
212                 
213                 string prim = it->first;
214                 //search for primer
215                 int olength = prim.length();
216                 if (rawSequence.length() < olength) {} //ignore primers too long for this seq
217                 else{
218                     for (int j = 0; j < rawSequence.length()-olength; j++){
219                         
220                         string oligo = it->first;
221
222                         if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
223                                 
224                         string rawChunk = rawSequence.substr(j, olength+pdiffs);
225                         
226                         //use needleman to align first primer.length()+numdiffs of sequence to each barcode
227                         alignment->align(oligo, rawChunk);
228                         oligo = alignment->getSeqAAln();
229                         string temp = alignment->getSeqBAln();
230                                 
231                         int alnLength = oligo.length();
232                                 
233                         for(int i=oligo.length()-1;i>=0;i--){
234                             if(oligo[i] != '-'){        alnLength = i+1;        break;  }
235                         }
236                         oligo = oligo.substr(0,alnLength);
237                         temp = temp.substr(0,alnLength);
238                                 
239                         int numDiff = countDiffs(oligo, temp);
240                                 
241                         if(numDiff < minDiff){
242                             minDiff = numDiff;
243                             minCount = 1;
244                             primerStart = j;
245                             primerEnd = primerStart + alnLength;
246                         }else if(numDiff == minDiff){ minCount++; }
247                     }
248                 }
249                         }
250                         
251             if (alignment != NULL) {  delete alignment;  }
252             
253                         if(minDiff > pdiffs)    {       primerStart = 0; primerEnd = 0; return false;           }       //no good matches
254                         else if(minCount > 1)   {       primerStart = 0; primerEnd = 0; return false;   }       //can't tell the difference between multiple primers
255                         else{  return true; }
256                 }
257        
258         primerStart = 0; primerEnd = 0;
259                 return false;
260                 
261         }
262         catch(exception& e) {
263                 m->errorOut(e, "TrimOligos", "stripForward");
264                 exit(1);
265         }
266 }
267 //******************************************************************/
268 bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
269         try {
270                 
271                 string rawSequence = seq.getUnaligned();
272                 
273                 for(int i=0;i<revPrimer.size();i++){
274                         string oligo = revPrimer[i];
275                         if(rawSequence.length() < oligo.length()) {  break;  }
276                         
277                         //search for primer
278             int olength = oligo.length();
279             for (int j = rawSequence.length()-olength; j >= 0; j--){
280                 if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
281                 string rawChunk = rawSequence.substr(j, olength);
282                 
283                 if(compareDNASeq(oligo, rawChunk)) {
284                     primerStart = j;
285                     primerEnd = primerStart + olength;
286                     return true;
287                 }
288                 
289             }
290                 }       
291                 
292         primerStart = 0; primerEnd = 0;
293                 return false;
294         }
295         catch(exception& e) {
296                 m->errorOut(e, "PcrSeqsCommand", "findReverse");
297                 exit(1);
298         }
299 }
300 //*******************************************************************/
301
302 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
303         try {
304                 
305                 string rawSequence = seq.getUnaligned();
306                 int success = bdiffs + 1;       //guilty until proven innocent
307                 
308                 //can you find the barcode
309                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
310                         string oligo = it->first;
311                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
312                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
313                                 break;  
314                         }
315                         
316                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
317                                 group = it->second;
318                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
319                                 
320                                 if(qual.getName() != ""){
321                                         qual.trimQScores(oligo.length(), -1);
322                                 }
323                                 
324                                 success = 0;
325                                 break;
326                         }
327                 }
328                 
329                 //if you found the barcode or if you don't want to allow for diffs
330                 if ((bdiffs == 0) || (success == 0)) { return success;  }
331                 
332                 else { //try aligning and see if you can find it
333                         Alignment* alignment;
334                         if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
335                         else{ alignment = NULL; } 
336                         
337                         //can you find the barcode
338                         int minDiff = 1e6;
339                         int minCount = 1;
340                         int minGroup = -1;
341                         int minPos = 0;
342                         
343                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
344                                 string oligo = it->first;
345                                 //                              int length = oligo.length();
346                                 
347                                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
348                                         success = bdiffs + 10;
349                                         break;
350                                 }
351                                 
352                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
353                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
354                                 oligo = alignment->getSeqAAln();
355                                 string temp = alignment->getSeqBAln();
356                                 
357                                 int alnLength = oligo.length();
358                                 
359                                 for(int i=oligo.length()-1;i>=0;i--){
360                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
361                                 }
362                                 oligo = oligo.substr(0,alnLength);
363                                 temp = temp.substr(0,alnLength);
364                 int numDiff = countDiffs(oligo, temp);
365                                 
366                                 if(numDiff < minDiff){
367                                         minDiff = numDiff;
368                                         minCount = 1;
369                                         minGroup = it->second;
370                                         minPos = 0;
371                                         for(int i=0;i<alnLength;i++){
372                                                 if(temp[i] != '-'){
373                                                         minPos++;
374                                                 }
375                                         }
376                                 }
377                                 else if(numDiff == minDiff){
378                                         minCount++;
379                                 }
380                                 
381                         }
382                         
383                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
384                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
385                         else{                                                                                                   //use the best match
386                                 group = minGroup;
387                                 seq.setUnaligned(rawSequence.substr(minPos));
388     
389                                 if(qual.getName() != ""){
390                                         qual.trimQScores(minPos, -1);
391                                 }
392                                 success = minDiff;
393                         }
394                         
395                         if (alignment != NULL) {  delete alignment;  }
396                         
397                 }
398                 
399                 return success;
400                 
401         }
402         catch(exception& e) {
403                 m->errorOut(e, "TrimOligos", "stripBarcode");
404                 exit(1);
405         }
406 }
407 //*******************************************************************/
408 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
409         try {
410                 //look for forward barcode
411                 string rawFSequence = forwardSeq.getUnaligned();
412         string rawRSequence = reverseSeq.getUnaligned();
413                 int success = bdiffs + 1;       //guilty until proven innocent
414                 
415                 //can you find the forward barcode
416                 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
417                         string foligo = it->second.forward;
418             string roligo = it->second.reverse;
419             
420                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
421                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
422                                 break;  
423                         }
424             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
425                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
426                                 break;  
427                         }
428                         
429                         if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
430                                 group = it->first;
431                                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
432                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
433                                 success = 0;
434                                 break;
435                         }
436                 }
437                 
438                 //if you found the barcode or if you don't want to allow for diffs
439                 if ((bdiffs == 0) || (success == 0)) { return success;  }
440                 else { //try aligning and see if you can find it
441                         Alignment* alignment;
442                         if (ifbarcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
443                         else{ alignment = NULL; } 
444                         
445                         //can you find the barcode
446                         int minDiff = 1e6;
447                         int minCount = 1;
448                         vector< vector<int> > minFGroup;
449                         vector<int> minFPos; 
450             
451             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
452             /*
453                 1   Sarah   Westcott
454                 2   John    Westcott
455                 3   Anna    Westcott
456                 4   Sarah   Schloss
457                 5   Pat     Schloss
458                 6   Gail    Brown
459                 7   Pat     Moore
460              
461                 only want to look for forward = Sarah, John, Anna, Pat, Gail
462                                       reverse = Westcott, Schloss, Brown, Moore
463              
464                 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
465              */
466             //cout << endl << forwardSeq.getName() << endl;         
467                         for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
468                                 string oligo = it->first;
469                                 
470                                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
471                                         success = bdiffs + 10;
472                                         break;
473                                 }
474                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
475                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
476                                 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
477                                 oligo = alignment->getSeqAAln();
478                                 string temp = alignment->getSeqBAln();
479                
480                                 int alnLength = oligo.length();
481                                 
482                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
483                                 oligo = oligo.substr(0,alnLength);
484                                 temp = temp.substr(0,alnLength);
485                 int numDiff = countDiffs(oligo, temp);
486                                 
487                 if (alnLength == 0) { numDiff = bdiffs + 100; }
488                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
489                 
490                                 if(numDiff < minDiff){
491                                         minDiff = numDiff;
492                                         minCount = 1;
493                                         minFGroup.clear();
494                     minFGroup.push_back(it->second);
495                                         int tempminFPos = 0;
496                     minFPos.clear();
497                                         for(int i=0;i<alnLength;i++){
498                                                 if(temp[i] != '-'){
499                                                         tempminFPos++;
500                                                 }
501                                         }
502                     minFPos.push_back(tempminFPos);
503                                 }else if(numDiff == minDiff){
504                                         minFGroup.push_back(it->second);
505                     int tempminFPos = 0;
506                                         for(int i=0;i<alnLength;i++){
507                                                 if(temp[i] != '-'){
508                                                         tempminFPos++;
509                                                 }
510                                         }
511                     minFPos.push_back(tempminFPos);
512                                 }
513                         }
514             
515                         //cout << minDiff << '\t' << minCount << '\t' << endl;
516                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
517                         else{   
518                 //check for reverse match
519                 if (alignment != NULL) {  delete alignment;  }
520                 
521                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1));   }
522                 else{ alignment = NULL; } 
523                 
524                 //can you find the barcode
525                 minDiff = 1e6;
526                 minCount = 1;
527                 vector< vector<int> > minRGroup;
528                 vector<int> minRPos;
529                 
530                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
531                     string oligo = it->first;
532                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
533                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
534                         success = bdiffs + 10;
535                         break;
536                     }
537                     
538                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
539                     alignment->align(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
540                     oligo = alignment->getSeqAAln();
541                     string temp = alignment->getSeqBAln();
542                     
543                     int alnLength = oligo.length();
544                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
545                     oligo = oligo.substr(0,alnLength);
546                     temp = temp.substr(0,alnLength);
547                     int numDiff = countDiffs(oligo, temp);
548                     if (alnLength == 0) { numDiff = bdiffs + 100; }
549                     
550                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
551                     if(numDiff < minDiff){
552                         minDiff = numDiff;
553                         minCount = 1;
554                         minRGroup.clear();
555                         minRGroup.push_back(it->second);
556                         int tempminRPos = 0;
557                         minRPos.clear();
558                         for(int i=0;i<alnLength;i++){
559                             if(temp[i] != '-'){
560                                 tempminRPos++;
561                             }
562                         }
563                         minRPos.push_back(tempminRPos);                    
564                     }else if(numDiff == minDiff){
565                         int tempminRPos = 0;
566                         for(int i=0;i<alnLength;i++){
567                             if(temp[i] != '-'){
568                                 tempminRPos++;
569                             }
570                         }
571                         minRPos.push_back(tempminRPos);  
572                         minRGroup.push_back(it->second);
573                     }
574                     
575                 }
576                 
577                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
578                 for (int i = 0; i < minFGroup.size(); i++) { 
579                     cout << i << '\t';
580                     for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
581                     cout << endl;
582                 }
583                 cout << endl;
584                 for (int i = 0; i < minRGroup.size(); i++) { 
585                     cout << i << '\t';
586                     for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
587                     cout << endl;
588                 }
589                 cout << endl;*/
590                 if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
591                 else  {  
592                     bool foundMatch = false;
593                     vector<int> matches;
594                     for (int i = 0; i < minFGroup.size(); i++) {
595                         for (int j = 0; j < minFGroup[i].size(); j++) {
596                             for (int k = 0; k < minRGroup.size(); k++) {
597                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
598                             }
599                         }
600                     }
601                     
602                     int fStart = 0;
603                     int rStart = 0;
604                     if (matches.size() == 1) { 
605                         foundMatch = true;
606                         group = matches[0]; 
607                         fStart = minFPos[0];
608                         rStart = minRPos[0];
609                     }
610                     
611                     //we have an acceptable match for the forward and reverse, but do they match?
612                     if (foundMatch) {
613                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
614                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
615                         success = minDiff;
616                     }else { success = bdiffs + 100;     }
617                 }
618                         }
619                         
620                         if (alignment != NULL) {  delete alignment;  }
621                 }
622                 
623                 return success;
624                 
625         }
626         catch(exception& e) {
627                 m->errorOut(e, "TrimOligos", "stripIBarcode");
628                 exit(1);
629         }
630         
631 }
632 //*******************************************************************/
633 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
634         try {
635                 //look for forward barcode
636                 string rawFSequence = forwardSeq.getUnaligned();
637         string rawRSequence = reverseSeq.getUnaligned();
638                 int success = bdiffs + 1;       //guilty until proven innocent
639                 
640                 //can you find the forward barcode
641                 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
642                         string foligo = it->second.forward;
643             string roligo = it->second.reverse;
644             
645                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
646                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
647                                 break;  
648                         }
649             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
650                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
651                                 break;  
652                         }
653                         
654                         if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
655                                 group = it->first;
656                                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
657                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
658                 forwardQual.trimQScores(foligo.length(), -1);
659                 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
660                                 success = 0;
661                                 break;
662                         }
663                 }
664                 
665                 //if you found the barcode or if you don't want to allow for diffs
666                 if ((bdiffs == 0) || (success == 0)) { return success;  }
667                 else { //try aligning and see if you can find it
668                         Alignment* alignment;
669                         if (ifbarcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
670                         else{ alignment = NULL; } 
671                         
672                         //can you find the barcode
673                         int minDiff = 1e6;
674                         int minCount = 1;
675                         vector< vector<int> > minFGroup;
676                         vector<int> minFPos; 
677             
678             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
679             /*
680              1   Sarah   Westcott
681              2   John    Westcott
682              3   Anna    Westcott
683              4   Sarah   Schloss
684              5   Pat     Schloss
685              6   Gail    Brown
686              7   Pat     Moore
687              
688              only want to look for forward = Sarah, John, Anna, Pat, Gail
689              reverse = Westcott, Schloss, Brown, Moore
690              
691              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
692              */
693             //cout << endl << forwardSeq.getName() << endl;         
694                         for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
695                                 string oligo = it->first;
696                                 
697                                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
698                                         success = bdiffs + 10;
699                                         break;
700                                 }
701                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
702                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
703                                 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
704                                 oligo = alignment->getSeqAAln();
705                                 string temp = alignment->getSeqBAln();
706                 
707                                 int alnLength = oligo.length();
708                                 
709                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
710                                 oligo = oligo.substr(0,alnLength);
711                                 temp = temp.substr(0,alnLength);
712                 int numDiff = countDiffs(oligo, temp);
713                                 
714                 if (alnLength == 0) { numDiff = bdiffs + 100; }
715                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
716                 
717                                 if(numDiff < minDiff){
718                                         minDiff = numDiff;
719                                         minCount = 1;
720                                         minFGroup.clear();
721                     minFGroup.push_back(it->second);
722                                         int tempminFPos = 0;
723                     minFPos.clear();
724                                         for(int i=0;i<alnLength;i++){
725                                                 if(temp[i] != '-'){
726                                                         tempminFPos++;
727                                                 }
728                                         }
729                     minFPos.push_back(tempminFPos);
730                                 }else if(numDiff == minDiff){
731                                         minFGroup.push_back(it->second);
732                     int tempminFPos = 0;
733                                         for(int i=0;i<alnLength;i++){
734                                                 if(temp[i] != '-'){
735                                                         tempminFPos++;
736                                                 }
737                                         }
738                     minFPos.push_back(tempminFPos);
739                                 }
740                         }
741             
742                         //cout << minDiff << '\t' << minCount << '\t' << endl;
743                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
744                         else{   
745                 //check for reverse match
746                 if (alignment != NULL) {  delete alignment;  }
747                 
748                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1));   }
749                 else{ alignment = NULL; } 
750                 
751                 //can you find the barcode
752                 minDiff = 1e6;
753                 minCount = 1;
754                 vector< vector<int> > minRGroup;
755                 vector<int> minRPos;
756                 
757                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
758                     string oligo = it->first;
759                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
760                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
761                         success = bdiffs + 10;
762                         break;
763                     }
764                     
765                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
766                     alignment->align(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
767                     oligo = alignment->getSeqAAln();
768                     string temp = alignment->getSeqBAln();
769                     
770                     int alnLength = oligo.length();
771                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
772                     oligo = oligo.substr(0,alnLength);
773                     temp = temp.substr(0,alnLength);
774                     int numDiff = countDiffs(oligo, temp);
775                     if (alnLength == 0) { numDiff = bdiffs + 100; }
776                     
777                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
778                     if(numDiff < minDiff){
779                         minDiff = numDiff;
780                         minCount = 1;
781                         minRGroup.clear();
782                         minRGroup.push_back(it->second);
783                         int tempminRPos = 0;
784                         minRPos.clear();
785                         for(int i=0;i<alnLength;i++){
786                             if(temp[i] != '-'){
787                                 tempminRPos++;
788                             }
789                         }
790                         minRPos.push_back(tempminRPos);                    
791                     }else if(numDiff == minDiff){
792                         int tempminRPos = 0;
793                         for(int i=0;i<alnLength;i++){
794                             if(temp[i] != '-'){
795                                 tempminRPos++;
796                             }
797                         }
798                         minRPos.push_back(tempminRPos);  
799                         minRGroup.push_back(it->second);
800                     }
801                     
802                 }
803                 
804                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
805                  for (int i = 0; i < minFGroup.size(); i++) { 
806                  cout << i << '\t';
807                  for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
808                  cout << endl;
809                  }
810                  cout << endl;
811                  for (int i = 0; i < minRGroup.size(); i++) { 
812                  cout << i << '\t';
813                  for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
814                  cout << endl;
815                  }
816                  cout << endl;*/
817                 if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
818                 else  {  
819                     bool foundMatch = false;
820                     vector<int> matches;
821                     for (int i = 0; i < minFGroup.size(); i++) {
822                         for (int j = 0; j < minFGroup[i].size(); j++) {
823                             for (int k = 0; k < minRGroup.size(); k++) {
824                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
825                             }
826                         }
827                     }
828                     
829                     int fStart = 0;
830                     int rStart = 0;
831                     if (matches.size() == 1) { 
832                         foundMatch = true;
833                         group = matches[0]; 
834                         fStart = minFPos[0];
835                         rStart = minRPos[0];
836                     }
837                     
838                     //we have an acceptable match for the forward and reverse, but do they match?
839                     if (foundMatch) {
840                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
841                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
842                         forwardQual.trimQScores(fStart, -1);
843                         reverseQual.trimQScores(rStart, -1);
844                         success = minDiff;
845                     }else { success = bdiffs + 100;     }
846                 }
847                         }
848                         
849                         if (alignment != NULL) {  delete alignment;  }
850                 }
851                 
852                 return success;
853                 
854         }
855         catch(exception& e) {
856                 m->errorOut(e, "TrimOligos", "stripIBarcode");
857                 exit(1);
858         }
859         
860 }
861 //*******************************************************************/
862 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
863         try {
864                 //look for forward barcode
865                 string rawFSequence = forwardSeq.getUnaligned();
866         string rawRSequence = reverseSeq.getUnaligned();
867                 int success = pdiffs + 1;       //guilty until proven innocent
868                 
869                 //can you find the forward barcode
870                 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
871                         string foligo = it->second.forward;
872             string roligo = it->second.reverse;
873             
874                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
875                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
876                                 break;  
877                         }
878             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
879                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
880                                 break;  
881                         }
882                         
883                         if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
884                                 group = it->first;
885                                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
886                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
887                 forwardQual.trimQScores(foligo.length(), -1);
888                 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
889                                 success = 0;
890                                 break;
891                         }
892                 }
893                 
894                 //if you found the barcode or if you don't want to allow for diffs
895                 if ((pdiffs == 0) || (success == 0)) { return success;  }
896                 else { //try aligning and see if you can find it
897                         Alignment* alignment;
898                         if (ifprimers.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
899                         else{ alignment = NULL; } 
900                         
901                         //can you find the barcode
902                         int minDiff = 1e6;
903                         int minCount = 1;
904                         vector< vector<int> > minFGroup;
905                         vector<int> minFPos; 
906             
907             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
908             /*
909              1   Sarah   Westcott
910              2   John    Westcott
911              3   Anna    Westcott
912              4   Sarah   Schloss
913              5   Pat     Schloss
914              6   Gail    Brown
915              7   Pat     Moore
916              
917              only want to look for forward = Sarah, John, Anna, Pat, Gail
918              reverse = Westcott, Schloss, Brown, Moore
919              
920              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
921              */
922             //cout << endl << forwardSeq.getName() << endl;         
923                         for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
924                                 string oligo = it->first;
925                                 
926                                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
927                                         success = pdiffs + 10;
928                                         break;
929                                 }
930                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
931                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
932                                 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
933                                 oligo = alignment->getSeqAAln();
934                                 string temp = alignment->getSeqBAln();
935                 
936                                 int alnLength = oligo.length();
937                                 
938                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
939                                 oligo = oligo.substr(0,alnLength);
940                                 temp = temp.substr(0,alnLength);
941                 int numDiff = countDiffs(oligo, temp);
942                                 
943                 if (alnLength == 0) { numDiff = pdiffs + 100; }
944                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
945                 
946                                 if(numDiff < minDiff){
947                                         minDiff = numDiff;
948                                         minCount = 1;
949                                         minFGroup.clear();
950                     minFGroup.push_back(it->second);
951                                         int tempminFPos = 0;
952                     minFPos.clear();
953                                         for(int i=0;i<alnLength;i++){
954                                                 if(temp[i] != '-'){
955                                                         tempminFPos++;
956                                                 }
957                                         }
958                     minFPos.push_back(tempminFPos);
959                                 }else if(numDiff == minDiff){
960                                         minFGroup.push_back(it->second);
961                     int tempminFPos = 0;
962                                         for(int i=0;i<alnLength;i++){
963                                                 if(temp[i] != '-'){
964                                                         tempminFPos++;
965                                                 }
966                                         }
967                     minFPos.push_back(tempminFPos);
968                                 }
969                         }
970             
971                         //cout << minDiff << '\t' << minCount << '\t' << endl;
972                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
973                         else{   
974                 //check for reverse match
975                 if (alignment != NULL) {  delete alignment;  }
976                 
977                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1));   }
978                 else{ alignment = NULL; } 
979                 
980                 //can you find the barcode
981                 minDiff = 1e6;
982                 minCount = 1;
983                 vector< vector<int> > minRGroup;
984                 vector<int> minRPos;
985                 
986                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
987                     string oligo = it->first;
988                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
989                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
990                         success = pdiffs + 10;
991                         break;
992                     }
993                     
994                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
995                     alignment->align(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
996                     oligo = alignment->getSeqAAln();
997                     string temp = alignment->getSeqBAln();
998                     
999                     int alnLength = oligo.length();
1000                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1001                     oligo = oligo.substr(0,alnLength);
1002                     temp = temp.substr(0,alnLength);
1003                     int numDiff = countDiffs(oligo, temp);
1004                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1005                     
1006                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1007                     if(numDiff < minDiff){
1008                         minDiff = numDiff;
1009                         minCount = 1;
1010                         minRGroup.clear();
1011                         minRGroup.push_back(it->second);
1012                         int tempminRPos = 0;
1013                         minRPos.clear();
1014                         for(int i=0;i<alnLength;i++){
1015                             if(temp[i] != '-'){
1016                                 tempminRPos++;
1017                             }
1018                         }
1019                         minRPos.push_back(tempminRPos);                    
1020                     }else if(numDiff == minDiff){
1021                         int tempminRPos = 0;
1022                         for(int i=0;i<alnLength;i++){
1023                             if(temp[i] != '-'){
1024                                 tempminRPos++;
1025                             }
1026                         }
1027                         minRPos.push_back(tempminRPos);  
1028                         minRGroup.push_back(it->second);
1029                     }
1030                     
1031                 }
1032                 
1033                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1034                  for (int i = 0; i < minFGroup.size(); i++) { 
1035                  cout << i << '\t';
1036                  for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
1037                  cout << endl;
1038                  }
1039                  cout << endl;
1040                  for (int i = 0; i < minRGroup.size(); i++) { 
1041                  cout << i << '\t';
1042                  for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
1043                  cout << endl;
1044                  }
1045                  cout << endl;*/
1046                 if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1047                 else  {  
1048                     bool foundMatch = false;
1049                     vector<int> matches;
1050                     for (int i = 0; i < minFGroup.size(); i++) {
1051                         for (int j = 0; j < minFGroup[i].size(); j++) {
1052                             for (int k = 0; k < minRGroup.size(); k++) {
1053                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1054                             }
1055                         }
1056                     }
1057                     
1058                     int fStart = 0;
1059                     int rStart = 0;
1060                     if (matches.size() == 1) { 
1061                         foundMatch = true;
1062                         group = matches[0]; 
1063                         fStart = minFPos[0];
1064                         rStart = minRPos[0];
1065                     }
1066                     
1067                     //we have an acceptable match for the forward and reverse, but do they match?
1068                     if (foundMatch) {
1069                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1070                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1071                         forwardQual.trimQScores(fStart, -1);
1072                         reverseQual.trimQScores(rStart, -1);
1073                         success = minDiff;
1074                     }else { success = pdiffs + 100;     }
1075                 }
1076                         }
1077                         
1078                         if (alignment != NULL) {  delete alignment;  }
1079                 }
1080                 
1081                 return success;
1082                 
1083         }
1084         catch(exception& e) {
1085                 m->errorOut(e, "TrimOligos", "stripIForward");
1086                 exit(1);
1087         }
1088         
1089 }
1090 //*******************************************************************/
1091 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
1092         try {
1093                 //look for forward barcode
1094                 string rawFSequence = forwardSeq.getUnaligned();
1095         string rawRSequence = reverseSeq.getUnaligned();
1096                 int success = pdiffs + 1;       //guilty until proven innocent
1097                 
1098                 //can you find the forward barcode
1099                 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1100                         string foligo = it->second.forward;
1101             string roligo = it->second.reverse;
1102             
1103                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
1104                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1105                                 break;  
1106                         }
1107             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
1108                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1109                                 break;  
1110                         }
1111                         
1112                         if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1113                                 group = it->first;
1114                                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1115                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
1116                                 success = 0;
1117                                 break;
1118                         }
1119                 }
1120                 
1121                 //if you found the barcode or if you don't want to allow for diffs
1122                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1123                 else { //try aligning and see if you can find it
1124                         Alignment* alignment;
1125                         if (ifprimers.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
1126                         else{ alignment = NULL; } 
1127                         
1128                         //can you find the barcode
1129                         int minDiff = 1e6;
1130                         int minCount = 1;
1131                         vector< vector<int> > minFGroup;
1132                         vector<int> minFPos; 
1133             
1134             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1135             /*
1136              1   Sarah   Westcott
1137              2   John    Westcott
1138              3   Anna    Westcott
1139              4   Sarah   Schloss
1140              5   Pat     Schloss
1141              6   Gail    Brown
1142              7   Pat     Moore
1143              
1144              only want to look for forward = Sarah, John, Anna, Pat, Gail
1145              reverse = Westcott, Schloss, Brown, Moore
1146              
1147              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
1148              */
1149             //cout << endl << forwardSeq.getName() << endl;         
1150                         for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1151                                 string oligo = it->first;
1152                                 
1153                                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
1154                                         success = pdiffs + 10;
1155                                         break;
1156                                 }
1157                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1158                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1159                                 alignment->align(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1160                                 oligo = alignment->getSeqAAln();
1161                                 string temp = alignment->getSeqBAln();
1162                 
1163                                 int alnLength = oligo.length();
1164                                 
1165                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1166                                 oligo = oligo.substr(0,alnLength);
1167                                 temp = temp.substr(0,alnLength);
1168                 int numDiff = countDiffs(oligo, temp);
1169                                 
1170                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1171                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1172                 
1173                                 if(numDiff < minDiff){
1174                                         minDiff = numDiff;
1175                                         minCount = 1;
1176                                         minFGroup.clear();
1177                     minFGroup.push_back(it->second);
1178                                         int tempminFPos = 0;
1179                     minFPos.clear();
1180                                         for(int i=0;i<alnLength;i++){
1181                                                 if(temp[i] != '-'){
1182                                                         tempminFPos++;
1183                                                 }
1184                                         }
1185                     minFPos.push_back(tempminFPos);
1186                                 }else if(numDiff == minDiff){
1187                                         minFGroup.push_back(it->second);
1188                     int tempminFPos = 0;
1189                                         for(int i=0;i<alnLength;i++){
1190                                                 if(temp[i] != '-'){
1191                                                         tempminFPos++;
1192                                                 }
1193                                         }
1194                     minFPos.push_back(tempminFPos);
1195                                 }
1196                         }
1197             
1198                         //cout << minDiff << '\t' << minCount << '\t' << endl;
1199                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1200                         else{   
1201                 //check for reverse match
1202                 if (alignment != NULL) {  delete alignment;  }
1203                 
1204                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1));   }
1205                 else{ alignment = NULL; } 
1206                 
1207                 //can you find the barcode
1208                 minDiff = 1e6;
1209                 minCount = 1;
1210                 vector< vector<int> > minRGroup;
1211                 vector<int> minRPos;
1212                 
1213                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1214                     string oligo = it->first;
1215                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1216                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1217                         success = pdiffs + 10;
1218                         break;
1219                     }
1220                     
1221                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1222                     alignment->align(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1223                     oligo = alignment->getSeqAAln();
1224                     string temp = alignment->getSeqBAln();
1225                     
1226                     int alnLength = oligo.length();
1227                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1228                     oligo = oligo.substr(0,alnLength);
1229                     temp = temp.substr(0,alnLength);
1230                     int numDiff = countDiffs(oligo, temp);
1231                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1232                     
1233                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1234                     if(numDiff < minDiff){
1235                         minDiff = numDiff;
1236                         minCount = 1;
1237                         minRGroup.clear();
1238                         minRGroup.push_back(it->second);
1239                         int tempminRPos = 0;
1240                         minRPos.clear();
1241                         for(int i=0;i<alnLength;i++){
1242                             if(temp[i] != '-'){
1243                                 tempminRPos++;
1244                             }
1245                         }
1246                         minRPos.push_back(tempminRPos);                    
1247                     }else if(numDiff == minDiff){
1248                         int tempminRPos = 0;
1249                         for(int i=0;i<alnLength;i++){
1250                             if(temp[i] != '-'){
1251                                 tempminRPos++;
1252                             }
1253                         }
1254                         minRPos.push_back(tempminRPos);  
1255                         minRGroup.push_back(it->second);
1256                     }
1257                     
1258                 }
1259                 
1260                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1261                  for (int i = 0; i < minFGroup.size(); i++) { 
1262                  cout << i << '\t';
1263                  for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
1264                  cout << endl;
1265                  }
1266                  cout << endl;
1267                  for (int i = 0; i < minRGroup.size(); i++) { 
1268                  cout << i << '\t';
1269                  for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
1270                  cout << endl;
1271                  }
1272                  cout << endl;*/
1273                 if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1274                 else  {  
1275                     bool foundMatch = false;
1276                     vector<int> matches;
1277                     for (int i = 0; i < minFGroup.size(); i++) {
1278                         for (int j = 0; j < minFGroup[i].size(); j++) {
1279                             for (int k = 0; k < minRGroup.size(); k++) {
1280                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1281                             }
1282                         }
1283                     }
1284                     
1285                     int fStart = 0;
1286                     int rStart = 0;
1287                     if (matches.size() == 1) { 
1288                         foundMatch = true;
1289                         group = matches[0]; 
1290                         fStart = minFPos[0];
1291                         rStart = minRPos[0];
1292                     }
1293                     
1294                     //we have an acceptable match for the forward and reverse, but do they match?
1295                     if (foundMatch) {
1296                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1297                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1298                         success = minDiff;
1299                     }else { success = pdiffs + 100;     }
1300                 }
1301                         }
1302                         
1303                         if (alignment != NULL) {  delete alignment;  }
1304                 }
1305                 
1306                 return success;
1307                 
1308         }
1309         catch(exception& e) {
1310                 m->errorOut(e, "TrimOligos", "stripIForward");
1311                 exit(1);
1312         }
1313         
1314 }
1315 //*******************************************************************/
1316 int TrimOligos::stripBarcode(Sequence& seq, int& group){
1317         try {
1318                 
1319                 string rawSequence = seq.getUnaligned();
1320                 int success = bdiffs + 1;       //guilty until proven innocent
1321                 
1322                 //can you find the barcode
1323                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1324                         string oligo = it->first;
1325                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
1326                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1327                                 break;  
1328                         }
1329                         
1330                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1331                                 group = it->second;
1332                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1333                                 
1334                                 success = 0;
1335                                 break;
1336                         }
1337                 }
1338                 
1339                 //if you found the barcode or if you don't want to allow for diffs
1340                 if ((bdiffs == 0) || (success == 0)) { return success;  }
1341                 
1342                 else { //try aligning and see if you can find it
1343             Alignment* alignment;
1344                         if (barcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));  }
1345                         else{ alignment = NULL; } 
1346                         
1347                         //can you find the barcode
1348                         int minDiff = 1e6;
1349                         int minCount = 1;
1350                         int minGroup = -1;
1351                         int minPos = 0;
1352                         
1353                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1354                                 string oligo = it->first;
1355                                 //                              int length = oligo.length();
1356                                 
1357                                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
1358                                         success = bdiffs + 10;
1359                                         break;
1360                                 }
1361                                 
1362                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1363                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1364                                 oligo = alignment->getSeqAAln();
1365                                 string temp = alignment->getSeqBAln();
1366                                  
1367                                 int alnLength = oligo.length();
1368                                 
1369                                 for(int i=oligo.length()-1;i>=0;i--){
1370                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1371                                 }
1372                                 oligo = oligo.substr(0,alnLength);
1373                                 temp = temp.substr(0,alnLength);
1374                                  
1375                                 int numDiff = countDiffs(oligo, temp);
1376                                 
1377                                 if(numDiff < minDiff){
1378                                         minDiff = numDiff;
1379                                         minCount = 1;
1380                                         minGroup = it->second;
1381                                         minPos = 0;
1382                                         for(int i=0;i<alnLength;i++){
1383                                                 if(temp[i] != '-'){
1384                                                         minPos++;
1385                                                 }
1386                                         }
1387                                 }
1388                                 else if(numDiff == minDiff){
1389                                         minCount++;
1390                                 }
1391                                 
1392                         }
1393                         
1394                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1395                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1396                         else{                                                                                                   //use the best match
1397                                 group = minGroup;
1398                                 seq.setUnaligned(rawSequence.substr(minPos));
1399                                 success = minDiff;
1400                         }
1401                         
1402                         if (alignment != NULL) {  delete alignment;  }
1403                         
1404                 }
1405                 
1406                 return success;
1407                 
1408         }
1409         catch(exception& e) {
1410                 m->errorOut(e, "TrimOligos", "stripBarcode");
1411                 exit(1);
1412         }
1413         
1414 }
1415
1416 /********************************************************************/
1417 int TrimOligos::stripForward(Sequence& seq, int& group){
1418         try {
1419                 
1420                 string rawSequence = seq.getUnaligned();
1421                 int success = pdiffs + 1;       //guilty until proven innocent
1422                 
1423                 //can you find the primer
1424                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1425                         string oligo = it->first;
1426                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1427                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1428                                 break;  
1429                         }
1430                         
1431                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1432                                 group = it->second;
1433                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1434                                 success = 0;
1435                                 break;
1436                         }
1437                 }
1438                 
1439                 //if you found the barcode or if you don't want to allow for diffs
1440                 if ((pdiffs == 0) || (success == 0)) {  return success;  }
1441                 
1442                 else { //try aligning and see if you can find it
1443                         Alignment* alignment;
1444                         if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
1445             else{ alignment = NULL; } 
1446                         
1447                         //can you find the barcode
1448                         int minDiff = 1e6;
1449                         int minCount = 1;
1450                         int minGroup = -1;
1451                         int minPos = 0;
1452                         
1453                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1454                                 string oligo = it->first;
1455                                 //                              int length = oligo.length();
1456                                 
1457                                 if(rawSequence.length() < maxFPrimerLength){    
1458                                         success = pdiffs + 100;
1459                                         break;
1460                                 }
1461                                 
1462                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1463                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1464                                 oligo = alignment->getSeqAAln();
1465                                 string temp = alignment->getSeqBAln();
1466                                 
1467                                 int alnLength = oligo.length();
1468                                 
1469                                 for(int i=oligo.length()-1;i>=0;i--){
1470                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1471                                 }
1472                                 oligo = oligo.substr(0,alnLength);
1473                                 temp = temp.substr(0,alnLength);
1474                                 
1475                                 int numDiff = countDiffs(oligo, temp);
1476                                 
1477                                 if(numDiff < minDiff){
1478                                         minDiff = numDiff;
1479                                         minCount = 1;
1480                                         minGroup = it->second;
1481                                         minPos = 0;
1482                                         for(int i=0;i<alnLength;i++){
1483                                                 if(temp[i] != '-'){
1484                                                         minPos++;
1485                                                 }
1486                                         }
1487                                 }
1488                                 else if(numDiff == minDiff){
1489                                         minCount++;
1490                                 }
1491                                 
1492                         }
1493                         
1494                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1495                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1496                         else{                                                                                                   //use the best match
1497                                 group = minGroup;
1498                                 seq.setUnaligned(rawSequence.substr(minPos));
1499                                 success = minDiff;
1500                         }
1501                         
1502                         if (alignment != NULL) {  delete alignment;  }
1503                         
1504                 }
1505                 
1506                 return success;
1507                 
1508         }
1509         catch(exception& e) {
1510                 m->errorOut(e, "TrimOligos", "stripForward");
1511                 exit(1);
1512         }
1513 }
1514 //*******************************************************************/
1515 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1516         try {
1517                 string rawSequence = seq.getUnaligned();
1518                 int success = pdiffs + 1;       //guilty until proven innocent
1519                 
1520                 //can you find the primer
1521                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1522                         string oligo = it->first;
1523                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1524                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1525                                 break;  
1526                         }
1527                         
1528                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1529                                 group = it->second;
1530                                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
1531                                 if(qual.getName() != ""){
1532                                         if (!keepForward) {  qual.trimQScores(oligo.length(), -1); }
1533                                 }
1534                                 success = 0;
1535                                 break;
1536                         }
1537                 }
1538                 
1539                 //if you found the barcode or if you don't want to allow for diffs
1540                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1541                 
1542                 else { //try aligning and see if you can find it
1543                         Alignment* alignment;
1544                         if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));  }
1545                         else{ alignment = NULL; } 
1546                         
1547                         //can you find the barcode
1548                         int minDiff = 1e6;
1549                         int minCount = 1;
1550                         int minGroup = -1;
1551                         int minPos = 0;
1552                         
1553                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1554                                 string oligo = it->first;
1555                                 //                              int length = oligo.length();
1556                                 
1557                                 if(rawSequence.length() < maxFPrimerLength){    
1558                                         success = pdiffs + 100;
1559                                         break;
1560                                 }
1561                                 
1562                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1563                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1564                                 oligo = alignment->getSeqAAln();
1565                                 string temp = alignment->getSeqBAln();
1566                                 
1567                                 int alnLength = oligo.length();
1568                                 
1569                                 for(int i=oligo.length()-1;i>=0;i--){
1570                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1571                                 }
1572                                 oligo = oligo.substr(0,alnLength);
1573                                 temp = temp.substr(0,alnLength);
1574                                 
1575                                 int numDiff = countDiffs(oligo, temp);
1576                                 
1577                                 if(numDiff < minDiff){
1578                                         minDiff = numDiff;
1579                                         minCount = 1;
1580                                         minGroup = it->second;
1581                                         minPos = 0;
1582                                         for(int i=0;i<alnLength;i++){
1583                                                 if(temp[i] != '-'){
1584                                                         minPos++;
1585                                                 }
1586                                         }
1587                                 }
1588                                 else if(numDiff == minDiff){
1589                                         minCount++;
1590                                 }
1591                                 
1592                         }
1593                         
1594                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1595                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1596                         else{                                                                                                   //use the best match
1597                                 group = minGroup;
1598                                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
1599                                 if(qual.getName() != ""){
1600                                         if (!keepForward) { qual.trimQScores(minPos, -1); }
1601                                 }
1602                                 success = minDiff;
1603                         }
1604                         
1605                         if (alignment != NULL) {  delete alignment;  }
1606                         
1607                 }
1608                 
1609                 return success;
1610                 
1611         }
1612         catch(exception& e) {
1613                 m->errorOut(e, "TrimOligos", "stripForward");
1614                 exit(1);
1615         }
1616 }
1617 //******************************************************************/
1618 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
1619         try {
1620                 string rawSequence = seq.getUnaligned();
1621                 bool success = 0;       //guilty until proven innocent
1622                 
1623                 for(int i=0;i<revPrimer.size();i++){
1624                         string oligo = revPrimer[i];
1625                         
1626                         if(rawSequence.length() < oligo.length()){
1627                                 success = 0;
1628                                 break;
1629                         }
1630                         
1631                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1632                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1633                                 if(qual.getName() != ""){
1634                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1635                                 }
1636                                 success = 1;
1637                                 break;
1638                         }
1639                 }       
1640                 return success;
1641                 
1642         }
1643         catch(exception& e) {
1644                 m->errorOut(e, "TrimOligos", "stripReverse");
1645                 exit(1);
1646         }
1647 }
1648 //******************************************************************/
1649 bool TrimOligos::stripReverse(Sequence& seq){
1650         try {
1651                 
1652                 string rawSequence = seq.getUnaligned();
1653                 bool success = 0;       //guilty until proven innocent
1654                 
1655                 for(int i=0;i<revPrimer.size();i++){
1656                         string oligo = revPrimer[i];
1657                         
1658                         if(rawSequence.length() < oligo.length()){
1659                                 success = 0;
1660                                 break;
1661                         }
1662                         
1663                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1664                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1665                                 success = 1;
1666                                 break;
1667                         }
1668                 }       
1669                 
1670                 return success;
1671                 
1672         }
1673         catch(exception& e) {
1674                 m->errorOut(e, "TrimOligos", "stripReverse");
1675                 exit(1);
1676         }
1677 }
1678 //******************************************************************/
1679 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
1680         try {
1681                 string rawSequence = seq.getUnaligned();
1682                 bool success = ldiffs + 1;      //guilty until proven innocent
1683                 
1684                 for(int i=0;i<linker.size();i++){
1685                         string oligo = linker[i];
1686                         
1687                         if(rawSequence.length() < oligo.length()){
1688                                 success = ldiffs + 10;
1689                                 break;
1690                         }
1691                         
1692                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1693                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1694                                 if(qual.getName() != ""){
1695                                         qual.trimQScores(oligo.length(), -1);
1696                                 }
1697                                 success = 0;
1698                                 break;
1699                         }
1700                 }
1701         
1702         //if you found the linker or if you don't want to allow for diffs
1703                 if ((ldiffs == 0) || (success == 0)) { return success;  }
1704                 
1705                 else { //try aligning and see if you can find it
1706                         Alignment* alignment;
1707                         if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1));   }     
1708                         else{ alignment = NULL; } 
1709                         
1710                         //can you find the barcode
1711                         int minDiff = 1e6;
1712                         int minCount = 1;
1713                         int minPos = 0;
1714                         
1715                         for(int i = 0; i < linker.size(); i++){
1716                                 string oligo = linker[i];
1717                                 //                              int length = oligo.length();
1718                                 
1719                                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
1720                                         success = ldiffs + 10;
1721                                         break;
1722                                 }
1723                                 
1724                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1725                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
1726                                 oligo = alignment->getSeqAAln();
1727                                 string temp = alignment->getSeqBAln();
1728                                 
1729                                 int alnLength = oligo.length();
1730                                 
1731                                 for(int i=oligo.length()-1;i>=0;i--){
1732                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1733                                 }
1734                                 oligo = oligo.substr(0,alnLength);
1735                                 temp = temp.substr(0,alnLength);
1736                                 
1737                                 int numDiff = countDiffs(oligo, temp);
1738                                 
1739                                 if(numDiff < minDiff){
1740                                         minDiff = numDiff;
1741                                         minCount = 1;
1742                                         minPos = 0;
1743                                         for(int i=0;i<alnLength;i++){
1744                                                 if(temp[i] != '-'){
1745                                                         minPos++;
1746                                                 }
1747                                         }
1748                                 }
1749                                 else if(numDiff == minDiff){
1750                                         minCount++;
1751                                 }
1752                                 
1753                         }
1754                         
1755                         if(minDiff > ldiffs)    {       success = minDiff;              }       //no good matches
1756                         else if(minCount > 1)   {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
1757                         else{                                                                                                   //use the best match
1758                                 seq.setUnaligned(rawSequence.substr(minPos));
1759                                 
1760                                 if(qual.getName() != ""){
1761                                         qual.trimQScores(minPos, -1);
1762                                 }
1763                                 success = minDiff;
1764                         }
1765                         
1766                         if (alignment != NULL) {  delete alignment;  }
1767                         
1768                 }
1769
1770        
1771                 return success;
1772                 
1773         }
1774         catch(exception& e) {
1775                 m->errorOut(e, "TrimOligos", "stripLinker");
1776                 exit(1);
1777         }
1778 }
1779 //******************************************************************/
1780 bool TrimOligos::stripLinker(Sequence& seq){
1781         try {
1782                 
1783                 string rawSequence = seq.getUnaligned();
1784                 bool success = ldiffs +1;       //guilty until proven innocent
1785                 
1786                 for(int i=0;i<linker.size();i++){
1787                         string oligo = linker[i];
1788                         
1789                         if(rawSequence.length() < oligo.length()){
1790                                 success = ldiffs +10;
1791                                 break;
1792                         }
1793                         
1794                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1795                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1796                                 success = 0;
1797                                 break;
1798                         }
1799                 }       
1800                 
1801         //if you found the linker or if you don't want to allow for diffs
1802                 if ((ldiffs == 0) || (success == 0)) { return success;  }
1803                 
1804                 else { //try aligning and see if you can find it
1805                         Alignment* alignment;
1806                         if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1));  }
1807             else{ alignment = NULL; } 
1808                         
1809                         //can you find the barcode
1810                         int minDiff = 1e6;
1811                         int minCount = 1;
1812                         int minPos = 0;
1813                         
1814                         for(int i = 0; i < linker.size(); i++){
1815                                 string oligo = linker[i];
1816                                 //                              int length = oligo.length();
1817                                 
1818                                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
1819                                         success = ldiffs + 10;
1820                                         break;
1821                                 }
1822                                 
1823                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1824                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
1825                                 oligo = alignment->getSeqAAln();
1826                                 string temp = alignment->getSeqBAln();
1827                                 
1828                                 int alnLength = oligo.length();
1829                                 
1830                                 for(int i=oligo.length()-1;i>=0;i--){
1831                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1832                                 }
1833                                 oligo = oligo.substr(0,alnLength);
1834                                 temp = temp.substr(0,alnLength);
1835                                 
1836                                 int numDiff = countDiffs(oligo, temp);
1837                                 
1838                                 if(numDiff < minDiff){
1839                                         minDiff = numDiff;
1840                                         minCount = 1;
1841                                         minPos = 0;
1842                                         for(int i=0;i<alnLength;i++){
1843                                                 if(temp[i] != '-'){
1844                                                         minPos++;
1845                                                 }
1846                                         }
1847                                 }
1848                                 else if(numDiff == minDiff){
1849                                         minCount++;
1850                                 }
1851                                 
1852                         }
1853                         
1854                         if(minDiff > ldiffs)    {       success = minDiff;              }       //no good matches
1855                         else if(minCount > 1)   {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
1856                         else{                                                                                                   //use the best match
1857                                 seq.setUnaligned(rawSequence.substr(minPos));
1858                                 success = minDiff;
1859                         }
1860                         
1861                         if (alignment != NULL) {  delete alignment;  }
1862                         
1863                 }
1864
1865                 return success;
1866                 
1867         }
1868         catch(exception& e) {
1869                 m->errorOut(e, "TrimOligos", "stripLinker");
1870                 exit(1);
1871         }
1872 }
1873
1874 //******************************************************************/
1875 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
1876         try {
1877                 string rawSequence = seq.getUnaligned();
1878                 bool success = sdiffs+1;        //guilty until proven innocent
1879                 
1880                 for(int i=0;i<spacer.size();i++){
1881                         string oligo = spacer[i];
1882                         
1883                         if(rawSequence.length() < oligo.length()){
1884                                 success = sdiffs+10;
1885                                 break;
1886                         }
1887                         
1888                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1889                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1890                                 if(qual.getName() != ""){
1891                                         qual.trimQScores(oligo.length(), -1);
1892                                 }
1893                                 success = 0;
1894                                 break;
1895                         }
1896                 }
1897         
1898         //if you found the spacer or if you don't want to allow for diffs
1899                 if ((sdiffs == 0) || (success == 0)) { return success;  }
1900                 
1901                 else { //try aligning and see if you can find it
1902                         Alignment* alignment;
1903                         if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1));  }
1904             else{ alignment = NULL; } 
1905                         
1906                         //can you find the barcode
1907                         int minDiff = 1e6;
1908                         int minCount = 1;
1909                         int minPos = 0;
1910                         
1911                         for(int i = 0; i < spacer.size(); i++){
1912                                 string oligo = spacer[i];
1913                                 //                              int length = oligo.length();
1914                                 
1915                                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
1916                                         success = sdiffs + 10;
1917                                         break;
1918                                 }
1919                                 
1920                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1921                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
1922                                 oligo = alignment->getSeqAAln();
1923                                 string temp = alignment->getSeqBAln();
1924                                 
1925                                 int alnLength = oligo.length();
1926                                 
1927                                 for(int i=oligo.length()-1;i>=0;i--){
1928                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1929                                 }
1930                                 oligo = oligo.substr(0,alnLength);
1931                                 temp = temp.substr(0,alnLength);
1932                                 
1933                                 int numDiff = countDiffs(oligo, temp);
1934                                 
1935                                 if(numDiff < minDiff){
1936                                         minDiff = numDiff;
1937                                         minCount = 1;
1938                                         minPos = 0;
1939                                         for(int i=0;i<alnLength;i++){
1940                                                 if(temp[i] != '-'){
1941                                                         minPos++;
1942                                                 }
1943                                         }
1944                                 }
1945                                 else if(numDiff == minDiff){
1946                                         minCount++;
1947                                 }
1948                                 
1949                         }
1950                         
1951                         if(minDiff > sdiffs)    {       success = minDiff;              }       //no good matches
1952                         else if(minCount > 1)   {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
1953                         else{                                                                                                   //use the best match
1954                                 seq.setUnaligned(rawSequence.substr(minPos));
1955                                 
1956                                 if(qual.getName() != ""){
1957                                         qual.trimQScores(minPos, -1);
1958                                 }
1959                                 success = minDiff;
1960                         }
1961                         
1962                         if (alignment != NULL) {  delete alignment;  }
1963                         
1964                 }
1965         
1966
1967                 return success;
1968                 
1969         }
1970         catch(exception& e) {
1971                 m->errorOut(e, "TrimOligos", "stripSpacer");
1972                 exit(1);
1973         }
1974 }
1975 //******************************************************************/
1976 bool TrimOligos::stripSpacer(Sequence& seq){
1977         try {
1978                 
1979                 string rawSequence = seq.getUnaligned();
1980                 bool success = sdiffs+1;        //guilty until proven innocent
1981                 
1982                 for(int i=0;i<spacer.size();i++){
1983                         string oligo = spacer[i];
1984                         
1985                         if(rawSequence.length() < oligo.length()){
1986                                 success = sdiffs+10;
1987                                 break;
1988                         }
1989                         
1990                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1991                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1992                                 success = 0;
1993                                 break;
1994                         }
1995                 }       
1996                 
1997         //if you found the spacer or if you don't want to allow for diffs
1998                 if ((sdiffs == 0) || (success == 0)) { return success;  }
1999                 
2000                 else { //try aligning and see if you can find it
2001                         Alignment* alignment;
2002                         if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1));  }
2003             else{ alignment = NULL; } 
2004                         
2005                         //can you find the barcode
2006                         int minDiff = 1e6;
2007                         int minCount = 1;
2008                         int minPos = 0;
2009                         
2010                         for(int i = 0; i < spacer.size(); i++){
2011                                 string oligo = spacer[i];
2012                                 //                              int length = oligo.length();
2013                                 
2014                                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
2015                                         success = sdiffs + 10;
2016                                         break;
2017                                 }
2018                                 
2019                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2020                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2021                                 oligo = alignment->getSeqAAln();
2022                                 string temp = alignment->getSeqBAln();
2023                                 
2024                                 int alnLength = oligo.length();
2025                                 
2026                                 for(int i=oligo.length()-1;i>=0;i--){
2027                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
2028                                 }
2029                                 oligo = oligo.substr(0,alnLength);
2030                                 temp = temp.substr(0,alnLength);
2031                                 
2032                                 int numDiff = countDiffs(oligo, temp);
2033                                 
2034                                 if(numDiff < minDiff){
2035                                         minDiff = numDiff;
2036                                         minCount = 1;
2037                                         minPos = 0;
2038                                         for(int i=0;i<alnLength;i++){
2039                                                 if(temp[i] != '-'){
2040                                                         minPos++;
2041                                                 }
2042                                         }
2043                                 }
2044                                 else if(numDiff == minDiff){
2045                                         minCount++;
2046                                 }
2047                                 
2048                         }
2049                         
2050                         if(minDiff > sdiffs)    {       success = minDiff;              }       //no good matches
2051                         else if(minCount > 1)   {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
2052                         else{                                                                                                   //use the best match
2053                                 seq.setUnaligned(rawSequence.substr(minPos));
2054                                 success = minDiff;
2055                         }
2056                         
2057                         if (alignment != NULL) {  delete alignment;  }
2058                         
2059                 }
2060
2061                 return success;
2062                 
2063         }
2064         catch(exception& e) {
2065                 m->errorOut(e, "TrimOligos", "stripSpacer");
2066                 exit(1);
2067         }
2068 }
2069
2070 //******************************************************************/
2071 bool TrimOligos::compareDNASeq(string oligo, string seq){
2072         try {
2073                 bool success = 1;
2074                 int length = oligo.length();
2075                 
2076                 for(int i=0;i<length;i++){
2077                         
2078                         if(oligo[i] != seq[i]){
2079                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
2080                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
2081                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
2082                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
2083                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
2084                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
2085                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
2086                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
2087                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2088                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2089                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
2090                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
2091                                 
2092                                 if(success == 0)        {       break;   }
2093                         }
2094                         else{
2095                                 success = 1;
2096                         }
2097                 }
2098                 
2099                 return success;
2100         }
2101         catch(exception& e) {
2102                 m->errorOut(e, "TrimOligos", "compareDNASeq");
2103                 exit(1);
2104         }
2105         
2106 }
2107 //********************************************************************/
2108 int TrimOligos::countDiffs(string oligo, string seq){
2109         try {
2110                 
2111                 int length = oligo.length();
2112                 int countDiffs = 0;
2113                 
2114                 for(int i=0;i<length;i++){
2115                         
2116                         if(oligo[i] != seq[i]){
2117                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
2118                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
2119                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
2120                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
2121                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
2122                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
2123                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
2124                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
2125                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2126                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2127                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
2128                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
2129                         }
2130                         
2131                 }
2132                 
2133                 return countDiffs;
2134         }
2135         catch(exception& e) {
2136                 m->errorOut(e, "TrimOligos", "countDiffs");
2137                 exit(1);
2138         }
2139 }
2140 /********************************************************************/
2141
2142
2143