]> git.donarmstrong.com Git - mothur.git/blob - trimoligos.cpp
added N code to flow data. removed changed for trimming of fast=T in trim.flows.
[mothur.git] / trimoligos.cpp
1 /*
2  *  trimoligos.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/1/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
13
14
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
18         try {
19                 m = MothurOut::getInstance();
20         paired = false;
21         trashCode = "";
22                 
23                 pdiffs = p;
24                 bdiffs = b;
25         ldiffs = l;
26         sdiffs = s;
27                 
28                 barcodes = br;
29                 primers = pr;
30                 revPrimer = r;
31         linker = lk;
32         spacer = sp;
33         maxFBarcodeLength = 0;
34         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
35             if(it->first.length() > maxFBarcodeLength){
36                 maxFBarcodeLength = it->first.length();
37             }
38         }
39         maxFPrimerLength = 0;
40         map<string,int>::iterator it; 
41         for(it=primers.begin();it!=primers.end();it++){
42             if(it->first.length() > maxFPrimerLength){
43                 maxFPrimerLength = it->first.length();
44             }
45         }
46         
47         maxLinkerLength = 0;
48         for(int i = 0; i < linker.size(); i++){
49             if(linker[i].length() > maxLinkerLength){
50                 maxLinkerLength = linker[i].length();
51             }
52         }
53         
54         maxSpacerLength = 0;
55         for(int i = 0; i < spacer.size(); i++){
56             if(spacer[i].length() > maxSpacerLength){
57                 maxSpacerLength = spacer[i].length();
58             }
59         }
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "TrimOligos", "TrimOligos");
63                 exit(1);
64         }
65 }
66 /********************************************************************/
67 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
68 TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<int, oligosPair> br){
69         try {
70                 m = MothurOut::getInstance();
71                 
72                 pdiffs = p;
73                 bdiffs = b;
74         ldiffs = l;
75         sdiffs = s;
76         paired = true;
77         trashCode = "";
78                 
79         maxFBarcodeLength = 0;
80         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
81             string forward = it->second.forward;
82             map<string, vector<int> >::iterator itForward = ifbarcodes.find(forward);
83             
84             if(forward.length() > maxFBarcodeLength){ maxFBarcodeLength = forward.length();  }
85             
86             if (itForward == ifbarcodes.end()) {
87                 vector<int> temp; temp.push_back(it->first);
88                 ifbarcodes[forward] = temp;
89             }else { itForward->second.push_back(it->first); }
90         }
91
92         maxFPrimerLength = 0;
93         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
94             string forward = it->second.forward;
95             map<string, vector<int> >::iterator itForward = ifprimers.find(forward);
96             
97             if(forward.length() > maxFPrimerLength){ maxFPrimerLength = forward.length();  }
98             
99             if (itForward == ifprimers.end()) {
100                 vector<int> temp; temp.push_back(it->first);
101                 ifprimers[forward] = temp;
102             }else { itForward->second.push_back(it->first); }
103         }
104         
105         maxRBarcodeLength = 0;
106         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
107             string forward = it->second.reverse;
108             map<string, vector<int> >::iterator itForward = irbarcodes.find(forward);
109             
110             if(forward.length() > maxRBarcodeLength){ maxRBarcodeLength = forward.length();  }
111             
112             if (itForward == irbarcodes.end()) {
113                 vector<int> temp; temp.push_back(it->first);
114                 irbarcodes[forward] = temp;
115             }else { itForward->second.push_back(it->first); }
116         }
117         
118         maxRPrimerLength = 0;
119         for(map<int,oligosPair>::iterator it=pr.begin();it!=pr.end();it++){
120             string forward = it->second.reverse;
121             map<string, vector<int> >::iterator itForward = irprimers.find(forward);
122             
123             if(forward.length() > maxRPrimerLength){ maxRPrimerLength = forward.length();  }
124             
125             if (itForward == irprimers.end()) {
126                 vector<int> temp; temp.push_back(it->first);
127                 irprimers[forward] = temp;
128             }else { itForward->second.push_back(it->first); }
129         }
130
131         ipbarcodes = br;
132         ipprimers = pr;
133         }
134         catch(exception& e) {
135                 m->errorOut(e, "TrimOligos", "TrimOligos");
136                 exit(1);
137         }
138 }
139 /********************************************************************/
140 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
141 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
142         try {
143                 m = MothurOut::getInstance();
144                 
145                 pdiffs = p;
146                 bdiffs = b;
147                 
148                 barcodes = br;
149                 primers = pr;
150                 revPrimer = r;
151         paired = false;
152         trashCode = "";
153         
154         maxFBarcodeLength = 0;
155         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
156             string oligo = it->first;
157             if(oligo.length() > maxFBarcodeLength){
158                 maxFBarcodeLength = oligo.length();
159             }
160         }
161         maxRBarcodeLength = maxFBarcodeLength;
162         
163         maxFPrimerLength = 0;
164         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
165             string oligo = it->first;
166             if(oligo.length() > maxFPrimerLength){
167                 maxFPrimerLength = oligo.length();
168             }
169         }
170         maxRPrimerLength = maxFPrimerLength;
171         }
172         catch(exception& e) {
173                 m->errorOut(e, "TrimOligos", "TrimOligos");
174                 exit(1);
175         }
176 }
177 /********************************************************************/
178 TrimOligos::~TrimOligos() {}
179 //********************************************************************/
180 bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
181         try {
182                 
183                 string rawSequence = seq.getUnaligned();
184         trashCode = "";
185                 
186                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
187                         string oligo = it->first;
188                         
189                         if(rawSequence.length() < oligo.length()) {  break;  }
190                         
191                         //search for primer
192             int olength = oligo.length();
193             for (int j = 0; j < rawSequence.length()-olength; j++){
194                 if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
195                 string rawChunk = rawSequence.substr(j, olength);
196                 if(compareDNASeq(oligo, rawChunk)) {
197                     primerStart = j;
198                     primerEnd = primerStart + olength;
199                     return true;
200                 }
201                 
202             }
203         }       
204                 
205         primerStart = 0; primerEnd = 0;
206         //if you don't want to allow for diffs
207                 if (pdiffs == 0) { trashCode = "f"; return false;  }
208                 else { //try aligning and see if you can find it
209                                                 
210                         //can you find the barcode
211                         int minDiff = 1e6;
212                         int minCount = 1;
213                         
214             Alignment* alignment;
215             if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));  }
216             else{ alignment = NULL; } 
217             
218                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
219                 
220                 string prim = it->first;
221                 //search for primer
222                 int olength = prim.length();
223                 if (rawSequence.length() < olength) {} //ignore primers too long for this seq
224                 else{
225                     for (int j = 0; j < rawSequence.length()-olength; j++){
226                         
227                         string oligo = it->first;
228
229                         if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
230                                 
231                         string rawChunk = rawSequence.substr(j, olength+pdiffs);
232                         
233                         //use needleman to align first primer.length()+numdiffs of sequence to each barcode
234                         alignment->alignPrimer(oligo, rawChunk);
235                         oligo = alignment->getSeqAAln();
236                         string temp = alignment->getSeqBAln();
237                                 
238                         int alnLength = oligo.length();
239                                 
240                         for(int i=oligo.length()-1;i>=0;i--){
241                             if(oligo[i] != '-'){        alnLength = i+1;        break;  }
242                         }
243                         oligo = oligo.substr(0,alnLength);
244                         temp = temp.substr(0,alnLength);
245                                 
246                         int numDiff = countDiffs(oligo, temp);
247                                 
248                         if(numDiff < minDiff){
249                             minDiff = numDiff;
250                             minCount = 1;
251                             primerStart = j;
252                             primerEnd = primerStart + alnLength;
253                         }else if(numDiff == minDiff){ minCount++; }
254                     }
255                 }
256                         }
257                         
258             if (alignment != NULL) {  delete alignment;  }
259             
260                         if(minDiff > pdiffs)    {       primerStart = 0; primerEnd = 0; trashCode = "f"; return false;          }       //no good matches
261                         else if(minCount > 1)   {       primerStart = 0; primerEnd = 0; trashCode = "f"; return false;  }       //can't tell the difference between multiple primers
262                         else{  trashCode = ""; return true; }
263                 }
264         
265         trashCode = "f";
266         primerStart = 0; primerEnd = 0;
267                 return false;
268                 
269         }
270         catch(exception& e) {
271                 m->errorOut(e, "TrimOligos", "stripForward");
272                 exit(1);
273         }
274 }
275 //******************************************************************/
276 bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
277         try {
278                 trashCode = "";
279                 string rawSequence = seq.getUnaligned();
280                 
281                 for(int i=0;i<revPrimer.size();i++){
282                         string oligo = revPrimer[i];
283                         if(rawSequence.length() < oligo.length()) {  break;  }
284                         
285                         //search for primer
286             int olength = oligo.length();
287             for (int j = rawSequence.length()-olength; j >= 0; j--){
288                 if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
289                 string rawChunk = rawSequence.substr(j, olength);
290                 
291                 if(compareDNASeq(oligo, rawChunk)) {
292                     primerStart = j;
293                     primerEnd = primerStart + olength;
294                     return true;
295                 }
296                 
297             }
298                 }       
299                 
300         trashCode = "r";
301         primerStart = 0; primerEnd = 0;
302                 return false;
303         }
304         catch(exception& e) {
305                 m->errorOut(e, "PcrSeqsCommand", "findReverse");
306                 exit(1);
307         }
308 }
309 //*******************************************************************/
310
311 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
312         try {
313                 trashCode = "";
314         if (paired) {  int success = stripPairedBarcode(seq, qual, group);  return success; }
315         
316                 string rawSequence = seq.getUnaligned();
317                 int success = bdiffs + 1;       //guilty until proven innocent
318                 
319                 //can you find the barcode
320                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
321                         string oligo = it->first;
322                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
323                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
324                                 break;  
325                         }
326                         
327                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
328                                 group = it->second;
329                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
330                                 
331                                 if(qual.getName() != ""){
332                                         qual.trimQScores(oligo.length(), -1);
333                                 }
334                                 
335                                 success = 0;
336                 trashCode = "";
337                                 return success;
338                         }
339                 }
340                 
341                 //if you found the barcode or if you don't want to allow for diffs
342                 if (bdiffs == 0) { trashCode = "b"; return success;  }
343                 
344                 else { //try aligning and see if you can find it
345                         Alignment* alignment;
346                         if (barcodes.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
347                         else{ alignment = NULL; } 
348                         
349                         //can you find the barcode
350                         int minDiff = 1e6;
351                         int minCount = 1;
352                         int minGroup = -1;
353                         int minPos = 0;
354                         
355                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
356                                 string oligo = it->first;
357                                 //                              int length = oligo.length();
358                                 
359                                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
360                                         success = bdiffs + 10; trashCode = "b";
361                                         break;
362                                 }
363                                 
364                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
365                                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
366                                 oligo = alignment->getSeqAAln();
367                                 string temp = alignment->getSeqBAln();
368                                 
369                                 int alnLength = oligo.length();
370                                 
371                                 for(int i=oligo.length()-1;i>=0;i--){
372                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
373                                 }
374                                 oligo = oligo.substr(0,alnLength);
375                                 temp = temp.substr(0,alnLength);
376                 int numDiff = countDiffs(oligo, temp);
377                                 
378                                 if(numDiff < minDiff){
379                                         minDiff = numDiff;
380                                         minCount = 1;
381                                         minGroup = it->second;
382                                         minPos = 0;
383                                         for(int i=0;i<alnLength;i++){
384                                                 if(temp[i] != '-'){
385                                                         minPos++;
386                                                 }
387                                         }
388                                 }
389                                 else if(numDiff == minDiff){
390                                         minCount++;
391                                 }
392                                 
393                         }
394                         
395                         if(minDiff > bdiffs)    {       trashCode = "b"; success = minDiff;             }       //no good matches
396                         else if(minCount > 1)   {       trashCode = "b"; success = bdiffs + 100;        }       //can't tell the difference between multiple barcodes
397                         else{                                                                                                   //use the best match
398                                 group = minGroup;
399                                 seq.setUnaligned(rawSequence.substr(minPos));
400     
401                                 if(qual.getName() != ""){
402                                         qual.trimQScores(minPos, -1);
403                                 }
404                                 success = minDiff;
405                 trashCode = "";
406                         }
407                         
408                         if (alignment != NULL) {  delete alignment;  }
409                         
410                 }
411                 
412                 return success;
413                 
414         }
415         catch(exception& e) {
416                 m->errorOut(e, "TrimOligos", "stripBarcode");
417                 exit(1);
418         }
419 }
420 //*******************************************************************/
421 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
422         try {
423         trashCode = "";
424                 //look for forward barcode
425                 string rawFSequence = forwardSeq.getUnaligned();
426         string rawRSequence = reverseSeq.getUnaligned();
427                 int success = bdiffs + 1;       //guilty until proven innocent
428                 
429                 //can you find the forward barcode
430                 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
431                         string foligo = it->second.forward;
432             string roligo = it->second.reverse;
433             
434                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
435                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
436                 trashCode += "b";
437                 break;
438                         }
439             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
440                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
441                 trashCode += "d";
442                                 break;  
443                         }
444                         
445                         if(compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) {
446                 if (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length()))) {
447                     group = it->first;
448                     forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
449                     reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
450                     success = 0; trashCode = "";
451                     return success;
452                 }
453             }else {
454                 trashCode = "b";
455                 if (!compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length()))) { trashCode += "d";  }
456             }
457                 }
458                 
459                 //if you found the barcode or if you don't want to allow for diffs
460                 if (bdiffs == 0) {  return success;  }
461                 else { //try aligning and see if you can find it
462             trashCode = "";
463                         Alignment* alignment;
464                         if (ifbarcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
465                         else{ alignment = NULL; } 
466                         
467                         //can you find the barcode
468                         int minDiff = 1e6;
469                         int minCount = 1;
470                         vector< vector<int> > minFGroup;
471                         vector<int> minFPos; 
472             
473             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
474             /*
475                 1   Sarah   Westcott
476                 2   John    Westcott
477                 3   Anna    Westcott
478                 4   Sarah   Schloss
479                 5   Pat     Schloss
480                 6   Gail    Brown
481                 7   Pat     Moore
482              
483                 only want to look for forward = Sarah, John, Anna, Pat, Gail
484                                       reverse = Westcott, Schloss, Brown, Moore
485              
486                 but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
487              */
488             //cout << endl << forwardSeq.getName() << endl;         
489                         for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
490                                 string oligo = it->first;
491                                 
492                                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
493                                         success = bdiffs + 10;
494                                         break;
495                                 }
496                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
497                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
498                                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
499                                 oligo = alignment->getSeqAAln();
500                                 string temp = alignment->getSeqBAln();
501                
502                                 int alnLength = oligo.length();
503                                 
504                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
505                                 oligo = oligo.substr(0,alnLength);
506                                 temp = temp.substr(0,alnLength);
507                 int numDiff = countDiffs(oligo, temp);
508                                 
509                 if (alnLength == 0) { numDiff = bdiffs + 100; }
510                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
511                 
512                                 if(numDiff < minDiff){
513                                         minDiff = numDiff;
514                                         minCount = 1;
515                                         minFGroup.clear();
516                     minFGroup.push_back(it->second);
517                                         int tempminFPos = 0;
518                     minFPos.clear();
519                                         for(int i=0;i<alnLength;i++){
520                                                 if(temp[i] != '-'){
521                                                         tempminFPos++;
522                                                 }
523                                         }
524                     minFPos.push_back(tempminFPos);
525                                 }else if(numDiff == minDiff){
526                                         minFGroup.push_back(it->second);
527                     int tempminFPos = 0;
528                                         for(int i=0;i<alnLength;i++){
529                                                 if(temp[i] != '-'){
530                                                         tempminFPos++;
531                                                 }
532                                         }
533                     minFPos.push_back(tempminFPos);
534                                 }
535                         }
536             
537                         //cout << minDiff << '\t' << minCount << '\t' << endl;
538                         if(minDiff > bdiffs)    {       trashCode = "b"; minFGroup.clear(); success = minDiff;          }       //no good matches
539                         //else{
540                 //check for reverse match
541                 if (alignment != NULL) {  delete alignment;  }
542                 
543                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1));   }
544                 else{ alignment = NULL; } 
545                 
546                 //can you find the barcode
547                 minDiff = 1e6;
548                 minCount = 1;
549                 vector< vector<int> > minRGroup;
550                 vector<int> minRPos;
551                 
552                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
553                     string oligo = it->first;
554                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
555                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
556                         success = bdiffs + 10;
557                         break;
558                     }
559                     
560                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
561                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
562                     oligo = alignment->getSeqAAln();
563                     string temp = alignment->getSeqBAln();
564                     
565                     int alnLength = oligo.length();
566                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
567                     oligo = oligo.substr(0,alnLength);
568                     temp = temp.substr(0,alnLength);
569                     int numDiff = countDiffs(oligo, temp);
570                     if (alnLength == 0) { numDiff = bdiffs + 100; }
571                     
572                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
573                     if(numDiff < minDiff){
574                         minDiff = numDiff;
575                         minCount = 1;
576                         minRGroup.clear();
577                         minRGroup.push_back(it->second);
578                         int tempminRPos = 0;
579                         minRPos.clear();
580                         for(int i=0;i<alnLength;i++){
581                             if(temp[i] != '-'){
582                                 tempminRPos++;
583                             }
584                         }
585                         minRPos.push_back(tempminRPos);                    
586                     }else if(numDiff == minDiff){
587                         int tempminRPos = 0;
588                         for(int i=0;i<alnLength;i++){
589                             if(temp[i] != '-'){
590                                 tempminRPos++;
591                             }
592                         }
593                         minRPos.push_back(tempminRPos);  
594                         minRGroup.push_back(it->second);
595                     }
596                     
597                 }
598                 
599                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
600                 for (int i = 0; i < minFGroup.size(); i++) { 
601                     cout << i << '\t';
602                     for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
603                     cout << endl;
604                 }
605                 cout << endl;
606                 for (int i = 0; i < minRGroup.size(); i++) { 
607                     cout << i << '\t';
608                     for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
609                     cout << endl;
610                 }
611                 cout << endl;*/
612                 if(minDiff > bdiffs)    {       trashCode += "d"; success = minDiff;            }       //no good matches
613                 else  {  
614                     bool foundMatch = false;
615                     vector<int> matches;
616                     for (int i = 0; i < minFGroup.size(); i++) {
617                         for (int j = 0; j < minFGroup[i].size(); j++) {
618                             for (int k = 0; k < minRGroup.size(); k++) {
619                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
620                             }
621                         }
622                     }
623                     
624                     int fStart = 0;
625                     int rStart = 0;
626                     if (matches.size() == 1) { 
627                         foundMatch = true;
628                         group = matches[0]; 
629                         fStart = minFPos[0];
630                         rStart = minRPos[0];
631                     }
632                     
633                     //we have an acceptable match for the forward and reverse, but do they match?
634                     if (foundMatch) {
635                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
636                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
637                         success = minDiff;
638                     }else { if (trashCode == "") { trashCode = "bd"; } success = bdiffs + 100;  }
639                 }
640                         //}
641                         
642                         if (alignment != NULL) {  delete alignment;  }
643                 }
644                 
645         //catchall for case I didn't think of
646         if ((trashCode == "") && (success > bdiffs)) { trashCode = "bd"; }
647         
648                 return success;
649                 
650         }
651         catch(exception& e) {
652                 m->errorOut(e, "TrimOligos", "stripIBarcode");
653                 exit(1);
654         }
655         
656 }
657 //*******************************************************************/
658 int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
659         try {
660         trashCode = "";
661         
662                 //look for forward barcode
663                 string rawFSequence = forwardSeq.getUnaligned();
664         string rawRSequence = reverseSeq.getUnaligned();
665                 int success = bdiffs + 1;       //guilty until proven innocent
666                 
667                 //can you find the forward barcode
668                 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
669                         string foligo = it->second.forward;
670             string roligo = it->second.reverse;
671             
672                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
673                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
674                 trashCode = "b";
675                                 break;  
676                         }
677             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
678                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
679                 trashCode += "d";
680                                 break;  
681                         }
682                         
683                         if(compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) {
684                 if (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length()))) {
685                     group = it->first;
686                     forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
687                     reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
688                     forwardQual.trimQScores(foligo.length(), -1);
689                     reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
690                     success = 0; trashCode = "";
691                     return success;
692                 }
693             }else {
694                 trashCode = "b";
695                 if (!compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length()))) {  trashCode += "d";
696                 }
697                         }
698                 }
699                 
700                 //if you found the barcode or if you don't want to allow for diffs
701                 if (bdiffs == 0) { return success;  }
702                 else { //try aligning and see if you can find it
703             trashCode = "";
704                         Alignment* alignment;
705                         if (ifbarcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
706                         else{ alignment = NULL; } 
707                         
708                         //can you find the barcode
709                         int minDiff = 1e6;
710                         int minCount = 1;
711                         vector< vector<int> > minFGroup;
712                         vector<int> minFPos; 
713             
714             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
715             /*
716              1   Sarah   Westcott
717              2   John    Westcott
718              3   Anna    Westcott
719              4   Sarah   Schloss
720              5   Pat     Schloss
721              6   Gail    Brown
722              7   Pat     Moore
723              
724              only want to look for forward = Sarah, John, Anna, Pat, Gail
725              reverse = Westcott, Schloss, Brown, Moore
726              
727              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
728              */
729             //cout << endl << forwardSeq.getName() << endl;         
730                         for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
731                                 string oligo = it->first;
732                                 
733                                 if(rawFSequence.length() < maxFBarcodeLength){  //let's just assume that the barcodes are the same length
734                                         success = bdiffs + 10;
735                                         break;
736                                 }
737                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
738                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
739                                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
740                                 oligo = alignment->getSeqAAln();
741                                 string temp = alignment->getSeqBAln();
742                 
743                                 int alnLength = oligo.length();
744                                 
745                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
746                                 oligo = oligo.substr(0,alnLength);
747                                 temp = temp.substr(0,alnLength);
748                 int numDiff = countDiffs(oligo, temp);
749                                 
750                 if (alnLength == 0) { numDiff = bdiffs + 100; }
751                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
752                 
753                                 if(numDiff < minDiff){
754                                         minDiff = numDiff;
755                                         minCount = 1;
756                                         minFGroup.clear();
757                     minFGroup.push_back(it->second);
758                                         int tempminFPos = 0;
759                     minFPos.clear();
760                                         for(int i=0;i<alnLength;i++){
761                                                 if(temp[i] != '-'){
762                                                         tempminFPos++;
763                                                 }
764                                         }
765                     minFPos.push_back(tempminFPos);
766                                 }else if(numDiff == minDiff){
767                                         minFGroup.push_back(it->second);
768                     int tempminFPos = 0;
769                                         for(int i=0;i<alnLength;i++){
770                                                 if(temp[i] != '-'){
771                                                         tempminFPos++;
772                                                 }
773                                         }
774                     minFPos.push_back(tempminFPos);
775                                 }
776                         }
777             
778                         //cout << minDiff << '\t' << minCount << '\t' << endl;
779                         if(minDiff > bdiffs)    {       trashCode = "b"; minFGroup.clear(); success = minDiff;          }       //no good matches
780                         //else{
781                 //check for reverse match
782                 if (alignment != NULL) {  delete alignment;  }
783                 
784                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1));   }
785                 else{ alignment = NULL; } 
786                 
787                 //can you find the barcode
788                 minDiff = 1e6;
789                 minCount = 1;
790                 vector< vector<int> > minRGroup;
791                 vector<int> minRPos;
792                 
793                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
794                     string oligo = it->first;
795                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+bdiffs) << endl;
796                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
797                         success = bdiffs + 10;
798                         break;
799                     }
800                     
801                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
802                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
803                     oligo = alignment->getSeqAAln();
804                     string temp = alignment->getSeqBAln();
805                     
806                     int alnLength = oligo.length();
807                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
808                     oligo = oligo.substr(0,alnLength);
809                     temp = temp.substr(0,alnLength);
810                     int numDiff = countDiffs(oligo, temp);
811                     if (alnLength == 0) { numDiff = bdiffs + 100; }
812                     
813                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
814                     if(numDiff < minDiff){
815                         minDiff = numDiff;
816                         minCount = 1;
817                         minRGroup.clear();
818                         minRGroup.push_back(it->second);
819                         int tempminRPos = 0;
820                         minRPos.clear();
821                         for(int i=0;i<alnLength;i++){
822                             if(temp[i] != '-'){
823                                 tempminRPos++;
824                             }
825                         }
826                         minRPos.push_back(tempminRPos);                    
827                     }else if(numDiff == minDiff){
828                         int tempminRPos = 0;
829                         for(int i=0;i<alnLength;i++){
830                             if(temp[i] != '-'){
831                                 tempminRPos++;
832                             }
833                         }
834                         minRPos.push_back(tempminRPos);  
835                         minRGroup.push_back(it->second);
836                     }
837                     
838                 }
839                 
840                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
841                  for (int i = 0; i < minFGroup.size(); i++) { 
842                  cout << i << '\t';
843                  for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
844                  cout << endl;
845                  }
846                  cout << endl;
847                  for (int i = 0; i < minRGroup.size(); i++) { 
848                  cout << i << '\t';
849                  for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
850                  cout << endl;
851                  }
852                  cout << endl;*/
853                 if(minDiff > bdiffs)    {       trashCode = "d"; success = minDiff;             }       //no good matches
854                 else  {  
855                     bool foundMatch = false;
856                     vector<int> matches;
857                     for (int i = 0; i < minFGroup.size(); i++) {
858                         for (int j = 0; j < minFGroup[i].size(); j++) {
859                             for (int k = 0; k < minRGroup.size(); k++) {
860                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
861                             }
862                         }
863                     }
864                     
865                     int fStart = 0;
866                     int rStart = 0;
867                     if (matches.size() == 1) { 
868                         foundMatch = true;
869                         group = matches[0]; 
870                         fStart = minFPos[0];
871                         rStart = minRPos[0];
872                     }
873                     
874                     //we have an acceptable match for the forward and reverse, but do they match?
875                     if (foundMatch) {
876                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
877                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
878                         forwardQual.trimQScores(fStart, -1);
879                         reverseQual.trimQScores(rStart, -1);
880                         success = minDiff;
881                     }else { if (trashCode == "") { trashCode = "bd"; } success = bdiffs + 100;  }
882                 }
883                         //}
884                         
885                         if (alignment != NULL) {  delete alignment;  }
886                 }
887                 
888         //catchall for case I didn't think of
889         if ((trashCode == "") && (success > bdiffs)) { trashCode = "bd"; }
890         
891                 return success;
892                 
893         }
894         catch(exception& e) {
895                 m->errorOut(e, "TrimOligos", "stripIBarcode");
896                 exit(1);
897         }
898         
899 }
900 //*******************************************************************/
901 int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
902         try {
903         trashCode = "";
904         
905                 //look for forward barcode
906                 string rawSeq = seq.getUnaligned();
907                 int success = bdiffs + 1;       //guilty until proven innocent
908                 //cout << seq.getName() << endl;
909                 //can you find the forward barcode
910                 for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
911                         string foligo = it->second.forward;
912             string roligo = it->second.reverse;
913             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
914             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
915                         if(rawSeq.length() < foligo.length()){  //let's just assume that the barcodes are the same length
916                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
917                 trashCode = "b";
918                                 break;
919                         }
920             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
921                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
922                 trashCode += "d";
923                                 break;
924                         }
925                         
926                         if(compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) { //find forward
927                 if (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()))) { //find reverse
928                     group = it->first;
929                     string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
930                     seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
931                     if(qual.getName() != ""){
932                         qual.trimQScores(-1, rawSeq.length()-roligo.length());
933                         qual.trimQScores(foligo.length(), -1);
934                     }
935                     success = 0;
936                     trashCode = "";
937                     return success;
938                 }
939             }else {
940                 trashCode = "b";
941                                 if (!compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()))) { trashCode += "d"; }
942                         }
943                 }
944                 //cout << "success=" << success << endl;
945                 //if you found the barcode or if you don't want to allow for diffs
946                 if (bdiffs == 0) { return success;  }
947                 else { //try aligning and see if you can find it
948                         Alignment* alignment;
949                         if (ifbarcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
950                         else{ alignment = NULL; }
951                         
952                         //can you find the barcode
953                         int minDiff = 1e6;
954                         int minCount = 1;
955                         vector< vector<int> > minFGroup;
956                         vector<int> minFPos;
957             
958             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
959             /*
960              1   Sarah   Westcott
961              2   John    Westcott
962              3   Anna    Westcott
963              4   Sarah   Schloss
964              5   Pat     Schloss
965              6   Gail    Brown
966              7   Pat     Moore
967              
968              only want to look for forward = Sarah, John, Anna, Pat, Gail
969              reverse = Westcott, Schloss, Brown, Moore
970              
971              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
972              */
973             //cout << endl << forwardSeq.getName() << endl;
974                         for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
975                                 string oligo = it->first;
976                                 
977                                 if(rawSeq.length() < maxFBarcodeLength){        //let's just assume that the barcodes are the same length
978                                         success = bdiffs + 10;
979                                         break;
980                                 }
981                                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
982                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
983                                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
984                                 oligo = alignment->getSeqAAln();
985                                 string temp = alignment->getSeqBAln();
986                 
987                                 int alnLength = oligo.length();
988                                 
989                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
990                                 oligo = oligo.substr(0,alnLength);
991                                 temp = temp.substr(0,alnLength);
992                 int numDiff = countDiffs(oligo, temp);
993                                 
994                 if (alnLength == 0) { numDiff = bdiffs + 100; }
995                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
996                 
997                                 if(numDiff < minDiff){
998                                         minDiff = numDiff;
999                                         minCount = 1;
1000                                         minFGroup.clear();
1001                     minFGroup.push_back(it->second);
1002                                         int tempminFPos = 0;
1003                     minFPos.clear();
1004                                         for(int i=0;i<alnLength;i++){
1005                                                 if(temp[i] != '-'){
1006                                                         tempminFPos++;
1007                                                 }
1008                                         }
1009                     minFPos.push_back(tempminFPos);
1010                                 }else if(numDiff == minDiff){
1011                                         minFGroup.push_back(it->second);
1012                     int tempminFPos = 0;
1013                                         for(int i=0;i<alnLength;i++){
1014                                                 if(temp[i] != '-'){
1015                                                         tempminFPos++;
1016                                                 }
1017                                         }
1018                     minFPos.push_back(tempminFPos);
1019                                 }
1020                         }
1021             
1022                         //cout << minDiff << '\t' << minCount << '\t' << endl;
1023                         if(minDiff > bdiffs)    {       trashCode = "b"; minFGroup.clear(); success = minDiff;          }       //no good matches
1024                         //else{
1025                 //check for reverse match
1026                 if (alignment != NULL) {  delete alignment;  }
1027                 
1028                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1));   }
1029                 else{ alignment = NULL; }
1030                 
1031                 //can you find the barcode
1032                 minDiff = 1e6;
1033                 minCount = 1;
1034                 vector< vector<int> > minRGroup;
1035                 vector<int> minRPos;
1036                 
1037                 string rawRSequence = reverseOligo(seq.getUnaligned());
1038                 //cout << irbarcodes.size() << '\t' << maxRBarcodeLength <<  endl;
1039                 for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
1040                     string oligo = reverseOligo(it->first);
1041                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
1042                     if(rawRSequence.length() < maxRBarcodeLength){      //let's just assume that the barcodes are the same length
1043                         success = bdiffs + 10;
1044                         break;
1045                     }
1046                     
1047                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1048                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
1049                     oligo = alignment->getSeqAAln();
1050                     string temp = alignment->getSeqBAln();
1051                     
1052                     int alnLength = oligo.length();
1053                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1054                     oligo = oligo.substr(0,alnLength);
1055                     temp = temp.substr(0,alnLength);
1056                     int numDiff = countDiffs(oligo, temp);
1057                     if (alnLength == 0) { numDiff = bdiffs + 100; }
1058                     
1059                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1060                     if(numDiff < minDiff){
1061                         minDiff = numDiff;
1062                         minCount = 1;
1063                         minRGroup.clear();
1064                         minRGroup.push_back(it->second);
1065                         int tempminRPos = 0;
1066                         minRPos.clear();
1067                         for(int i=0;i<alnLength;i++){
1068                             if(temp[i] != '-'){
1069                                 tempminRPos++;
1070                             }
1071                         }
1072                         minRPos.push_back(tempminRPos);
1073                     }else if(numDiff == minDiff){
1074                         int tempminRPos = 0;
1075                         for(int i=0;i<alnLength;i++){
1076                             if(temp[i] != '-'){
1077                                 tempminRPos++;
1078                             }
1079                         }
1080                         minRPos.push_back(tempminRPos);
1081                         minRGroup.push_back(it->second);
1082                     }
1083                     
1084                 }
1085                 
1086                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1087                  for (int i = 0; i < minFGroup.size(); i++) {
1088                  cout << i << '\t';
1089                  for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
1090                  cout << endl;
1091                  }
1092                  cout << endl;
1093                  for (int i = 0; i < minRGroup.size(); i++) {
1094                  cout << i << '\t';
1095                  for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
1096                  cout << endl;
1097                  }
1098                  cout << endl;*/
1099                 if(minDiff > bdiffs)    {       trashCode += "d"; success = minDiff;            }       //no good matches
1100                 else  {
1101                     bool foundMatch = false;
1102                     vector<int> matches;
1103                     for (int i = 0; i < minFGroup.size(); i++) {
1104                         for (int j = 0; j < minFGroup[i].size(); j++) {
1105                             for (int k = 0; k < minRGroup.size(); k++) {
1106                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1107                             }
1108                         }
1109                     }
1110                     
1111                     int fStart = 0;
1112                     int rStart = 0;
1113                     if (matches.size() == 1) {
1114                         foundMatch = true;
1115                         group = matches[0];
1116                         fStart = minFPos[0];
1117                         rStart = rawSeq.length() - minRPos[0];
1118                     }
1119                     
1120                     //we have an acceptable match for the forward and reverse, but do they match?
1121                     if (foundMatch) {
1122                         string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1123                         seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1124                         if(qual.getName() != ""){
1125                             qual.trimQScores(-1, rStart);
1126                             qual.trimQScores(fStart, -1);
1127                         }
1128                         success = minDiff;
1129                         //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
1130                     }else { if (trashCode == "") { trashCode = "bd"; } success = bdiffs + 100;  }
1131                 }
1132                         //}
1133                         
1134                         if (alignment != NULL) {  delete alignment;  }
1135                 }
1136                 
1137         //catchall for case I didn't think of
1138         if ((trashCode == "") && (success > bdiffs)) { trashCode = "bd"; }
1139
1140                 return success;
1141                 
1142         }
1143         catch(exception& e) {
1144                 m->errorOut(e, "TrimOligos", "stripPairedBarcode");
1145                 exit(1);
1146         }
1147         
1148 }
1149 //*******************************************************************/
1150 int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
1151         try {
1152         trashCode = "";
1153                 //look for forward 
1154                 string rawSeq = seq.getUnaligned();
1155                 int success = pdiffs + 1;       //guilty until proven innocent
1156         //cout << seq.getName() << endl;
1157                 //can you find the forward
1158                 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1159                         string foligo = it->second.forward;
1160             string roligo = it->second.reverse;
1161             
1162             //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
1163             //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
1164                         if(rawSeq.length() < foligo.length()){  //let's just assume that the barcodes are the same length
1165                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1166                 trashCode = "p";
1167                                 break;
1168                         }
1169             if(rawSeq.length() < roligo.length()){      //let's just assume that the barcodes are the same length
1170                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1171                 trashCode += "q";
1172                                 break;
1173                         }
1174                         
1175             if(compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) { //find forward
1176                 if (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()))) { //find reverse
1177                     group = it->first;
1178                     string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
1179                     seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
1180                     if(qual.getName() != ""){
1181                         qual.trimQScores(-1, rawSeq.length()-roligo.length());
1182                         qual.trimQScores(foligo.length(), -1);
1183                     }
1184                     success = 0;
1185                     trashCode = "";
1186                     return success;
1187                 }
1188             }else {
1189                 trashCode = "b";
1190                                 if (!compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()))) { trashCode += "d"; }
1191                         }
1192
1193         }
1194                 //cout << "success=" << success << endl;
1195                 //if you found the barcode or if you don't want to allow for diffs
1196                 if (pdiffs == 0) { return success;  }
1197                 else { //try aligning and see if you can find it
1198                         Alignment* alignment;
1199                         if (ifprimers.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
1200                         else{ alignment = NULL; }
1201                         
1202                         //can you find the barcode
1203                         int minDiff = 1e6;
1204                         int minCount = 1;
1205                         vector< vector<int> > minFGroup;
1206                         vector<int> minFPos;
1207             
1208             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1209             /*
1210              1   Sarah   Westcott
1211              2   John    Westcott
1212              3   Anna    Westcott
1213              4   Sarah   Schloss
1214              5   Pat     Schloss
1215              6   Gail    Brown
1216              7   Pat     Moore
1217              
1218              only want to look for forward = Sarah, John, Anna, Pat, Gail
1219              reverse = Westcott, Schloss, Brown, Moore
1220              
1221              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
1222              */
1223             //cout << endl << forwardSeq.getName() << endl;
1224                         for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1225                                 string oligo = it->first;
1226                                 
1227                                 if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
1228                                         success = pdiffs + 10;
1229                                         break;
1230                                 }
1231                                 //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
1232                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1233                                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
1234                                 oligo = alignment->getSeqAAln();
1235                                 string temp = alignment->getSeqBAln();
1236                 
1237                                 int alnLength = oligo.length();
1238                                 
1239                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1240                                 oligo = oligo.substr(0,alnLength);
1241                                 temp = temp.substr(0,alnLength);
1242                 int numDiff = countDiffs(oligo, temp);
1243                                 
1244                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1245                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1246                 
1247                                 if(numDiff < minDiff){
1248                                         minDiff = numDiff;
1249                                         minCount = 1;
1250                                         minFGroup.clear();
1251                     minFGroup.push_back(it->second);
1252                                         int tempminFPos = 0;
1253                     minFPos.clear();
1254                                         for(int i=0;i<alnLength;i++){
1255                                                 if(temp[i] != '-'){
1256                                                         tempminFPos++;
1257                                                 }
1258                                         }
1259                     minFPos.push_back(tempminFPos);
1260                                 }else if(numDiff == minDiff){
1261                                         minFGroup.push_back(it->second);
1262                     int tempminFPos = 0;
1263                                         for(int i=0;i<alnLength;i++){
1264                                                 if(temp[i] != '-'){
1265                                                         tempminFPos++;
1266                                                 }
1267                                         }
1268                     minFPos.push_back(tempminFPos);
1269                                 }
1270                         }
1271             
1272                         //cout << minDiff << '\t' << minCount << '\t' << endl;
1273                         if(minDiff > pdiffs)    {       trashCode = "p"; minFGroup.clear(); success = minDiff;          }       //no good matches
1274                         //else{
1275                 //check for reverse match
1276                 if (alignment != NULL) {  delete alignment;  }
1277                 
1278                 if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1));   }
1279                 else{ alignment = NULL; }
1280                 
1281                 //can you find the barcode
1282                 minDiff = 1e6;
1283                 minCount = 1;
1284                 vector< vector<int> > minRGroup;
1285                 vector<int> minRPos;
1286                 
1287                 string rawRSequence = reverseOligo(seq.getUnaligned());
1288                 
1289                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1290                     string oligo = reverseOligo(it->first);
1291                     //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
1292                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1293                         success = pdiffs + 10;
1294                         break;
1295                     }
1296                     
1297                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1298                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1299                     oligo = alignment->getSeqAAln();
1300                     string temp = alignment->getSeqBAln();
1301                     
1302                     int alnLength = oligo.length();
1303                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1304                     oligo = oligo.substr(0,alnLength);
1305                     temp = temp.substr(0,alnLength);
1306                     int numDiff = countDiffs(oligo, temp);
1307                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1308                     
1309                     //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
1310                     if(numDiff < minDiff){
1311                         minDiff = numDiff;
1312                         minCount = 1;
1313                         minRGroup.clear();
1314                         minRGroup.push_back(it->second);
1315                         int tempminRPos = 0;
1316                         minRPos.clear();
1317                         for(int i=0;i<alnLength;i++){
1318                             if(temp[i] != '-'){
1319                                 tempminRPos++;
1320                             }
1321                         }
1322                         minRPos.push_back(tempminRPos);
1323                     }else if(numDiff == minDiff){
1324                         int tempminRPos = 0;
1325                         for(int i=0;i<alnLength;i++){
1326                             if(temp[i] != '-'){
1327                                 tempminRPos++;
1328                             }
1329                         }
1330                         minRPos.push_back(tempminRPos);
1331                         minRGroup.push_back(it->second);
1332                     }
1333                     
1334                 }
1335                 
1336                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1337                  for (int i = 0; i < minFGroup.size(); i++) {
1338                  cout << i << '\t';
1339                  for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
1340                  cout << endl;
1341                  }
1342                  cout << endl;
1343                  for (int i = 0; i < minRGroup.size(); i++) {
1344                  cout << i << '\t';
1345                  for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
1346                  cout << endl;
1347                  }
1348                  cout << endl;*/
1349                 if(minDiff > pdiffs)    { trashCode += "q";     success = minDiff;              }       //no good matches
1350                 else  {
1351                     bool foundMatch = false;
1352                     vector<int> matches;
1353                     for (int i = 0; i < minFGroup.size(); i++) {
1354                         for (int j = 0; j < minFGroup[i].size(); j++) {
1355                             for (int k = 0; k < minRGroup.size(); k++) {
1356                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1357                             }
1358                         }
1359                     }
1360                     
1361                     int fStart = 0;
1362                     int rStart = 0;
1363                     if (matches.size() == 1) {
1364                         foundMatch = true;
1365                         group = matches[0];
1366                         fStart = minFPos[0];
1367                         rStart = rawSeq.length() - minRPos[0];
1368                     }
1369                     
1370                     //we have an acceptable match for the forward and reverse, but do they match?
1371                     if (foundMatch) {
1372                         if (!keepForward) {
1373                             string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
1374                             seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
1375                             if(qual.getName() != ""){
1376                                 qual.trimQScores(-1, rStart);
1377                                 qual.trimQScores(fStart, -1);
1378                             }
1379                         }
1380                         success = minDiff;
1381                         //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
1382                     }else { if (trashCode == "") { trashCode = "pq"; } success = pdiffs + 100;  }
1383                 }
1384                         //}
1385                         
1386                         if (alignment != NULL) {  delete alignment;  }
1387                 }
1388                 
1389         //catchall for case I didn't think of
1390         if ((trashCode == "") && (success > pdiffs)) { trashCode = "pq"; }
1391         
1392                 return success;
1393                 
1394         }
1395         catch(exception& e) {
1396                 m->errorOut(e, "TrimOligos", "stripPairedPrimers");
1397                 exit(1);
1398         }
1399         
1400 }
1401
1402 //*******************************************************************/
1403 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
1404         try {
1405                 //look for forward barcode
1406                 string rawFSequence = forwardSeq.getUnaligned();
1407         string rawRSequence = reverseSeq.getUnaligned();
1408                 int success = pdiffs + 1;       //guilty until proven innocent
1409                 
1410                 //can you find the forward barcode
1411                 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1412                         string foligo = it->second.forward;
1413             string roligo = it->second.reverse;
1414             
1415                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
1416                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1417                                 break;  
1418                         }
1419             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
1420                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1421                                 break;  
1422                         }
1423                         
1424                         if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1425                                 group = it->first;
1426                                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1427                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
1428                 forwardQual.trimQScores(foligo.length(), -1);
1429                 reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
1430                                 success = 0;
1431                                 break;
1432                         }
1433                 }
1434                 
1435                 //if you found the barcode or if you don't want to allow for diffs
1436                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1437                 else { //try aligning and see if you can find it
1438                         Alignment* alignment;
1439                         if (ifprimers.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
1440                         else{ alignment = NULL; } 
1441                         
1442                         //can you find the barcode
1443                         int minDiff = 1e6;
1444                         int minCount = 1;
1445                         vector< vector<int> > minFGroup;
1446                         vector<int> minFPos; 
1447             
1448             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1449             /*
1450              1   Sarah   Westcott
1451              2   John    Westcott
1452              3   Anna    Westcott
1453              4   Sarah   Schloss
1454              5   Pat     Schloss
1455              6   Gail    Brown
1456              7   Pat     Moore
1457              
1458              only want to look for forward = Sarah, John, Anna, Pat, Gail
1459              reverse = Westcott, Schloss, Brown, Moore
1460              
1461              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
1462              */
1463             //cout << endl << forwardSeq.getName() << endl;         
1464                         for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1465                                 string oligo = it->first;
1466                                 
1467                                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
1468                                         success = pdiffs + 10;
1469                                         break;
1470                                 }
1471                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1472                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1473                                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1474                                 oligo = alignment->getSeqAAln();
1475                                 string temp = alignment->getSeqBAln();
1476                 
1477                                 int alnLength = oligo.length();
1478                                 
1479                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1480                                 oligo = oligo.substr(0,alnLength);
1481                                 temp = temp.substr(0,alnLength);
1482                 int numDiff = countDiffs(oligo, temp);
1483                                 
1484                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1485                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1486                 
1487                                 if(numDiff < minDiff){
1488                                         minDiff = numDiff;
1489                                         minCount = 1;
1490                                         minFGroup.clear();
1491                     minFGroup.push_back(it->second);
1492                                         int tempminFPos = 0;
1493                     minFPos.clear();
1494                                         for(int i=0;i<alnLength;i++){
1495                                                 if(temp[i] != '-'){
1496                                                         tempminFPos++;
1497                                                 }
1498                                         }
1499                     minFPos.push_back(tempminFPos);
1500                                 }else if(numDiff == minDiff){
1501                                         minFGroup.push_back(it->second);
1502                     int tempminFPos = 0;
1503                                         for(int i=0;i<alnLength;i++){
1504                                                 if(temp[i] != '-'){
1505                                                         tempminFPos++;
1506                                                 }
1507                                         }
1508                     minFPos.push_back(tempminFPos);
1509                                 }
1510                         }
1511             
1512                         //cout << minDiff << '\t' << minCount << '\t' << endl;
1513                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1514                         else{   
1515                 //check for reverse match
1516                 if (alignment != NULL) {  delete alignment;  }
1517                 
1518                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1));   }
1519                 else{ alignment = NULL; } 
1520                 
1521                 //can you find the barcode
1522                 minDiff = 1e6;
1523                 minCount = 1;
1524                 vector< vector<int> > minRGroup;
1525                 vector<int> minRPos;
1526                 
1527                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1528                     string oligo = it->first;
1529                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1530                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1531                         success = pdiffs + 10;
1532                         break;
1533                     }
1534                     
1535                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1536                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1537                     oligo = alignment->getSeqAAln();
1538                     string temp = alignment->getSeqBAln();
1539                     
1540                     int alnLength = oligo.length();
1541                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1542                     oligo = oligo.substr(0,alnLength);
1543                     temp = temp.substr(0,alnLength);
1544                     int numDiff = countDiffs(oligo, temp);
1545                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1546                     
1547                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1548                     if(numDiff < minDiff){
1549                         minDiff = numDiff;
1550                         minCount = 1;
1551                         minRGroup.clear();
1552                         minRGroup.push_back(it->second);
1553                         int tempminRPos = 0;
1554                         minRPos.clear();
1555                         for(int i=0;i<alnLength;i++){
1556                             if(temp[i] != '-'){
1557                                 tempminRPos++;
1558                             }
1559                         }
1560                         minRPos.push_back(tempminRPos);                    
1561                     }else if(numDiff == minDiff){
1562                         int tempminRPos = 0;
1563                         for(int i=0;i<alnLength;i++){
1564                             if(temp[i] != '-'){
1565                                 tempminRPos++;
1566                             }
1567                         }
1568                         minRPos.push_back(tempminRPos);  
1569                         minRGroup.push_back(it->second);
1570                     }
1571                     
1572                 }
1573                 
1574                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1575                  for (int i = 0; i < minFGroup.size(); i++) { 
1576                  cout << i << '\t';
1577                  for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
1578                  cout << endl;
1579                  }
1580                  cout << endl;
1581                  for (int i = 0; i < minRGroup.size(); i++) { 
1582                  cout << i << '\t';
1583                  for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
1584                  cout << endl;
1585                  }
1586                  cout << endl;*/
1587                 if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1588                 else  {  
1589                     bool foundMatch = false;
1590                     vector<int> matches;
1591                     for (int i = 0; i < minFGroup.size(); i++) {
1592                         for (int j = 0; j < minFGroup[i].size(); j++) {
1593                             for (int k = 0; k < minRGroup.size(); k++) {
1594                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1595                             }
1596                         }
1597                     }
1598                     
1599                     int fStart = 0;
1600                     int rStart = 0;
1601                     if (matches.size() == 1) { 
1602                         foundMatch = true;
1603                         group = matches[0]; 
1604                         fStart = minFPos[0];
1605                         rStart = minRPos[0];
1606                     }
1607                     
1608                     //we have an acceptable match for the forward and reverse, but do they match?
1609                     if (foundMatch) {
1610                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1611                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1612                         forwardQual.trimQScores(fStart, -1);
1613                         reverseQual.trimQScores(rStart, -1);
1614                         success = minDiff;
1615                     }else { success = pdiffs + 100;     }
1616                 }
1617                         }
1618                         
1619                         if (alignment != NULL) {  delete alignment;  }
1620                 }
1621                 
1622                 return success;
1623                 
1624         }
1625         catch(exception& e) {
1626                 m->errorOut(e, "TrimOligos", "stripIForward");
1627                 exit(1);
1628         }
1629         
1630 }
1631 //*******************************************************************/
1632 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
1633         try {
1634                 //look for forward barcode
1635                 string rawFSequence = forwardSeq.getUnaligned();
1636         string rawRSequence = reverseSeq.getUnaligned();
1637                 int success = pdiffs + 1;       //guilty until proven innocent
1638                 
1639                 //can you find the forward barcode
1640                 for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
1641                         string foligo = it->second.forward;
1642             string roligo = it->second.reverse;
1643             
1644                         if(rawFSequence.length() < foligo.length()){    //let's just assume that the barcodes are the same length
1645                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1646                                 break;  
1647                         }
1648             if(rawRSequence.length() < roligo.length()){        //let's just assume that the barcodes are the same length
1649                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1650                                 break;  
1651                         }
1652                         
1653                         if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
1654                                 group = it->first;
1655                                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
1656                 reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
1657                                 success = 0;
1658                                 break;
1659                         }
1660                 }
1661                 
1662                 //if you found the barcode or if you don't want to allow for diffs
1663                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1664                 else { //try aligning and see if you can find it
1665                         Alignment* alignment;
1666                         if (ifprimers.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
1667                         else{ alignment = NULL; } 
1668                         
1669                         //can you find the barcode
1670                         int minDiff = 1e6;
1671                         int minCount = 1;
1672                         vector< vector<int> > minFGroup;
1673                         vector<int> minFPos; 
1674             
1675             //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
1676             /*
1677              1   Sarah   Westcott
1678              2   John    Westcott
1679              3   Anna    Westcott
1680              4   Sarah   Schloss
1681              5   Pat     Schloss
1682              6   Gail    Brown
1683              7   Pat     Moore
1684              
1685              only want to look for forward = Sarah, John, Anna, Pat, Gail
1686              reverse = Westcott, Schloss, Brown, Moore
1687              
1688              but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
1689              */
1690             //cout << endl << forwardSeq.getName() << endl;         
1691                         for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
1692                                 string oligo = it->first;
1693                                 
1694                                 if(rawFSequence.length() < maxFPrimerLength){   //let's just assume that the barcodes are the same length
1695                                         success = pdiffs + 10;
1696                                         break;
1697                                 }
1698                                 //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
1699                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1700                                 alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
1701                                 oligo = alignment->getSeqAAln();
1702                                 string temp = alignment->getSeqBAln();
1703                 
1704                                 int alnLength = oligo.length();
1705                                 
1706                                 for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
1707                                 oligo = oligo.substr(0,alnLength);
1708                                 temp = temp.substr(0,alnLength);
1709                 int numDiff = countDiffs(oligo, temp);
1710                                 
1711                 if (alnLength == 0) { numDiff = pdiffs + 100; }
1712                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1713                 
1714                                 if(numDiff < minDiff){
1715                                         minDiff = numDiff;
1716                                         minCount = 1;
1717                                         minFGroup.clear();
1718                     minFGroup.push_back(it->second);
1719                                         int tempminFPos = 0;
1720                     minFPos.clear();
1721                                         for(int i=0;i<alnLength;i++){
1722                                                 if(temp[i] != '-'){
1723                                                         tempminFPos++;
1724                                                 }
1725                                         }
1726                     minFPos.push_back(tempminFPos);
1727                                 }else if(numDiff == minDiff){
1728                                         minFGroup.push_back(it->second);
1729                     int tempminFPos = 0;
1730                                         for(int i=0;i<alnLength;i++){
1731                                                 if(temp[i] != '-'){
1732                                                         tempminFPos++;
1733                                                 }
1734                                         }
1735                     minFPos.push_back(tempminFPos);
1736                                 }
1737                         }
1738             
1739                         //cout << minDiff << '\t' << minCount << '\t' << endl;
1740                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1741                         else{   
1742                 //check for reverse match
1743                 if (alignment != NULL) {  delete alignment;  }
1744                 
1745                 if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1));   }
1746                 else{ alignment = NULL; } 
1747                 
1748                 //can you find the barcode
1749                 minDiff = 1e6;
1750                 minCount = 1;
1751                 vector< vector<int> > minRGroup;
1752                 vector<int> minRPos;
1753                 
1754                 for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
1755                     string oligo = it->first;
1756                     //cout << "before = " << oligo << '\t' << rawRSequence.substr(0,oligo.length()+pdiffs) << endl;
1757                     if(rawRSequence.length() < maxRPrimerLength){       //let's just assume that the barcodes are the same length
1758                         success = pdiffs + 10;
1759                         break;
1760                     }
1761                     
1762                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1763                     alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
1764                     oligo = alignment->getSeqAAln();
1765                     string temp = alignment->getSeqBAln();
1766                     
1767                     int alnLength = oligo.length();
1768                     for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){  alnLength = i+1;        break;  } }
1769                     oligo = oligo.substr(0,alnLength);
1770                     temp = temp.substr(0,alnLength);
1771                     int numDiff = countDiffs(oligo, temp);
1772                     if (alnLength == 0) { numDiff = pdiffs + 100; }
1773                     
1774                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
1775                     if(numDiff < minDiff){
1776                         minDiff = numDiff;
1777                         minCount = 1;
1778                         minRGroup.clear();
1779                         minRGroup.push_back(it->second);
1780                         int tempminRPos = 0;
1781                         minRPos.clear();
1782                         for(int i=0;i<alnLength;i++){
1783                             if(temp[i] != '-'){
1784                                 tempminRPos++;
1785                             }
1786                         }
1787                         minRPos.push_back(tempminRPos);                    
1788                     }else if(numDiff == minDiff){
1789                         int tempminRPos = 0;
1790                         for(int i=0;i<alnLength;i++){
1791                             if(temp[i] != '-'){
1792                                 tempminRPos++;
1793                             }
1794                         }
1795                         minRPos.push_back(tempminRPos);  
1796                         minRGroup.push_back(it->second);
1797                     }
1798                     
1799                 }
1800                 
1801                 /*cout << minDiff << '\t' << minCount << '\t' << endl;
1802                  for (int i = 0; i < minFGroup.size(); i++) { 
1803                  cout << i << '\t';
1804                  for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
1805                  cout << endl;
1806                  }
1807                  cout << endl;
1808                  for (int i = 0; i < minRGroup.size(); i++) { 
1809                  cout << i << '\t';
1810                  for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
1811                  cout << endl;
1812                  }
1813                  cout << endl;*/
1814                 if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1815                 else  {  
1816                     bool foundMatch = false;
1817                     vector<int> matches;
1818                     for (int i = 0; i < minFGroup.size(); i++) {
1819                         for (int j = 0; j < minFGroup[i].size(); j++) {
1820                             for (int k = 0; k < minRGroup.size(); k++) {
1821                                 if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
1822                             }
1823                         }
1824                     }
1825                     
1826                     int fStart = 0;
1827                     int rStart = 0;
1828                     if (matches.size() == 1) { 
1829                         foundMatch = true;
1830                         group = matches[0]; 
1831                         fStart = minFPos[0];
1832                         rStart = minRPos[0];
1833                     }
1834                     
1835                     //we have an acceptable match for the forward and reverse, but do they match?
1836                     if (foundMatch) {
1837                         forwardSeq.setUnaligned(rawFSequence.substr(fStart));
1838                         reverseSeq.setUnaligned(rawRSequence.substr(rStart));
1839                         success = minDiff;
1840                     }else { success = pdiffs + 100;     }
1841                 }
1842                         }
1843                         
1844                         if (alignment != NULL) {  delete alignment;  }
1845                 }
1846                 
1847                 return success;
1848                 
1849         }
1850         catch(exception& e) {
1851                 m->errorOut(e, "TrimOligos", "stripIForward");
1852                 exit(1);
1853         }
1854         
1855 }
1856 //*******************************************************************/
1857 int TrimOligos::stripBarcode(Sequence& seq, int& group){
1858         try {
1859                 
1860                 string rawSequence = seq.getUnaligned();
1861                 int success = bdiffs + 1;       //guilty until proven innocent
1862                 
1863                 //can you find the barcode
1864                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1865                         string oligo = it->first;
1866                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
1867                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1868                                 break;  
1869                         }
1870                         
1871                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1872                                 group = it->second;
1873                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1874                                 
1875                                 success = 0;
1876                                 break;
1877                         }
1878                 }
1879                 
1880                 //if you found the barcode or if you don't want to allow for diffs
1881                 if ((bdiffs == 0) || (success == 0)) { return success;  }
1882                 
1883                 else { //try aligning and see if you can find it
1884             Alignment* alignment;
1885                         if (barcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));  }
1886                         else{ alignment = NULL; } 
1887                         
1888                         //can you find the barcode
1889                         int minDiff = 1e6;
1890                         int minCount = 1;
1891                         int minGroup = -1;
1892                         int minPos = 0;
1893                         
1894                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1895                                 string oligo = it->first;
1896                                 //                              int length = oligo.length();
1897                                 
1898                                 if(rawSequence.length() < maxFBarcodeLength){   //let's just assume that the barcodes are the same length
1899                                         success = bdiffs + 10;
1900                                         break;
1901                                 }
1902                                 
1903                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1904                                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1905                                 oligo = alignment->getSeqAAln();
1906                                 string temp = alignment->getSeqBAln();
1907                                  
1908                                 int alnLength = oligo.length();
1909                                 
1910                                 for(int i=oligo.length()-1;i>=0;i--){
1911                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1912                                 }
1913                                 oligo = oligo.substr(0,alnLength);
1914                                 temp = temp.substr(0,alnLength);
1915                                  
1916                                 int numDiff = countDiffs(oligo, temp);
1917                                 
1918                                 if(numDiff < minDiff){
1919                                         minDiff = numDiff;
1920                                         minCount = 1;
1921                                         minGroup = it->second;
1922                                         minPos = 0;
1923                                         for(int i=0;i<alnLength;i++){
1924                                                 if(temp[i] != '-'){
1925                                                         minPos++;
1926                                                 }
1927                                         }
1928                                 }
1929                                 else if(numDiff == minDiff){
1930                                         minCount++;
1931                                 }
1932                                 
1933                         }
1934                         
1935                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1936                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1937                         else{                                                                                                   //use the best match
1938                                 group = minGroup;
1939                                 seq.setUnaligned(rawSequence.substr(minPos));
1940                                 success = minDiff;
1941                         }
1942                         
1943                         if (alignment != NULL) {  delete alignment;  }
1944                         
1945                 }
1946                 
1947                 return success;
1948                 
1949         }
1950         catch(exception& e) {
1951                 m->errorOut(e, "TrimOligos", "stripBarcode");
1952                 exit(1);
1953         }
1954         
1955 }
1956
1957 /********************************************************************/
1958 int TrimOligos::stripForward(Sequence& seq, int& group){
1959         try {
1960                 
1961                 string rawSequence = seq.getUnaligned();
1962                 int success = pdiffs + 1;       //guilty until proven innocent
1963                 
1964                 //can you find the primer
1965                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1966                         string oligo = it->first;
1967                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1968                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1969                                 break;  
1970                         }
1971                         
1972                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1973                                 group = it->second;
1974                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1975                                 success = 0;
1976                                 break;
1977                         }
1978                 }
1979                 
1980                 //if you found the barcode or if you don't want to allow for diffs
1981                 if ((pdiffs == 0) || (success == 0)) {  return success;  }
1982                 
1983                 else { //try aligning and see if you can find it
1984                         Alignment* alignment;
1985                         if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
1986             else{ alignment = NULL; } 
1987                         
1988                         //can you find the barcode
1989                         int minDiff = 1e6;
1990                         int minCount = 1;
1991                         int minGroup = -1;
1992                         int minPos = 0;
1993                         
1994                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1995                                 string oligo = it->first;
1996                                 //                              int length = oligo.length();
1997                                 
1998                                 if(rawSequence.length() < maxFPrimerLength){    
1999                                         success = pdiffs + 100;
2000                                         break;
2001                                 }
2002                                 
2003                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2004                                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
2005                                 oligo = alignment->getSeqAAln();
2006                                 string temp = alignment->getSeqBAln();
2007                                 
2008                                 int alnLength = oligo.length();
2009                                 
2010                                 for(int i=oligo.length()-1;i>=0;i--){
2011                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
2012                                 }
2013                                 oligo = oligo.substr(0,alnLength);
2014                                 temp = temp.substr(0,alnLength);
2015                                 
2016                                 int numDiff = countDiffs(oligo, temp);
2017                                 
2018                                 if(numDiff < minDiff){
2019                                         minDiff = numDiff;
2020                                         minCount = 1;
2021                                         minGroup = it->second;
2022                                         minPos = 0;
2023                                         for(int i=0;i<alnLength;i++){
2024                                                 if(temp[i] != '-'){
2025                                                         minPos++;
2026                                                 }
2027                                         }
2028                                 }
2029                                 else if(numDiff == minDiff){
2030                                         minCount++;
2031                                 }
2032                                 
2033                         }
2034                         
2035                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
2036                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
2037                         else{                                                                                                   //use the best match
2038                                 group = minGroup;
2039                                 seq.setUnaligned(rawSequence.substr(minPos));
2040                                 success = minDiff;
2041                         }
2042                         
2043                         if (alignment != NULL) {  delete alignment;  }
2044                         
2045                 }
2046                 
2047                 return success;
2048                 
2049         }
2050         catch(exception& e) {
2051                 m->errorOut(e, "TrimOligos", "stripForward");
2052                 exit(1);
2053         }
2054 }
2055 //*******************************************************************/
2056 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
2057         try {
2058         
2059         if (paired) {  int success = stripPairedPrimers(seq, qual, group, keepForward);  return success; }
2060         
2061                 string rawSequence = seq.getUnaligned();
2062                 int success = pdiffs + 1;       //guilty until proven innocent
2063                 
2064                 //can you find the primer
2065                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
2066                         string oligo = it->first;
2067                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
2068                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
2069                                 break;  
2070                         }
2071                         
2072                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2073                                 group = it->second;
2074                                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
2075                                 if(qual.getName() != ""){
2076                                         if (!keepForward) {  qual.trimQScores(oligo.length(), -1); }
2077                                 }
2078                                 success = 0;
2079                                 break;
2080                         }
2081                 }
2082                 
2083                 //if you found the barcode or if you don't want to allow for diffs
2084                 if ((pdiffs == 0) || (success == 0)) { return success;  }
2085                 
2086                 else { //try aligning and see if you can find it
2087                         Alignment* alignment;
2088                         if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));  }
2089                         else{ alignment = NULL; } 
2090                         
2091                         //can you find the barcode
2092                         int minDiff = 1e6;
2093                         int minCount = 1;
2094                         int minGroup = -1;
2095                         int minPos = 0;
2096                         
2097                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
2098                                 string oligo = it->first;
2099                                 //                              int length = oligo.length();
2100                                 
2101                                 if(rawSequence.length() < maxFPrimerLength){    
2102                                         success = pdiffs + 100;
2103                                         break;
2104                                 }
2105                                 
2106                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2107                                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
2108                                 oligo = alignment->getSeqAAln();
2109                                 string temp = alignment->getSeqBAln();
2110                                 
2111                                 int alnLength = oligo.length();
2112                                 
2113                                 for(int i=oligo.length()-1;i>=0;i--){
2114                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
2115                                 }
2116                                 oligo = oligo.substr(0,alnLength);
2117                                 temp = temp.substr(0,alnLength);
2118                                 
2119                                 int numDiff = countDiffs(oligo, temp);
2120                                 
2121                                 if(numDiff < minDiff){
2122                                         minDiff = numDiff;
2123                                         minCount = 1;
2124                                         minGroup = it->second;
2125                                         minPos = 0;
2126                                         for(int i=0;i<alnLength;i++){
2127                                                 if(temp[i] != '-'){
2128                                                         minPos++;
2129                                                 }
2130                                         }
2131                                 }
2132                                 else if(numDiff == minDiff){
2133                                         minCount++;
2134                                 }
2135                                 
2136                         }
2137                         
2138                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
2139                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
2140                         else{                                                                                                   //use the best match
2141                                 group = minGroup;
2142                                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
2143                                 if(qual.getName() != ""){
2144                                         if (!keepForward) { qual.trimQScores(minPos, -1); }
2145                                 }
2146                                 success = minDiff;
2147                         }
2148                         
2149                         if (alignment != NULL) {  delete alignment;  }
2150                         
2151                 }
2152                 
2153                 return success;
2154                 
2155         }
2156         catch(exception& e) {
2157                 m->errorOut(e, "TrimOligos", "stripForward");
2158                 exit(1);
2159         }
2160 }
2161 //******************************************************************/
2162 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
2163         try {
2164                 string rawSequence = seq.getUnaligned();
2165                 bool success = 0;       //guilty until proven innocent
2166                 
2167                 for(int i=0;i<revPrimer.size();i++){
2168                         string oligo = revPrimer[i];
2169                         
2170                         if(rawSequence.length() < oligo.length()){
2171                                 success = 0;
2172                                 break;
2173                         }
2174                         
2175                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2176                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2177                                 if(qual.getName() != ""){
2178                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
2179                                 }
2180                                 success = 1;
2181                                 break;
2182                         }
2183                 }       
2184                 return success;
2185                 
2186         }
2187         catch(exception& e) {
2188                 m->errorOut(e, "TrimOligos", "stripReverse");
2189                 exit(1);
2190         }
2191 }
2192 //******************************************************************/
2193 bool TrimOligos::stripReverse(Sequence& seq){
2194         try {
2195                 
2196                 string rawSequence = seq.getUnaligned();
2197                 bool success = 0;       //guilty until proven innocent
2198                 
2199                 for(int i=0;i<revPrimer.size();i++){
2200                         string oligo = revPrimer[i];
2201                         
2202                         if(rawSequence.length() < oligo.length()){
2203                                 success = 0;
2204                                 break;
2205                         }
2206                         
2207                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
2208                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
2209                                 success = 1;
2210                                 break;
2211                         }
2212                 }       
2213                 
2214                 return success;
2215                 
2216         }
2217         catch(exception& e) {
2218                 m->errorOut(e, "TrimOligos", "stripReverse");
2219                 exit(1);
2220         }
2221 }
2222 //******************************************************************/
2223 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
2224         try {
2225                 string rawSequence = seq.getUnaligned();
2226                 bool success = ldiffs + 1;      //guilty until proven innocent
2227                 
2228                 for(int i=0;i<linker.size();i++){
2229                         string oligo = linker[i];
2230                         
2231                         if(rawSequence.length() < oligo.length()){
2232                                 success = ldiffs + 10;
2233                                 break;
2234                         }
2235                         
2236                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2237                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2238                                 if(qual.getName() != ""){
2239                                         qual.trimQScores(oligo.length(), -1);
2240                                 }
2241                                 success = 0;
2242                                 break;
2243                         }
2244                 }
2245         
2246         //if you found the linker or if you don't want to allow for diffs
2247                 if ((ldiffs == 0) || (success == 0)) { return success;  }
2248                 
2249                 else { //try aligning and see if you can find it
2250                         Alignment* alignment;
2251                         if (linker.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1));   }     
2252                         else{ alignment = NULL; } 
2253                         
2254                         //can you find the barcode
2255                         int minDiff = 1e6;
2256                         int minCount = 1;
2257                         int minPos = 0;
2258                         
2259                         for(int i = 0; i < linker.size(); i++){
2260                                 string oligo = linker[i];
2261                                 //                              int length = oligo.length();
2262                                 
2263                                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
2264                                         success = ldiffs + 10;
2265                                         break;
2266                                 }
2267                                 
2268                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2269                                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2270                                 oligo = alignment->getSeqAAln();
2271                                 string temp = alignment->getSeqBAln();
2272                                 
2273                                 int alnLength = oligo.length();
2274                                 
2275                                 for(int i=oligo.length()-1;i>=0;i--){
2276                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
2277                                 }
2278                                 oligo = oligo.substr(0,alnLength);
2279                                 temp = temp.substr(0,alnLength);
2280                                 
2281                                 int numDiff = countDiffs(oligo, temp);
2282                                 
2283                                 if(numDiff < minDiff){
2284                                         minDiff = numDiff;
2285                                         minCount = 1;
2286                                         minPos = 0;
2287                                         for(int i=0;i<alnLength;i++){
2288                                                 if(temp[i] != '-'){
2289                                                         minPos++;
2290                                                 }
2291                                         }
2292                                 }
2293                                 else if(numDiff == minDiff){
2294                                         minCount++;
2295                                 }
2296                                 
2297                         }
2298                         
2299                         if(minDiff > ldiffs)    {       success = minDiff;              }       //no good matches
2300                         else if(minCount > 1)   {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
2301                         else{                                                                                                   //use the best match
2302                                 seq.setUnaligned(rawSequence.substr(minPos));
2303                                 
2304                                 if(qual.getName() != ""){
2305                                         qual.trimQScores(minPos, -1);
2306                                 }
2307                                 success = minDiff;
2308                         }
2309                         
2310                         if (alignment != NULL) {  delete alignment;  }
2311                         
2312                 }
2313
2314        
2315                 return success;
2316                 
2317         }
2318         catch(exception& e) {
2319                 m->errorOut(e, "TrimOligos", "stripLinker");
2320                 exit(1);
2321         }
2322 }
2323 //******************************************************************/
2324 bool TrimOligos::stripLinker(Sequence& seq){
2325         try {
2326                 
2327                 string rawSequence = seq.getUnaligned();
2328                 bool success = ldiffs +1;       //guilty until proven innocent
2329                 
2330                 for(int i=0;i<linker.size();i++){
2331                         string oligo = linker[i];
2332                         
2333                         if(rawSequence.length() < oligo.length()){
2334                                 success = ldiffs +10;
2335                                 break;
2336                         }
2337                         
2338                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2339                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2340                                 success = 0;
2341                                 break;
2342                         }
2343                 }       
2344                 
2345         //if you found the linker or if you don't want to allow for diffs
2346                 if ((ldiffs == 0) || (success == 0)) { return success;  }
2347                 
2348                 else { //try aligning and see if you can find it
2349                         Alignment* alignment;
2350                         if (linker.size() > 0) {alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLinkerLength+ldiffs+1));  }
2351             else{ alignment = NULL; } 
2352                         
2353                         //can you find the barcode
2354                         int minDiff = 1e6;
2355                         int minCount = 1;
2356                         int minPos = 0;
2357                         
2358                         for(int i = 0; i < linker.size(); i++){
2359                                 string oligo = linker[i];
2360                                 //                              int length = oligo.length();
2361                                 
2362                                 if(rawSequence.length() < maxLinkerLength){     //let's just assume that the barcodes are the same length
2363                                         success = ldiffs + 10;
2364                                         break;
2365                                 }
2366                                 
2367                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2368                                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
2369                                 oligo = alignment->getSeqAAln();
2370                                 string temp = alignment->getSeqBAln();
2371                                 
2372                                 int alnLength = oligo.length();
2373                                 
2374                                 for(int i=oligo.length()-1;i>=0;i--){
2375                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
2376                                 }
2377                                 oligo = oligo.substr(0,alnLength);
2378                                 temp = temp.substr(0,alnLength);
2379                                 
2380                                 int numDiff = countDiffs(oligo, temp);
2381                                 
2382                                 if(numDiff < minDiff){
2383                                         minDiff = numDiff;
2384                                         minCount = 1;
2385                                         minPos = 0;
2386                                         for(int i=0;i<alnLength;i++){
2387                                                 if(temp[i] != '-'){
2388                                                         minPos++;
2389                                                 }
2390                                         }
2391                                 }
2392                                 else if(numDiff == minDiff){
2393                                         minCount++;
2394                                 }
2395                                 
2396                         }
2397                         
2398                         if(minDiff > ldiffs)    {       success = minDiff;              }       //no good matches
2399                         else if(minCount > 1)   {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
2400                         else{                                                                                                   //use the best match
2401                                 seq.setUnaligned(rawSequence.substr(minPos));
2402                                 success = minDiff;
2403                         }
2404                         
2405                         if (alignment != NULL) {  delete alignment;  }
2406                         
2407                 }
2408
2409                 return success;
2410                 
2411         }
2412         catch(exception& e) {
2413                 m->errorOut(e, "TrimOligos", "stripLinker");
2414                 exit(1);
2415         }
2416 }
2417
2418 //******************************************************************/
2419 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
2420         try {
2421                 string rawSequence = seq.getUnaligned();
2422                 bool success = sdiffs+1;        //guilty until proven innocent
2423                 
2424                 for(int i=0;i<spacer.size();i++){
2425                         string oligo = spacer[i];
2426                         
2427                         if(rawSequence.length() < oligo.length()){
2428                                 success = sdiffs+10;
2429                                 break;
2430                         }
2431                         
2432                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2433                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2434                                 if(qual.getName() != ""){
2435                                         qual.trimQScores(oligo.length(), -1);
2436                                 }
2437                                 success = 0;
2438                                 break;
2439                         }
2440                 }
2441         
2442         //if you found the spacer or if you don't want to allow for diffs
2443                 if ((sdiffs == 0) || (success == 0)) { return success;  }
2444                 
2445                 else { //try aligning and see if you can find it
2446                         Alignment* alignment;
2447                         if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1));  }
2448             else{ alignment = NULL; } 
2449                         
2450                         //can you find the barcode
2451                         int minDiff = 1e6;
2452                         int minCount = 1;
2453                         int minPos = 0;
2454                         
2455                         for(int i = 0; i < spacer.size(); i++){
2456                                 string oligo = spacer[i];
2457                                 //                              int length = oligo.length();
2458                                 
2459                                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
2460                                         success = sdiffs + 10;
2461                                         break;
2462                                 }
2463                                 
2464                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2465                                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2466                                 oligo = alignment->getSeqAAln();
2467                                 string temp = alignment->getSeqBAln();
2468                                 
2469                                 int alnLength = oligo.length();
2470                                 
2471                                 for(int i=oligo.length()-1;i>=0;i--){
2472                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
2473                                 }
2474                                 oligo = oligo.substr(0,alnLength);
2475                                 temp = temp.substr(0,alnLength);
2476                                 
2477                                 int numDiff = countDiffs(oligo, temp);
2478                                 
2479                                 if(numDiff < minDiff){
2480                                         minDiff = numDiff;
2481                                         minCount = 1;
2482                                         minPos = 0;
2483                                         for(int i=0;i<alnLength;i++){
2484                                                 if(temp[i] != '-'){
2485                                                         minPos++;
2486                                                 }
2487                                         }
2488                                 }
2489                                 else if(numDiff == minDiff){
2490                                         minCount++;
2491                                 }
2492                                 
2493                         }
2494                         
2495                         if(minDiff > sdiffs)    {       success = minDiff;              }       //no good matches
2496                         else if(minCount > 1)   {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
2497                         else{                                                                                                   //use the best match
2498                                 seq.setUnaligned(rawSequence.substr(minPos));
2499                                 
2500                                 if(qual.getName() != ""){
2501                                         qual.trimQScores(minPos, -1);
2502                                 }
2503                                 success = minDiff;
2504                         }
2505                         
2506                         if (alignment != NULL) {  delete alignment;  }
2507                         
2508                 }
2509         
2510
2511                 return success;
2512                 
2513         }
2514         catch(exception& e) {
2515                 m->errorOut(e, "TrimOligos", "stripSpacer");
2516                 exit(1);
2517         }
2518 }
2519 //******************************************************************/
2520 bool TrimOligos::stripSpacer(Sequence& seq){
2521         try {
2522                 
2523                 string rawSequence = seq.getUnaligned();
2524                 bool success = sdiffs+1;        //guilty until proven innocent
2525                 
2526                 for(int i=0;i<spacer.size();i++){
2527                         string oligo = spacer[i];
2528                         
2529                         if(rawSequence.length() < oligo.length()){
2530                                 success = sdiffs+10;
2531                                 break;
2532                         }
2533                         
2534                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
2535                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
2536                                 success = 0;
2537                                 break;
2538                         }
2539                 }       
2540                 
2541         //if you found the spacer or if you don't want to allow for diffs
2542                 if ((sdiffs == 0) || (success == 0)) { return success;  }
2543                 
2544                 else { //try aligning and see if you can find it
2545                         Alignment* alignment;
2546                         if (spacer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxSpacerLength+sdiffs+1));  }
2547             else{ alignment = NULL; } 
2548                         
2549                         //can you find the barcode
2550                         int minDiff = 1e6;
2551                         int minCount = 1;
2552                         int minPos = 0;
2553                         
2554                         for(int i = 0; i < spacer.size(); i++){
2555                                 string oligo = spacer[i];
2556                                 //                              int length = oligo.length();
2557                                 
2558                                 if(rawSequence.length() < maxSpacerLength){     //let's just assume that the barcodes are the same length
2559                                         success = sdiffs + 10;
2560                                         break;
2561                                 }
2562                                 
2563                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
2564                                 alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
2565                                 oligo = alignment->getSeqAAln();
2566                                 string temp = alignment->getSeqBAln();
2567                                 
2568                                 int alnLength = oligo.length();
2569                                 
2570                                 for(int i=oligo.length()-1;i>=0;i--){
2571                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
2572                                 }
2573                                 oligo = oligo.substr(0,alnLength);
2574                                 temp = temp.substr(0,alnLength);
2575                                 
2576                                 int numDiff = countDiffs(oligo, temp);
2577                                 
2578                                 if(numDiff < minDiff){
2579                                         minDiff = numDiff;
2580                                         minCount = 1;
2581                                         minPos = 0;
2582                                         for(int i=0;i<alnLength;i++){
2583                                                 if(temp[i] != '-'){
2584                                                         minPos++;
2585                                                 }
2586                                         }
2587                                 }
2588                                 else if(numDiff == minDiff){
2589                                         minCount++;
2590                                 }
2591                                 
2592                         }
2593                         
2594                         if(minDiff > sdiffs)    {       success = minDiff;              }       //no good matches
2595                         else if(minCount > 1)   {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
2596                         else{                                                                                                   //use the best match
2597                                 seq.setUnaligned(rawSequence.substr(minPos));
2598                                 success = minDiff;
2599                         }
2600                         
2601                         if (alignment != NULL) {  delete alignment;  }
2602                         
2603                 }
2604
2605                 return success;
2606                 
2607         }
2608         catch(exception& e) {
2609                 m->errorOut(e, "TrimOligos", "stripSpacer");
2610                 exit(1);
2611         }
2612 }
2613
2614 //******************************************************************/
2615 bool TrimOligos::compareDNASeq(string oligo, string seq){
2616         try {
2617                 bool success = 1;
2618                 int length = oligo.length();
2619                 
2620                 for(int i=0;i<length;i++){
2621                         
2622                         if(oligo[i] != seq[i]){
2623                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
2624                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
2625                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
2626                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
2627                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
2628                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
2629                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
2630                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
2631                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2632                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
2633                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
2634                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
2635                                 
2636                                 if(success == 0)        {       break;   }
2637                         }
2638                         else{
2639                                 success = 1;
2640                         }
2641                 }
2642                 
2643                 return success;
2644         }
2645         catch(exception& e) {
2646                 m->errorOut(e, "TrimOligos", "compareDNASeq");
2647                 exit(1);
2648         }
2649         
2650 }
2651 //********************************************************************/
2652 int TrimOligos::countDiffs(string oligo, string seq){
2653         try {
2654                 
2655                 int length = oligo.length();
2656                 int countDiffs = 0;
2657                 
2658                 for(int i=0;i<length;i++){
2659                         
2660                         if(oligo[i] != seq[i]){
2661                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
2662                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
2663                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
2664                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
2665                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
2666                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
2667                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
2668                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
2669                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2670                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
2671                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
2672                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
2673                         }
2674                         
2675                 }
2676                 
2677                 return countDiffs;
2678         }
2679         catch(exception& e) {
2680                 m->errorOut(e, "TrimOligos", "countDiffs");
2681                 exit(1);
2682         }
2683 }
2684 //********************************************************************/
2685 string TrimOligos::reverseOligo(string oligo){
2686         try {
2687         string reverse = "";
2688         
2689         for(int i=oligo.length()-1;i>=0;i--){
2690             
2691             if(oligo[i] == 'A')         {       reverse += 'T'; }
2692             else if(oligo[i] == 'T'){   reverse += 'A'; }
2693             else if(oligo[i] == 'U'){   reverse += 'A'; }
2694             
2695             else if(oligo[i] == 'G'){   reverse += 'C'; }
2696             else if(oligo[i] == 'C'){   reverse += 'G'; }
2697             
2698             else if(oligo[i] == 'R'){   reverse += 'Y'; }
2699             else if(oligo[i] == 'Y'){   reverse += 'R'; }
2700             
2701             else if(oligo[i] == 'M'){   reverse += 'K'; }
2702             else if(oligo[i] == 'K'){   reverse += 'M'; }
2703             
2704             else if(oligo[i] == 'W'){   reverse += 'W'; }
2705             else if(oligo[i] == 'S'){   reverse += 'S'; }
2706             
2707             else if(oligo[i] == 'B'){   reverse += 'V'; }
2708             else if(oligo[i] == 'V'){   reverse += 'B'; }
2709             
2710             else if(oligo[i] == 'D'){   reverse += 'H'; }
2711             else if(oligo[i] == 'H'){   reverse += 'D'; }
2712             
2713             else if(oligo[i] == '-'){   reverse += '-'; } 
2714             else                                                {       reverse += 'N'; }
2715         }
2716         
2717         
2718         return reverse;
2719     }
2720         catch(exception& e) {
2721                 m->errorOut(e, "TrimOligos", "reverseOligo");
2722                 exit(1);
2723         }
2724 }
2725
2726 /********************************************************************/
2727
2728
2729