]> git.donarmstrong.com Git - mothur.git/blob - trimoligos.cpp
Revert to previous commit
[mothur.git] / trimoligos.cpp
1 /*
2  *  trimoligos.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/1/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "trimoligos.h"
11 #include "alignment.hpp"
12 #include "needlemanoverlap.hpp"
13
14
15 /********************************************************************/
16 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
17 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
18         try {
19                 m = MothurOut::getInstance();
20                 
21                 pdiffs = p;
22                 bdiffs = b;
23         ldiffs = l;
24         sdiffs = s;
25                 
26                 barcodes = br;
27                 primers = pr;
28                 revPrimer = r;
29         linker = lk;
30         spacer = sp;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "TrimOligos", "TrimOligos");
34                 exit(1);
35         }
36 }
37 /********************************************************************/
38 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
39 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
40         try {
41                 m = MothurOut::getInstance();
42                 
43                 pdiffs = p;
44                 bdiffs = b;
45                 
46                 barcodes = br;
47                 primers = pr;
48                 revPrimer = r;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "TrimOligos", "TrimOligos");
52                 exit(1);
53         }
54 }
55 /********************************************************************/
56 TrimOligos::~TrimOligos() {}
57 //*******************************************************************/
58 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
59         try {
60                 
61                 string rawSequence = seq.getUnaligned();
62                 int success = bdiffs + 1;       //guilty until proven innocent
63                 
64                 //can you find the barcode
65                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
66                         string oligo = it->first;
67                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
68                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
69                                 break;  
70                         }
71                         
72                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
73                                 group = it->second;
74                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
75                                 
76                                 if(qual.getName() != ""){
77                                         qual.trimQScores(oligo.length(), -1);
78                                 }
79                                 
80                                 success = 0;
81                                 break;
82                         }
83                 }
84                 
85                 //if you found the barcode or if you don't want to allow for diffs
86                 if ((bdiffs == 0) || (success == 0)) { return success;  }
87                 
88                 else { //try aligning and see if you can find it
89                         
90                         int maxLength = 0;
91                         
92                         Alignment* alignment;
93                         if (barcodes.size() > 0) {
94                                 map<string,int>::iterator it; 
95                                 
96                                 for(it=barcodes.begin();it!=barcodes.end();it++){
97                                         if(it->first.length() > maxLength){
98                                                 maxLength = it->first.length();
99                                         }
100                                 }
101                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
102                                 
103                         }else{ alignment = NULL; } 
104                         
105                         //can you find the barcode
106                         int minDiff = 1e6;
107                         int minCount = 1;
108                         int minGroup = -1;
109                         int minPos = 0;
110                         
111                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
112                                 string oligo = it->first;
113                                 //                              int length = oligo.length();
114                                 
115                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
116                                         success = bdiffs + 10;
117                                         break;
118                                 }
119                                 
120                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
121                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
122                                 oligo = alignment->getSeqAAln();
123                                 string temp = alignment->getSeqBAln();
124                                 
125                                 int alnLength = oligo.length();
126                                 
127                                 for(int i=oligo.length()-1;i>=0;i--){
128                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
129                                 }
130                                 oligo = oligo.substr(0,alnLength);
131                                 temp = temp.substr(0,alnLength);
132                                 
133                                 int numDiff = countDiffs(oligo, temp);
134                                 
135                                 if(numDiff < minDiff){
136                                         minDiff = numDiff;
137                                         minCount = 1;
138                                         minGroup = it->second;
139                                         minPos = 0;
140                                         for(int i=0;i<alnLength;i++){
141                                                 if(temp[i] != '-'){
142                                                         minPos++;
143                                                 }
144                                         }
145                                 }
146                                 else if(numDiff == minDiff){
147                                         minCount++;
148                                 }
149                                 
150                         }
151                         
152                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
153                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
154                         else{                                                                                                   //use the best match
155                                 group = minGroup;
156                                 seq.setUnaligned(rawSequence.substr(minPos));
157     
158                                 if(qual.getName() != ""){
159                                         qual.trimQScores(minPos, -1);
160                                 }
161                                 success = minDiff;
162                         }
163                         
164                         if (alignment != NULL) {  delete alignment;  }
165                         
166                 }
167                 
168                 return success;
169                 
170         }
171         catch(exception& e) {
172                 m->errorOut(e, "TrimOligos", "stripBarcode");
173                 exit(1);
174         }
175         
176 }
177 //*******************************************************************/
178 int TrimOligos::stripBarcode(Sequence& seq, int& group){
179         try {
180                 
181                 string rawSequence = seq.getUnaligned();
182                 int success = bdiffs + 1;       //guilty until proven innocent
183                 
184                 //can you find the barcode
185                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
186                         string oligo = it->first;
187                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
188                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
189                                 break;  
190                         }
191                         
192                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
193                                 group = it->second;
194                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
195                                 
196                                 success = 0;
197                                 break;
198                         }
199                 }
200                 
201                 //if you found the barcode or if you don't want to allow for diffs
202                 if ((bdiffs == 0) || (success == 0)) { return success;  }
203                 
204                 else { //try aligning and see if you can find it
205                         
206                         int maxLength = 0;
207                         
208                         Alignment* alignment;
209                         if (barcodes.size() > 0) {
210                                 map<string,int>::iterator it=barcodes.begin();
211                                 
212                                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
213                                         if(it->first.length() > maxLength){
214                                                 maxLength = it->first.length();
215                                         }
216                                 }
217                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
218                                 
219                         }else{ alignment = NULL; } 
220                         
221                         //can you find the barcode
222                         int minDiff = 1e6;
223                         int minCount = 1;
224                         int minGroup = -1;
225                         int minPos = 0;
226                         
227                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
228                                 string oligo = it->first;
229                                 //                              int length = oligo.length();
230                                 
231                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
232                                         success = bdiffs + 10;
233                                         break;
234                                 }
235                                 
236                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
237                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
238                                 oligo = alignment->getSeqAAln();
239                                 string temp = alignment->getSeqBAln();
240                                 
241                                 int alnLength = oligo.length();
242                                 
243                                 for(int i=oligo.length()-1;i>=0;i--){
244                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
245                                 }
246                                 oligo = oligo.substr(0,alnLength);
247                                 temp = temp.substr(0,alnLength);
248                                 
249                                 int numDiff = countDiffs(oligo, temp);
250                                 
251                                 if(numDiff < minDiff){
252                                         minDiff = numDiff;
253                                         minCount = 1;
254                                         minGroup = it->second;
255                                         minPos = 0;
256                                         for(int i=0;i<alnLength;i++){
257                                                 if(temp[i] != '-'){
258                                                         minPos++;
259                                                 }
260                                         }
261                                 }
262                                 else if(numDiff == minDiff){
263                                         minCount++;
264                                 }
265                                 
266                         }
267                         
268                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
269                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
270                         else{                                                                                                   //use the best match
271                                 group = minGroup;
272                                 seq.setUnaligned(rawSequence.substr(minPos));
273                                 success = minDiff;
274                         }
275                         
276                         if (alignment != NULL) {  delete alignment;  }
277                         
278                 }
279                 
280                 return success;
281                 
282         }
283         catch(exception& e) {
284                 m->errorOut(e, "TrimOligos", "stripBarcode");
285                 exit(1);
286         }
287         
288 }
289 //********************************************************************/
290 int TrimOligos::stripForward(Sequence& seq, int& group){
291         try {
292                 
293                 string rawSequence = seq.getUnaligned();
294                 int success = pdiffs + 1;       //guilty until proven innocent
295                 
296                 //can you find the primer
297                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
298                         string oligo = it->first;
299                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
300                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
301                                 break;  
302                         }
303                         
304                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
305                                 group = it->second;
306                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
307                                 success = 0;
308                                 break;
309                         }
310                 }
311                 
312                 //if you found the barcode or if you don't want to allow for diffs
313                 if ((pdiffs == 0) || (success == 0)) {  return success;  }
314                 
315                 else { //try aligning and see if you can find it
316                         
317                         int maxLength = 0;
318                         
319                         Alignment* alignment;
320                         if (primers.size() > 0) {
321                                 map<string,int>::iterator it; 
322                                 
323                                 for(it=primers.begin();it!=primers.end();it++){
324                                         if(it->first.length() > maxLength){
325                                                 maxLength = it->first.length();
326                                         }
327                                 }
328                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
329                                 
330                         }else{ alignment = NULL; } 
331                         
332                         //can you find the barcode
333                         int minDiff = 1e6;
334                         int minCount = 1;
335                         int minGroup = -1;
336                         int minPos = 0;
337                         
338                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
339                                 string oligo = it->first;
340                                 //                              int length = oligo.length();
341                                 
342                                 if(rawSequence.length() < maxLength){   
343                                         success = pdiffs + 100;
344                                         break;
345                                 }
346                                 
347                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
348                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
349                                 oligo = alignment->getSeqAAln();
350                                 string temp = alignment->getSeqBAln();
351                                 
352                                 int alnLength = oligo.length();
353                                 
354                                 for(int i=oligo.length()-1;i>=0;i--){
355                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
356                                 }
357                                 oligo = oligo.substr(0,alnLength);
358                                 temp = temp.substr(0,alnLength);
359                                 
360                                 int numDiff = countDiffs(oligo, temp);
361                                 
362                                 if(numDiff < minDiff){
363                                         minDiff = numDiff;
364                                         minCount = 1;
365                                         minGroup = it->second;
366                                         minPos = 0;
367                                         for(int i=0;i<alnLength;i++){
368                                                 if(temp[i] != '-'){
369                                                         minPos++;
370                                                 }
371                                         }
372                                 }
373                                 else if(numDiff == minDiff){
374                                         minCount++;
375                                 }
376                                 
377                         }
378                         
379                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
380                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
381                         else{                                                                                                   //use the best match
382                                 group = minGroup;
383                                 seq.setUnaligned(rawSequence.substr(minPos));
384                                 success = minDiff;
385                         }
386                         
387                         if (alignment != NULL) {  delete alignment;  }
388                         
389                 }
390                 
391                 return success;
392                 
393         }
394         catch(exception& e) {
395                 m->errorOut(e, "TrimOligos", "stripForward");
396                 exit(1);
397         }
398 }
399 //*******************************************************************/
400 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
401         try {
402                 string rawSequence = seq.getUnaligned();
403                 int success = pdiffs + 1;       //guilty until proven innocent
404                 
405                 //can you find the primer
406                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
407                         string oligo = it->first;
408                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
409                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
410                                 break;  
411                         }
412                         
413                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
414                                 group = it->second;
415                                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
416                                 if(qual.getName() != ""){
417                                         if (!keepForward) {  qual.trimQScores(oligo.length(), -1); }
418                                 }
419                                 success = 0;
420                                 break;
421                         }
422                 }
423                 
424                 //if you found the barcode or if you don't want to allow for diffs
425                 if ((pdiffs == 0) || (success == 0)) { return success;  }
426                 
427                 else { //try aligning and see if you can find it
428                         
429                         int maxLength = 0;
430                         
431                         Alignment* alignment;
432                         if (primers.size() > 0) {
433                                 map<string,int>::iterator it; 
434                                 
435                                 for(it=primers.begin();it!=primers.end();it++){
436                                         if(it->first.length() > maxLength){
437                                                 maxLength = it->first.length();
438                                         }
439                                 }
440                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
441                                 
442                         }else{ alignment = NULL; } 
443                         
444                         //can you find the barcode
445                         int minDiff = 1e6;
446                         int minCount = 1;
447                         int minGroup = -1;
448                         int minPos = 0;
449                         
450                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
451                                 string oligo = it->first;
452                                 //                              int length = oligo.length();
453                                 
454                                 if(rawSequence.length() < maxLength){   
455                                         success = pdiffs + 100;
456                                         break;
457                                 }
458                                 
459                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
460                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
461                                 oligo = alignment->getSeqAAln();
462                                 string temp = alignment->getSeqBAln();
463                                 
464                                 int alnLength = oligo.length();
465                                 
466                                 for(int i=oligo.length()-1;i>=0;i--){
467                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
468                                 }
469                                 oligo = oligo.substr(0,alnLength);
470                                 temp = temp.substr(0,alnLength);
471                                 
472                                 int numDiff = countDiffs(oligo, temp);
473                                 
474                                 if(numDiff < minDiff){
475                                         minDiff = numDiff;
476                                         minCount = 1;
477                                         minGroup = it->second;
478                                         minPos = 0;
479                                         for(int i=0;i<alnLength;i++){
480                                                 if(temp[i] != '-'){
481                                                         minPos++;
482                                                 }
483                                         }
484                                 }
485                                 else if(numDiff == minDiff){
486                                         minCount++;
487                                 }
488                                 
489                         }
490                         
491                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
492                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
493                         else{                                                                                                   //use the best match
494                                 group = minGroup;
495                                 if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
496                                 if(qual.getName() != ""){
497                                         if (!keepForward) { qual.trimQScores(minPos, -1); }
498                                 }
499                                 success = minDiff;
500                         }
501                         
502                         if (alignment != NULL) {  delete alignment;  }
503                         
504                 }
505                 
506                 return success;
507                 
508         }
509         catch(exception& e) {
510                 m->errorOut(e, "TrimOligos", "stripForward");
511                 exit(1);
512         }
513 }
514 //******************************************************************/
515 bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){
516         try {
517                 string rawSequence = seq.getUnaligned();
518                 bool success = 0;       //guilty until proven innocent
519                 
520                 for(int i=0;i<revPrimer.size();i++){
521                         string oligo = revPrimer[i];
522                         
523                         if(rawSequence.length() < oligo.length()){
524                                 success = 0;
525                                 break;
526                         }
527                         
528                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
529                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
530                                 if(qual.getName() != ""){
531                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
532                                 }
533                                 success = 1;
534                                 break;
535                         }
536                 }       
537                 return success;
538                 
539         }
540         catch(exception& e) {
541                 m->errorOut(e, "TrimOligos", "stripReverse");
542                 exit(1);
543         }
544 }
545 //******************************************************************/
546 bool TrimOligos::stripReverse(Sequence& seq){
547         try {
548                 
549                 string rawSequence = seq.getUnaligned();
550                 bool success = 0;       //guilty until proven innocent
551                 
552                 for(int i=0;i<revPrimer.size();i++){
553                         string oligo = revPrimer[i];
554                         
555                         if(rawSequence.length() < oligo.length()){
556                                 success = 0;
557                                 break;
558                         }
559                         
560                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
561                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
562                                 success = 1;
563                                 break;
564                         }
565                 }       
566                 
567                 return success;
568                 
569         }
570         catch(exception& e) {
571                 m->errorOut(e, "TrimOligos", "stripReverse");
572                 exit(1);
573         }
574 }
575 //******************************************************************/
576 bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
577         try {
578                 string rawSequence = seq.getUnaligned();
579                 bool success = ldiffs + 1;      //guilty until proven innocent
580                 
581                 for(int i=0;i<linker.size();i++){
582                         string oligo = linker[i];
583                         
584                         if(rawSequence.length() < oligo.length()){
585                                 success = ldiffs + 10;
586                                 break;
587                         }
588                         
589                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
590                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
591                                 if(qual.getName() != ""){
592                                         qual.trimQScores(oligo.length(), -1);
593                                 }
594                                 success = 0;
595                                 break;
596                         }
597                 }
598         
599         //if you found the linker or if you don't want to allow for diffs
600                 if ((ldiffs == 0) || (success == 0)) { return success;  }
601                 
602                 else { //try aligning and see if you can find it
603                         
604                         int maxLength = 0;
605                         
606                         Alignment* alignment;
607                         if (linker.size() > 0) {
608                                 for(int i = 0; i < linker.size(); i++){
609                                         if(linker[i].length() > maxLength){
610                                                 maxLength = linker[i].length();
611                                         }
612                                 }
613                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+ldiffs+1));  
614                                 
615                         }else{ alignment = NULL; } 
616                         
617                         //can you find the barcode
618                         int minDiff = 1e6;
619                         int minCount = 1;
620                         int minPos = 0;
621                         
622                         for(int i = 0; i < linker.size(); i++){
623                                 string oligo = linker[i];
624                                 //                              int length = oligo.length();
625                                 
626                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
627                                         success = ldiffs + 10;
628                                         break;
629                                 }
630                                 
631                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
632                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
633                                 oligo = alignment->getSeqAAln();
634                                 string temp = alignment->getSeqBAln();
635                                 
636                                 int alnLength = oligo.length();
637                                 
638                                 for(int i=oligo.length()-1;i>=0;i--){
639                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
640                                 }
641                                 oligo = oligo.substr(0,alnLength);
642                                 temp = temp.substr(0,alnLength);
643                                 
644                                 int numDiff = countDiffs(oligo, temp);
645                                 
646                                 if(numDiff < minDiff){
647                                         minDiff = numDiff;
648                                         minCount = 1;
649                                         minPos = 0;
650                                         for(int i=0;i<alnLength;i++){
651                                                 if(temp[i] != '-'){
652                                                         minPos++;
653                                                 }
654                                         }
655                                 }
656                                 else if(numDiff == minDiff){
657                                         minCount++;
658                                 }
659                                 
660                         }
661                         
662                         if(minDiff > ldiffs)    {       success = minDiff;              }       //no good matches
663                         else if(minCount > 1)   {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
664                         else{                                                                                                   //use the best match
665                                 seq.setUnaligned(rawSequence.substr(minPos));
666                                 
667                                 if(qual.getName() != ""){
668                                         qual.trimQScores(minPos, -1);
669                                 }
670                                 success = minDiff;
671                         }
672                         
673                         if (alignment != NULL) {  delete alignment;  }
674                         
675                 }
676
677        
678                 return success;
679                 
680         }
681         catch(exception& e) {
682                 m->errorOut(e, "TrimOligos", "stripLinker");
683                 exit(1);
684         }
685 }
686 //******************************************************************/
687 bool TrimOligos::stripLinker(Sequence& seq){
688         try {
689                 
690                 string rawSequence = seq.getUnaligned();
691                 bool success = ldiffs +1;       //guilty until proven innocent
692                 
693                 for(int i=0;i<linker.size();i++){
694                         string oligo = linker[i];
695                         
696                         if(rawSequence.length() < oligo.length()){
697                                 success = ldiffs +10;
698                                 break;
699                         }
700                         
701                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
702                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
703                                 success = 0;
704                                 break;
705                         }
706                 }       
707                 
708         //if you found the linker or if you don't want to allow for diffs
709                 if ((ldiffs == 0) || (success == 0)) { return success;  }
710                 
711                 else { //try aligning and see if you can find it
712                         
713                         int maxLength = 0;
714                         
715                         Alignment* alignment;
716                         if (linker.size() > 0) {
717                                 for(int i = 0; i < linker.size(); i++){
718                                         if(linker[i].length() > maxLength){
719                                                 maxLength = linker[i].length();
720                                         }
721                                 }
722                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+ldiffs+1));  
723                                 
724                         }else{ alignment = NULL; } 
725                         
726                         //can you find the barcode
727                         int minDiff = 1e6;
728                         int minCount = 1;
729                         int minPos = 0;
730                         
731                         for(int i = 0; i < linker.size(); i++){
732                                 string oligo = linker[i];
733                                 //                              int length = oligo.length();
734                                 
735                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
736                                         success = ldiffs + 10;
737                                         break;
738                                 }
739                                 
740                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
741                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
742                                 oligo = alignment->getSeqAAln();
743                                 string temp = alignment->getSeqBAln();
744                                 
745                                 int alnLength = oligo.length();
746                                 
747                                 for(int i=oligo.length()-1;i>=0;i--){
748                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
749                                 }
750                                 oligo = oligo.substr(0,alnLength);
751                                 temp = temp.substr(0,alnLength);
752                                 
753                                 int numDiff = countDiffs(oligo, temp);
754                                 
755                                 if(numDiff < minDiff){
756                                         minDiff = numDiff;
757                                         minCount = 1;
758                                         minPos = 0;
759                                         for(int i=0;i<alnLength;i++){
760                                                 if(temp[i] != '-'){
761                                                         minPos++;
762                                                 }
763                                         }
764                                 }
765                                 else if(numDiff == minDiff){
766                                         minCount++;
767                                 }
768                                 
769                         }
770                         
771                         if(minDiff > ldiffs)    {       success = minDiff;              }       //no good matches
772                         else if(minCount > 1)   {       success = ldiffs + 100; }       //can't tell the difference between multiple barcodes
773                         else{                                                                                                   //use the best match
774                                 seq.setUnaligned(rawSequence.substr(minPos));
775                                 success = minDiff;
776                         }
777                         
778                         if (alignment != NULL) {  delete alignment;  }
779                         
780                 }
781
782                 return success;
783                 
784         }
785         catch(exception& e) {
786                 m->errorOut(e, "TrimOligos", "stripLinker");
787                 exit(1);
788         }
789 }
790
791 //******************************************************************/
792 bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
793         try {
794                 string rawSequence = seq.getUnaligned();
795                 bool success = sdiffs+1;        //guilty until proven innocent
796                 
797                 for(int i=0;i<spacer.size();i++){
798                         string oligo = spacer[i];
799                         
800                         if(rawSequence.length() < oligo.length()){
801                                 success = sdiffs+10;
802                                 break;
803                         }
804                         
805                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
806                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
807                                 if(qual.getName() != ""){
808                                         qual.trimQScores(oligo.length(), -1);
809                                 }
810                                 success = 0;
811                                 break;
812                         }
813                 }
814         
815         //if you found the spacer or if you don't want to allow for diffs
816                 if ((sdiffs == 0) || (success == 0)) { return success;  }
817                 
818                 else { //try aligning and see if you can find it
819                         
820                         int maxLength = 0;
821                         
822                         Alignment* alignment;
823                         if (spacer.size() > 0) {
824                                 for(int i = 0; i < spacer.size(); i++){
825                                         if(spacer[i].length() > maxLength){
826                                                 maxLength = spacer[i].length();
827                                         }
828                                 }
829                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+sdiffs+1));  
830                                 
831                         }else{ alignment = NULL; } 
832                         
833                         //can you find the barcode
834                         int minDiff = 1e6;
835                         int minCount = 1;
836                         int minPos = 0;
837                         
838                         for(int i = 0; i < spacer.size(); i++){
839                                 string oligo = spacer[i];
840                                 //                              int length = oligo.length();
841                                 
842                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
843                                         success = sdiffs + 10;
844                                         break;
845                                 }
846                                 
847                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
848                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
849                                 oligo = alignment->getSeqAAln();
850                                 string temp = alignment->getSeqBAln();
851                                 
852                                 int alnLength = oligo.length();
853                                 
854                                 for(int i=oligo.length()-1;i>=0;i--){
855                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
856                                 }
857                                 oligo = oligo.substr(0,alnLength);
858                                 temp = temp.substr(0,alnLength);
859                                 
860                                 int numDiff = countDiffs(oligo, temp);
861                                 
862                                 if(numDiff < minDiff){
863                                         minDiff = numDiff;
864                                         minCount = 1;
865                                         minPos = 0;
866                                         for(int i=0;i<alnLength;i++){
867                                                 if(temp[i] != '-'){
868                                                         minPos++;
869                                                 }
870                                         }
871                                 }
872                                 else if(numDiff == minDiff){
873                                         minCount++;
874                                 }
875                                 
876                         }
877                         
878                         if(minDiff > sdiffs)    {       success = minDiff;              }       //no good matches
879                         else if(minCount > 1)   {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
880                         else{                                                                                                   //use the best match
881                                 seq.setUnaligned(rawSequence.substr(minPos));
882                                 
883                                 if(qual.getName() != ""){
884                                         qual.trimQScores(minPos, -1);
885                                 }
886                                 success = minDiff;
887                         }
888                         
889                         if (alignment != NULL) {  delete alignment;  }
890                         
891                 }
892         
893
894                 return success;
895                 
896         }
897         catch(exception& e) {
898                 m->errorOut(e, "TrimOligos", "stripSpacer");
899                 exit(1);
900         }
901 }
902 //******************************************************************/
903 bool TrimOligos::stripSpacer(Sequence& seq){
904         try {
905                 
906                 string rawSequence = seq.getUnaligned();
907                 bool success = sdiffs+1;        //guilty until proven innocent
908                 
909                 for(int i=0;i<spacer.size();i++){
910                         string oligo = spacer[i];
911                         
912                         if(rawSequence.length() < oligo.length()){
913                                 success = sdiffs+10;
914                                 break;
915                         }
916                         
917                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
918                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
919                                 success = 0;
920                                 break;
921                         }
922                 }       
923                 
924         //if you found the spacer or if you don't want to allow for diffs
925                 if ((sdiffs == 0) || (success == 0)) { return success;  }
926                 
927                 else { //try aligning and see if you can find it
928                         
929                         int maxLength = 0;
930                         
931                         Alignment* alignment;
932                         if (spacer.size() > 0) {
933                                 for(int i = 0; i < spacer.size(); i++){
934                                         if(spacer[i].length() > maxLength){
935                                                 maxLength = spacer[i].length();
936                                         }
937                                 }
938                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+sdiffs+1));  
939                                 
940                         }else{ alignment = NULL; } 
941                         
942                         //can you find the barcode
943                         int minDiff = 1e6;
944                         int minCount = 1;
945                         int minPos = 0;
946                         
947                         for(int i = 0; i < spacer.size(); i++){
948                                 string oligo = spacer[i];
949                                 //                              int length = oligo.length();
950                                 
951                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
952                                         success = sdiffs + 10;
953                                         break;
954                                 }
955                                 
956                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
957                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
958                                 oligo = alignment->getSeqAAln();
959                                 string temp = alignment->getSeqBAln();
960                                 
961                                 int alnLength = oligo.length();
962                                 
963                                 for(int i=oligo.length()-1;i>=0;i--){
964                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
965                                 }
966                                 oligo = oligo.substr(0,alnLength);
967                                 temp = temp.substr(0,alnLength);
968                                 
969                                 int numDiff = countDiffs(oligo, temp);
970                                 
971                                 if(numDiff < minDiff){
972                                         minDiff = numDiff;
973                                         minCount = 1;
974                                         minPos = 0;
975                                         for(int i=0;i<alnLength;i++){
976                                                 if(temp[i] != '-'){
977                                                         minPos++;
978                                                 }
979                                         }
980                                 }
981                                 else if(numDiff == minDiff){
982                                         minCount++;
983                                 }
984                                 
985                         }
986                         
987                         if(minDiff > sdiffs)    {       success = minDiff;              }       //no good matches
988                         else if(minCount > 1)   {       success = sdiffs + 100; }       //can't tell the difference between multiple barcodes
989                         else{                                                                                                   //use the best match
990                                 seq.setUnaligned(rawSequence.substr(minPos));
991                                 success = minDiff;
992                         }
993                         
994                         if (alignment != NULL) {  delete alignment;  }
995                         
996                 }
997
998                 return success;
999                 
1000         }
1001         catch(exception& e) {
1002                 m->errorOut(e, "TrimOligos", "stripSpacer");
1003                 exit(1);
1004         }
1005 }
1006
1007 //******************************************************************/
1008 bool TrimOligos::compareDNASeq(string oligo, string seq){
1009         try {
1010                 bool success = 1;
1011                 int length = oligo.length();
1012                 
1013                 for(int i=0;i<length;i++){
1014                         
1015                         if(oligo[i] != seq[i]){
1016                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1017                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1018                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1019                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1020                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1021                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1022                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1023                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1024                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1025                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1026                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1027                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1028                                 
1029                                 if(success == 0)        {       break;   }
1030                         }
1031                         else{
1032                                 success = 1;
1033                         }
1034                 }
1035                 
1036                 return success;
1037         }
1038         catch(exception& e) {
1039                 m->errorOut(e, "TrimOligos", "compareDNASeq");
1040                 exit(1);
1041         }
1042         
1043 }
1044 //********************************************************************/
1045 int TrimOligos::countDiffs(string oligo, string seq){
1046         try {
1047                 
1048                 int length = oligo.length();
1049                 int countDiffs = 0;
1050                 
1051                 for(int i=0;i<length;i++){
1052                         
1053                         if(oligo[i] != seq[i]){
1054                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1055                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1056                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1057                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1058                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1059                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1060                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1061                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1062                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1063                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1064                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1065                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1066                         }
1067                         
1068                 }
1069                 
1070                 return countDiffs;
1071         }
1072         catch(exception& e) {
1073                 m->errorOut(e, "TrimOligos", "countDiffs");
1074                 exit(1);
1075         }
1076 }
1077 /********************************************************************/
1078
1079
1080