]> git.donarmstrong.com Git - mothur.git/blobdiff - trimoligos.cpp
fixes while testing 1.33.0
[mothur.git] / trimoligos.cpp
index 5d375a75150c1cc86c5b6c39e3ac21cdeeb7f110..0f2a9cbb0e584ddfa393a53015b02b37ccfa3903 100644 (file)
@@ -368,6 +368,8 @@ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
                 temp = temp.substr(0,alnLength);
                 int numDiff = countDiffs(oligo, temp);
                 
+                if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " aligned fragment =" + temp + ", barcode =" + oligo + ", numDiffs = " + toString(numDiff) + "\n"); }
+                
                 if(numDiff < minDiff){
                     minDiff = numDiff;
                     minCount = 1;
@@ -431,10 +433,10 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
                 break;
             }
             
-            if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
+            if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
                 group = it->first;
                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
-                reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+                reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
                 success = 0;
                 break;
             }
@@ -487,6 +489,8 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
                 temp = temp.substr(0,alnLength);
                 int numDiff = countDiffs(oligo, temp);
                 
+                if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
+                
                 if (alnLength == 0) { numDiff = bdiffs + 100; }
                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
                 
@@ -548,6 +552,9 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
                     oligo = oligo.substr(0,alnLength);
                     temp = temp.substr(0,alnLength);
                     int numDiff = countDiffs(oligo, temp);
+                    
+                    if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
+                    
                     if (alnLength == 0) { numDiff = bdiffs + 100; }
                     
                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
@@ -577,19 +584,6 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
                     
                 }
                 
-                /*cout << minDiff << '\t' << minCount << '\t' << endl;
-                 for (int i = 0; i < minFGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;
-                 for (int i = 0; i < minRGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;*/
                 if(minDiff > bdiffs)   {       success = minDiff;      }       //no good matches
                 else {
                     bool foundMatch = false;
@@ -654,12 +648,12 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                 break;
             }
             
-            if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
+            if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
                 group = it->first;
                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
-                reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+                reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
                 forwardQual.trimQScores(foligo.length(), -1);
-                reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
+                reverseQual.trimQScores(roligo.length(), -1);
                 success = 0;
                 break;
             }
@@ -713,7 +707,7 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                 int numDiff = countDiffs(oligo, temp);
                 
                 if (alnLength == 0) { numDiff = bdiffs + 100; }
-                //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+                if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
                 
                 if(numDiff < minDiff){
                     minDiff = numDiff;
@@ -775,6 +769,8 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                     int numDiff = countDiffs(oligo, temp);
                     if (alnLength == 0) { numDiff = bdiffs + 100; }
                     
+                    if (m->debug) { m->mothurOut("[DEBUG]: reverse " + reverseSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
+                    
                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
                     if(numDiff < minDiff){
                         minDiff = numDiff;
@@ -802,19 +798,6 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                     
                 }
                 
-                /*cout << minDiff << '\t' << minCount << '\t' << endl;
-                 for (int i = 0; i < minFGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;
-                 for (int i = 0; i < minRGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;*/
                 if(minDiff > bdiffs)   {       success = minDiff;      }       //no good matches
                 else {
                     bool foundMatch = false;
@@ -881,6 +864,8 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou
                 break;
             }
             
+            if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
+            
             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
                 group = it->first;
                 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
@@ -1033,19 +1018,6 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou
                     
                 }
                 
-                /*cout << minDiff << '\t' << minCount << '\t' << endl;
-                 for (int i = 0; i < minFGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;
-                 for (int i = 0; i < minRGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;*/
                 if(minDiff > bdiffs)   {       success = minDiff;      }       //no good matches
                 else {
                     bool foundMatch = false;
@@ -1065,6 +1037,7 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou
                         group = matches[0];
                         fStart = minFPos[0];
                         rStart = rawSeq.length() - minRPos[0];
+                        if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence
                     }
                     
                     //we have an acceptable match for the forward and reverse, but do they match?
@@ -1116,6 +1089,8 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou
                 break;
             }
             
+            if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
+            
             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
                 group = it->first;
                 if (!keepForward) {
@@ -1165,7 +1140,7 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou
                     success = pdiffs + 10;
                     break;
                 }
-                //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
+                //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl;
                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
                 oligo = alignment->getSeqAAln();
@@ -1270,19 +1245,6 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou
                     
                 }
                 
-                /*cout << minDiff << '\t' << minCount << '\t' << endl;
-                 for (int i = 0; i < minFGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;
-                 for (int i = 0; i < minRGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;*/
                 if(minDiff > pdiffs)   {       success = minDiff;      }       //no good matches
                 else {
                     bool foundMatch = false;
@@ -1302,6 +1264,7 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou
                         group = matches[0];
                         fStart = minFPos[0];
                         rStart = rawSeq.length() - minRPos[0];
+                        if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence
                     }
                     
                     //we have an acceptable match for the forward and reverse, but do they match?
@@ -1355,12 +1318,12 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                 break;
             }
             
-            if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
+            if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
                 group = it->first;
                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
-                reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+                reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
                 forwardQual.trimQScores(foligo.length(), -1);
-                reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
+                reverseQual.trimQScores(roligo.length(), -1);
                 success = 0;
                 break;
             }
@@ -1416,6 +1379,8 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                 if (alnLength == 0) { numDiff = pdiffs + 100; }
                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
                 
+                if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
+                
                 if(numDiff < minDiff){
                     minDiff = numDiff;
                     minCount = 1;
@@ -1476,6 +1441,8 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                     int numDiff = countDiffs(oligo, temp);
                     if (alnLength == 0) { numDiff = pdiffs + 100; }
                     
+                    if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
+                    
                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
                     if(numDiff < minDiff){
                         minDiff = numDiff;
@@ -1503,19 +1470,6 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                     
                 }
                 
-                /*cout << minDiff << '\t' << minCount << '\t' << endl;
-                 for (int i = 0; i < minFGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;
-                 for (int i = 0; i < minRGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;*/
                 if(minDiff > pdiffs)   {       success = minDiff;      }       //no good matches
                 else {
                     bool foundMatch = false;
@@ -1585,7 +1539,7 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
                 group = it->first;
                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
-                reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+                reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
                 success = 0;
                 break;
             }
@@ -1638,6 +1592,8 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
                 temp = temp.substr(0,alnLength);
                 int numDiff = countDiffs(oligo, temp);
                 
+                if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
+                
                 if (alnLength == 0) { numDiff = pdiffs + 100; }
                 //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
                 
@@ -1699,6 +1655,9 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
                     oligo = oligo.substr(0,alnLength);
                     temp = temp.substr(0,alnLength);
                     int numDiff = countDiffs(oligo, temp);
+                    
+                    if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n");  }
+                    
                     if (alnLength == 0) { numDiff = pdiffs + 100; }
                     
                     //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
@@ -1728,19 +1687,6 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
                     
                 }
                 
-                /*cout << minDiff << '\t' << minCount << '\t' << endl;
-                 for (int i = 0; i < minFGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;
-                 for (int i = 0; i < minRGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;*/
                 if(minDiff > pdiffs)   {       success = minDiff;      }       //no good matches
                 else {
                     bool foundMatch = false;
@@ -1845,6 +1791,8 @@ int TrimOligos::stripBarcode(Sequence& seq, int& group){
                 
                 int numDiff = countDiffs(oligo, temp);
                 
+                if (m->debug) { m->mothurOut("[DEBUG]: " + seq.getName() + " aligned fragment =" + temp + ", barcode =" + oligo + ", numDiffs = " + toString(numDiff) + "\n"); }
+                
                 if(numDiff < minDiff){
                     minDiff = numDiff;
                     minCount = 1;