]> git.donarmstrong.com Git - mothur.git/blobdiff - trimoligos.cpp
changed random forest output filename
[mothur.git] / trimoligos.cpp
index 5d375a75150c1cc86c5b6c39e3ac21cdeeb7f110..ddc1053f50d3eba28064907e4c6f0a60a958b3ee 100644 (file)
@@ -431,10 +431,10 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
                 break;
             }
             
-            if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
+            if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
                 group = it->first;
                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
-                reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+                reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
                 success = 0;
                 break;
             }
@@ -577,19 +577,6 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
                     
                 }
                 
-                /*cout << minDiff << '\t' << minCount << '\t' << endl;
-                 for (int i = 0; i < minFGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;
-                 for (int i = 0; i < minRGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;*/
                 if(minDiff > bdiffs)   {       success = minDiff;      }       //no good matches
                 else {
                     bool foundMatch = false;
@@ -654,12 +641,12 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                 break;
             }
             
-            if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
+            if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
                 group = it->first;
                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
-                reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+                reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
                 forwardQual.trimQScores(foligo.length(), -1);
-                reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
+                reverseQual.trimQScores(roligo.length(), -1);
                 success = 0;
                 break;
             }
@@ -802,19 +789,6 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                     
                 }
                 
-                /*cout << minDiff << '\t' << minCount << '\t' << endl;
-                 for (int i = 0; i < minFGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;
-                 for (int i = 0; i < minRGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;*/
                 if(minDiff > bdiffs)   {       success = minDiff;      }       //no good matches
                 else {
                     bool foundMatch = false;
@@ -881,6 +855,8 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou
                 break;
             }
             
+            if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
+            
             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
                 group = it->first;
                 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
@@ -1033,19 +1009,6 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou
                     
                 }
                 
-                /*cout << minDiff << '\t' << minCount << '\t' << endl;
-                 for (int i = 0; i < minFGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;
-                 for (int i = 0; i < minRGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;*/
                 if(minDiff > bdiffs)   {       success = minDiff;      }       //no good matches
                 else {
                     bool foundMatch = false;
@@ -1065,6 +1028,7 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou
                         group = matches[0];
                         fStart = minFPos[0];
                         rStart = rawSeq.length() - minRPos[0];
+                        if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence
                     }
                     
                     //we have an acceptable match for the forward and reverse, but do they match?
@@ -1116,6 +1080,8 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou
                 break;
             }
             
+            if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
+            
             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
                 group = it->first;
                 if (!keepForward) {
@@ -1165,7 +1131,7 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou
                     success = pdiffs + 10;
                     break;
                 }
-                //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
+                //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl;
                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
                 alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
                 oligo = alignment->getSeqAAln();
@@ -1270,19 +1236,6 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou
                     
                 }
                 
-                /*cout << minDiff << '\t' << minCount << '\t' << endl;
-                 for (int i = 0; i < minFGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;
-                 for (int i = 0; i < minRGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;*/
                 if(minDiff > pdiffs)   {       success = minDiff;      }       //no good matches
                 else {
                     bool foundMatch = false;
@@ -1302,6 +1255,7 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou
                         group = matches[0];
                         fStart = minFPos[0];
                         rStart = rawSeq.length() - minRPos[0];
+                        if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence
                     }
                     
                     //we have an acceptable match for the forward and reverse, but do they match?
@@ -1355,12 +1309,12 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                 break;
             }
             
-            if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
+            if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
                 group = it->first;
                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
-                reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+                reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
                 forwardQual.trimQScores(foligo.length(), -1);
-                reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
+                reverseQual.trimQScores(roligo.length(), -1);
                 success = 0;
                 break;
             }
@@ -1503,19 +1457,6 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                     
                 }
                 
-                /*cout << minDiff << '\t' << minCount << '\t' << endl;
-                 for (int i = 0; i < minFGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;
-                 for (int i = 0; i < minRGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;*/
                 if(minDiff > pdiffs)   {       success = minDiff;      }       //no good matches
                 else {
                     bool foundMatch = false;
@@ -1585,7 +1526,7 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
             if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
                 group = it->first;
                 forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
-                reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+                reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
                 success = 0;
                 break;
             }
@@ -1728,19 +1669,6 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
                     
                 }
                 
-                /*cout << minDiff << '\t' << minCount << '\t' << endl;
-                 for (int i = 0; i < minFGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;
-                 for (int i = 0; i < minRGroup.size(); i++) {
-                 cout << i << '\t';
-                 for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
-                 cout << endl;
-                 }
-                 cout << endl;*/
                 if(minDiff > pdiffs)   {       success = minDiff;      }       //no good matches
                 else {
                     bool foundMatch = false;