git.donarmstrong.com Git - mothur.git/blobdiff - seqerrorcommand.cpp
mod to seq.error to allow for degenerate bases in the ref sequences
[mothur.git] / seqerrorcommand.cpp
index 00f78626b6270455f5a8f35464923fe72e9ff527..af044fec3f1b60b07da406894ebc454d96df21bd 100644 (file)
@@ -343,7 +343,7 @@ int SeqErrorCommand::execute(){
                                int startBase = report.getQueryStart();
                                int endBase = report.getQueryEnd();
 
-                               quality = QualityScores(qualFile, origLength);
+                               quality = QualityScores(qualFile);
 
                                if(!ignoreSeq){
                                        quality.updateQScoreErrorMap(qScoreErrorMap, minCompare.sequence, startBase, endBase, minCompare.weight);
@@ -417,20 +417,22 @@ void SeqErrorCommand::getReferences(){
                ifstream referenceFile;
                m->openInputFile(referenceFileName, referenceFile);
                
+               int numAmbigSeqs = 0;
+               
                while(referenceFile){
                        Sequence currentSeq(referenceFile);
                        int numAmbigs = currentSeq.getAmbigBases();
-                       
-                       if(numAmbigs != 0){
-                               m->mothurOut("Warning: " + toString(currentSeq.getName()) + " has " + toString(numAmbigs) + " ambiguous bases, these bases will be removed\n");
-                               currentSeq.removeAmbigBases();
-                       }
+                       if(numAmbigs > 0){      numAmbigSeqs++; }
                        referenceSeqs.push_back(currentSeq);
                        m->gobble(referenceFile);
                }
-               numRefs = referenceSeqs.size();
-               
                referenceFile.close();
+               
+               if(numAmbigSeqs != 0){
+                       m->mothurOut("Warning: " + toString(numAmbigSeqs) + " reference sequences have ambiguous bases, these bases will be ignored\n");
+               }
+               
+               numRefs = referenceSeqs.size();
        }
        catch(exception& e) {
                m->errorOut(e, "SeqErrorCommand", "getReferences");
@@ -454,7 +456,7 @@ Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){
                Compare errors;
 
                for(int i=0;i<alignLength;i++){
-                       if(q[i] != '.' && r[i] != '.' && (q[i] != '-' || r[i] != '-')){                 //      no missing data and no double gaps
+                       if(q[i] != 'N' && q[i] != '.' && r[i] != '.' && (q[i] != '-' || r[i] != '-')){                  //      no missing data and no double gaps
                                started = 1;
                                
                                if(q[i] == 'A'){
@@ -505,7 +507,7 @@ Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){
                                if(started == 1){       break;  }
                        }
                        else if(q[i] != '.' && r[i] == '.'){            //      query extends beyond reference
-                               m->mothurOut("Warning: " + toString(query.getName()) + " extend beyond " + toString(reference.getName()) + ".  Ignoring the extra bases in the query\n");
+//                             m->mothurOut("Warning: " + toString(query.getName()) + " extend beyond " + toString(reference.getName()) + ".  Ignoring the extra bases in the query\n");
                                if(started == 1){       break;  }
                        }
                        else if(q[i] == '.' && r[i] == '.'){            //      both are missing data