]> git.donarmstrong.com Git - mothur.git/commitdiff
added ':' name check to seq. error and fastq.info. sffinfo now ignores clipQualRight...
authorSarah Westcott <mothur.westcott@gmail.com>
Mon, 11 Feb 2013 12:14:38 +0000 (07:14 -0500)
committerSarah Westcott <mothur.westcott@gmail.com>
Mon, 11 Feb 2013 12:14:38 +0000 (07:14 -0500)
12 files changed:
collectcommand.cpp
mothur.h
mothurout.cpp
optionparser.cpp
parsefastaqcommand.cpp
pcrseqscommand.h
prcseqscommand.cpp
qualityscores.cpp
seqerrorcommand.cpp
sffinfocommand.cpp
trimoligos.cpp
trimoligos.h

index de92e494ac7565953716e1ab413ffc13ee829913..b892a91bee89e9b6d01ac7bda8e40833fd4db3b0 100644 (file)
@@ -67,8 +67,8 @@ string CollectCommand::getHelpString(){
                helpString += "The collect.single command parameters are list, sabund, rabund, shared, label, freq, calc and abund.  list, sabund, rabund or shared is required unless you have a valid current file. \n";
                helpString += "The collect.single command should be in the following format: \n";
                helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
-               helpString += "collect.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n";
-               helpString += "Example collect(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-chao-ace-jack).\n";
+               helpString += "collect.single(label=yourLabel, freq=yourFreq, calc=yourEstimators).\n";
+               helpString += "Example collect(label=unique-.01-.03, freq=10, calc=sobs-chao-ace-jack).\n";
                helpString += "The default values for freq is 100, and calc are sobs-chao-ace-jack-shannon-npshannon-simpson.\n";
                helpString += validCalculator.printCalc("single");
                helpString += "The label parameter is used to analyze specific labels in your input.\n";
index cd14056b21ba82f9d6c07e650fcf210616e7a882..98b34c31c58c88b614412be4e9ba6874ec1cbe36 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -170,7 +170,6 @@ inline bool compareIndexes(PDistCell left, PDistCell right){
        return (left.index > right.index);      
 }
 //********************************************************************************************************************
-//sorts highest to lowest
 inline bool compareSpearman(spearmanRank left, spearmanRank right){
        return (left.score < right.score);      
 } 
@@ -185,11 +184,7 @@ inline bool compareSeqPriorityNodes(seqPriorityNode left, seqPriorityNode right)
     }
     return false;      
 } 
-//********************************************************************************************************************
-//sorts lowest to highest
-inline bool compareSpearmanReverse(spearmanRank left, spearmanRank right){
-       return (left.score < right.score);      
-} 
 /************************************************************/
 //sorts lowest to highest
 inline bool compareDistLinePairs(distlinePair left, distlinePair right){
index dc77490e4b9bba1bc9bf6e4c16b0f402985c73d1..9f678fef98505cae0de916b1db66914d5f41074f 100644 (file)
@@ -453,7 +453,7 @@ void MothurOut::errorOut(exception& e, string object, string function) {
         if (object == "cluster"){
             mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory.  There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt.  \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
         }else {
-            mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory.  This is most commonly caused by trying to process a dataset too large, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G.  If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue.  Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
+            mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory.  This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G.  If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue.  If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
         }
     }
 }
@@ -951,7 +951,7 @@ string MothurOut::getFullPathName(string fileName){
                                }
                        
                                for (int i = index; i >= 0; i--) {
-                                       newFileName = dirs[i] +  "\\\\" + newFileName;          
+                                       newFileName = dirs[i] +  "\\" + newFileName;            
                                }
                                
                                return newFileName;
@@ -2776,6 +2776,7 @@ void MothurOut::splitAtDash(string& estim, set<string>& container) {
                string individual = "";
                int estimLength = estim.size();
                bool prevEscape = false;
+        /*
                for(int i=0;i<estimLength;i++){
                        if(prevEscape){
                                individual += estim[i];
@@ -2796,7 +2797,25 @@ void MothurOut::splitAtDash(string& estim, set<string>& container) {
                                }
                        }
                }
-               container.insert(individual);
+               */
+        
+        for(int i=0;i<estimLength;i++){
+            if(estim[i] == '-'){
+                if (prevEscape) {  individual += estim[i]; prevEscape = false;  } //add in dash because it was escaped.
+                else {
+                    container.insert(individual);
+                    individual = "";
+                }
+            }else if(estim[i] == '\\'){
+                if (i < estimLength-1) { 
+                    if (estim[i+1] == '-') { prevEscape=true; }  //are you a backslash before a dash, if yes ignore
+                    else { individual += estim[i]; prevEscape = false;  } //if no, add in
+                }else { individual += estim[i]; }
+            }else {
+                individual += estim[i];
+            }
+        }
+        container.insert(individual);
         
        }
        catch(exception& e) {
@@ -2812,6 +2831,7 @@ void MothurOut::splitAtDash(string& estim, set<int>& container) {
                int lineNum;
                int estimLength = estim.size();
                bool prevEscape = false;
+        /*
                for(int i=0;i<estimLength;i++){
                        if(prevEscape){
                                individual += estim[i];
@@ -2832,7 +2852,26 @@ void MothurOut::splitAtDash(string& estim, set<int>& container) {
                                        prevEscape = false;
                                }
                        }
-               }
+               }*/
+        
+        for(int i=0;i<estimLength;i++){
+            if(estim[i] == '-'){
+                if (prevEscape) {  individual += estim[i]; prevEscape = false;  } //add in dash because it was escaped.
+                else {
+                    convert(individual, lineNum); //convert the string to int
+                    container.insert(lineNum);
+                    individual = "";
+                }
+            }else if(estim[i] == '\\'){
+                if (i < estimLength-1) { 
+                    if (estim[i+1] == '-') { prevEscape=true; }  //are you a backslash before a dash, if yes ignore
+                    else { individual += estim[i]; prevEscape = false;  } //if no, add in
+                }else { individual += estim[i]; }
+            }else {
+                individual += estim[i];
+            }
+        }
+        
                convert(individual, lineNum); //convert the string to int
                container.insert(lineNum);
        }
index a6d5c0a668be5ebdc259a3108d4ff11341f7cd00..d9d9145f1ffcf73caa4c502fb19acec23476929e 100644 (file)
@@ -116,6 +116,8 @@ map<string, string> OptionParser::getParameters() {
                         it->second = m->getDesignFile();
                     }else if (it->first == "sff") {
                         it->second = m->getSFFFile();
+                    }else if (it->first == "flow") {
+                            it->second = m->getFlowFile();
                     }else if (it->first == "oligos") {
                         it->second = m->getOligosFile();
                     }else if (it->first == "accnos") {
index 89f97acf13ad45650a1f8294a3e88895beaa347c..704e752341320dd75b9f213658d74530583ec9cb 100644 (file)
@@ -183,7 +183,10 @@ int ParseFastaQCommand::execute(){
                        string name = m->getline(in); m->gobble(in);
                        if (name == "") {  m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
                        else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       else { name = name.substr(1); }
+                       else { 
+                name = name.substr(1); 
+                for (int i = 0; i < name.length(); i++) { if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; } }
+            }
                        
                        //read sequence
                        string sequence = m->getline(in); m->gobble(in);
@@ -193,7 +196,10 @@ int ParseFastaQCommand::execute(){
                        string name2 = m->getline(in); m->gobble(in);
                        if (name2 == "") {  m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
                        else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       else { name2 = name2.substr(1);  }
+                       else { 
+                name2 = name2.substr(1);  
+                for (int i = 0; i < name2.length(); i++) { if (name2[i] == ':') { name2[i] = '_'; m->changedSeqNames = true; } }
+            }
                        
                        //read quality scores
                        string quality = m->getline(in); m->gobble(in);
index c6f7ff9c7f6670073a7080496f80eac1cdfb2a72..5ac6437bb730cf03e6e69a8bf7d1562be4aad595 100644 (file)
@@ -48,10 +48,10 @@ private:
        bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
     bool abort, keepprimer, keepdots;
        string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch;
-       int start, end, processors, length;
+       int start, end, processors, length, pdiffs;
        
     vector<string> revPrimer, outputNames;
-       vector<string> primers;
+       map<string, int> primers;
     
     int writeAccnos(set<string>);
     int readName(set<string>&);
@@ -62,10 +62,7 @@ private:
     bool readEcoli();
        int driverPcr(string, string, string, set<string>&, linePair);  
        int createProcesses(string, string, string, set<string>&);
-    bool findForward(Sequence&, int&, int&);
-    bool findReverse(Sequence&, int&, int&);
     bool isAligned(string, map<int, int>&);
-    bool compareDNASeq(string, string);
     string reverseOligo(string);
 };
 
@@ -78,7 +75,7 @@ struct pcrData {
     string goodFasta, badFasta, oligosfile, ecolifile, nomatch;
        unsigned long long fstart;
        unsigned long long fend;
-       int count, start, end, length;
+       int count, start, end, length, pdiffs;
        MothurOut* m;
        vector<string> primers;
     vector<string> revPrimer;
@@ -87,7 +84,7 @@ struct pcrData {
        
        
        pcrData(){}
-       pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, vector<string> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, unsigned long long fst, unsigned long long fen) {
+       pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, vector<string> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) {
                filename = f;
         goodFasta = gf;
         badFasta = bfn;
@@ -104,6 +101,7 @@ struct pcrData {
         length = l;
                fstart = fst;
         fend = fen;
+        pdiffs = pd;
                count = 0;
        }
 };
@@ -132,6 +130,9 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                }
         
         set<int> lengths;
+        //pdiffs, bdiffs, primers, barcodes, revPrimers
+        map<string, int> faked;
+        TrimOligos trim(pdiffs, 0, primers, faked, revPrimer);
                
                for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
             pDataArray->count++;
@@ -160,62 +161,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                     //process primers
                     if (pDataArray->primers.size() != 0) {
                         int primerStart = 0; int primerEnd = 0;
-                        //bool good = findForward(currSeq, primerStart, primerEnd);
-                        ///////////////////////////////////////////////////////////////
-                        bool good = false;
-                        string rawSequence = currSeq.getUnaligned();
-                        
-                        for(int j=0;j<pDataArray->primers.size();j++){
-                            string oligo = pDataArray->primers[j];
-                            
-                            if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
-                            
-                            if(rawSequence.length() < oligo.length()) {  break;  }
-                            
-                            //search for primer
-                            int olength = oligo.length();
-                            for (int l = 0; l < rawSequence.length()-olength; l++){
-                                if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
-                                string rawChunk = rawSequence.substr(l, olength);
-                                //compareDNASeq(oligo, rawChunk)
-                                ////////////////////////////////////////////////////////
-                                bool success = 1;
-                                for(int k=0;k<olength;k++){
-                                    
-                                    if(oligo[k] != rawChunk[k]){
-                                        if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C')   {       success = 0;    }
-                                        else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N'))                          {       success = 0;    }
-                                        else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C'))   {       success = 0;    }
-                                        else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G'))   {       success = 0;    }                       
-                                        
-                                        if(success == 0)       {       break;   }
-                                    }
-                                    else{
-                                        success = 1;
-                                    }
-                                }
-                                
-                                ////////////////////////////////////////////////////////////////////
-                                if(success) {
-                                    primerStart = j;
-                                    primerEnd = primerStart + olength;
-                                    good = true; break;
-                                }
-                            }
-                            if (good) { break; }
-                        }      
-                        
-                        if (!good) { primerStart = 0; primerEnd = 0; }
-                        ///////////////////////////////////////////////////////////////
-                        
+                        bool good = trim.findForward(currSeq, primerStart, primerEnd);
                         
                         if(!good){     if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "f";     }
                         else{
@@ -229,6 +175,15 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                                     if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
                                     else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                              }
                                 }
+                                ///////////////////////////////////////////////////////////////
+                                mapAligned.clear();
+                                string seq = currSeq.getAligned(); 
+                                int countBases = 0;
+                                for (int k = 0; k < seq.length(); k++) {
+                                    if (!isalpha(seq[k])) { ; }
+                                    else { mapAligned[countBases] = k; countBases++; } 
+                                }                                                   
+                                ///////////////////////////////////////////////////////////////
                             }else { 
                                 if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
@@ -239,60 +194,8 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                     //process reverse primers
                     if (pDataArray->revPrimer.size() != 0) {
                         int primerStart = 0; int primerEnd = 0;
-                        bool good = false;
-                        //findReverse(currSeq, primerStart, primerEnd);
-                         ///////////////////////////////////////////////////////////////
-                        string rawSequence = currSeq.getUnaligned();
-                        
-                        for(int j=0;j<pDataArray->revPrimer.size();j++){
-                            string oligo = pDataArray->revPrimer[j];
-                            if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
-                            if(rawSequence.length() < oligo.length()) {  break;  }
-                            
-                            //search for primer
-                            int olength = oligo.length();
-                            for (int l = rawSequence.length()-olength; l >= 0; l--){
-                                
-                                string rawChunk = rawSequence.substr(l, olength);
-                                //compareDNASeq(oligo, rawChunk)
-                                ////////////////////////////////////////////////////////
-                                bool success = 1;
-                                for(int k=0;k<olength;k++){
-                                    
-                                    if(oligo[k] != rawChunk[k]){
-                                        if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C')   {       success = 0;    }
-                                        else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N'))                          {       success = 0;    }
-                                        else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C'))   {       success = 0;    }
-                                        else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G'))   {       success = 0;    }                       
-                                        
-                                        if(success == 0)       {       break;   }
-                                    }
-                                    else{
-                                        success = 1;
-                                    }
-                                }
-                                
-                                ////////////////////////////////////////////////////////////////////
-                                if(success) {
-                                    primerStart = j;
-                                    primerEnd = primerStart + olength;
-                                    good = true; break;
-                                }
-                            }
-                            if (good) { break; }
-                        }      
-                        
-                        if (!good) { primerStart = 0; primerEnd = 0; }
-
-                         ///////////////////////////////////////////////////////////////
+                        bool good = trim.findReverse(currSeq, primerStart, primerEnd);
+                         
                         if(!good){     if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "r";     }
                         else{ 
                             //are you aligned
index 4d5b6d963a026433c5851de6d444edc524cd6d1a..5fa71bef6b3474cc516491044c41729a318d449f 100644 (file)
@@ -21,6 +21,8 @@ vector<string> PcrSeqsCommand::setParameters(){
                CommandParameter pstart("start", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pstart);
                CommandParameter pend("end", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pend);
                CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "","",false,false); parameters.push_back(pnomatch);
+        CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
+
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
                CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepprimer);
         CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pkeepdots);
@@ -41,7 +43,7 @@ string PcrSeqsCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The pcr.seqs command reads a fasta file.\n";
-        helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, processors, keepprimer and keepdots.\n";
+        helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, pdiffs, processors, keepprimer and keepdots.\n";
                helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n";
         helpString += "The start parameter allows you to provide a starting position to trim to.\n";
         helpString += "The end parameter allows you to provide a ending position to trim from.\n";
@@ -49,6 +51,7 @@ string PcrSeqsCommand::getHelpString(){
         helpString += "The processors parameter allows you to use multiple processors.\n";
         helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n";
         helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n";
+        helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
                helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
                helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
                return helpString;
@@ -262,6 +265,9 @@ PcrSeqsCommand::PcrSeqsCommand(string option)  {
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors); 
+            
+            temp = validParameter.validFile(parameters, "pdiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, pdiffs);
                        
             nomatch = validParameter.validFile(parameters, "nomatch", false);  if (nomatch == "not found") { nomatch = "reject"; }
                        
@@ -319,7 +325,7 @@ int PcrSeqsCommand::execute(){
                
                
         length = 0;
-               if(oligosfile != ""){    readOligos();     }  if (m->control_pressed) {  return 0; }
+               if(oligosfile != ""){    readOligos();     if (m->debug) { m->mothurOut("[DEBUG]: read oligos file. numprimers = " + toString(primers.size()) + ", revprimers = " + toString(revPrimer.size()) + ".\n"); } }  if (m->control_pressed) {  return 0; }
         if(ecolifile != "") {    readEcoli();      }  if (m->control_pressed) {  return 0; }
         
         vector<unsigned long long> positions; 
@@ -499,7 +505,7 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string
             if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
             
                        // Allocate memory for thread data.
-                       pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, lines[i].start, lines[i].end);
+                       pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
                        pDataArray.push_back(tempPcr);
                        
                        //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
@@ -561,6 +567,10 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
                int count = 0;
         set<int> lengths;
         
+        //pdiffs, bdiffs, primers, barcodes, revPrimers
+        map<string, int> faked;
+        TrimOligos trim(pdiffs, 0, primers, faked, revPrimer);
+        
                while (!done) {
             
                        if (m->control_pressed) {  break; }
@@ -570,6 +580,8 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
             string trashCode = "";
                        if (currSeq.getName() != "") {
                 
+                if (m->debug) { m->mothurOut("[DEBUG]: seq name = " + currSeq.getName() + ".\n"); } 
+                
                 bool goodSeq = true;
                 if (oligosfile != "") {
                     map<int, int> mapAligned;
@@ -578,7 +590,7 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
                     //process primers
                     if (primers.size() != 0) {
                         int primerStart = 0; int primerEnd = 0;
-                        bool good = findForward(currSeq, primerStart, primerEnd);
+                        bool good = trim.findForward(currSeq, primerStart, primerEnd);
                         
                         if(!good){     if (nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
                         else{
@@ -590,8 +602,9 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
                                 } 
                                 else                {  
                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
-                                    else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                              }
+                                    else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                             }
                                 }
+                                isAligned(currSeq.getAligned(), mapAligned);
                             }else { 
                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
@@ -602,10 +615,10 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
                     //process reverse primers
                     if (revPrimer.size() != 0) {
                         int primerStart = 0; int primerEnd = 0;
-                        bool good = findReverse(currSeq, primerStart, primerEnd);
+                        bool good = trim.findReverse(currSeq, primerStart, primerEnd);
                         if(!good){     if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
                         else{ 
-                            //are you aligned
+                              //are you aligned
                             if (aligned) { 
                                 if (!keepprimer)    {  
                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
@@ -700,75 +713,9 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
        }
 }
 //********************************************************************/
-bool PcrSeqsCommand::findForward(Sequence& seq, int& primerStart, int& primerEnd){
-       try {
-               
-               string rawSequence = seq.getUnaligned();
-               
-               for(int j=0;j<primers.size();j++){
-                       string oligo = primers[j];
-                       
-                       if(rawSequence.length() < oligo.length()) {  break;  }
-                       
-                       //search for primer
-            int olength = oligo.length();
-            for (int j = 0; j < rawSequence.length()-olength; j++){
-                if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
-                string rawChunk = rawSequence.substr(j, olength);
-                if(compareDNASeq(oligo, rawChunk)) {
-                    primerStart = j;
-                    primerEnd = primerStart + olength;
-                    return true;
-                }
-                
-            }
-        }      
-               
-        primerStart = 0; primerEnd = 0;
-               return false;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimOligos", "stripForward");
-               exit(1);
-       }
-}
-//******************************************************************/
-bool PcrSeqsCommand::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
-       try {
-               
-               string rawSequence = seq.getUnaligned();
-               
-               for(int i=0;i<revPrimer.size();i++){
-                       string oligo = revPrimer[i];
-                       if(rawSequence.length() < oligo.length()) {  break;  }
-                       
-                       //search for primer
-            int olength = oligo.length();
-            for (int j = rawSequence.length()-olength; j >= 0; j--){
-                 if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
-                string rawChunk = rawSequence.substr(j, olength);
-            
-                if(compareDNASeq(oligo, rawChunk)) {
-                    primerStart = j;
-                    primerEnd = primerStart + olength;
-                    return true;
-                }
-                
-            }
-               }       
-               
-        primerStart = 0; primerEnd = 0;
-               return false;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "PcrSeqsCommand", "findReverse");
-               exit(1);
-       }
-}
-//********************************************************************/
 bool PcrSeqsCommand::isAligned(string seq, map<int, int>& aligned){
        try {
+        aligned.clear();
         bool isAligned = false;
         
         int countBases = 0;
@@ -832,6 +779,7 @@ bool PcrSeqsCommand::readOligos(){
                m->openInputFile(oligosfile, inOligos);
                
                string type, oligo, group;
+        int primerCount = 0;
                
                while(!inOligos.eof()){
             
@@ -859,7 +807,7 @@ bool PcrSeqsCommand::readOligos(){
                         if (c == 10 || c == 13){       break;  } 
                         else if (c == 32 || c == 9){;} //space or tab
                                        } 
-                                       primers.push_back(oligo);
+                                       primers[oligo] = primerCount; primerCount++;
                 }else if(type == "REVERSE"){
                     string oligoRC = reverseOligo(oligo);
                     revPrimer.push_back(oligoRC);
@@ -935,43 +883,6 @@ int PcrSeqsCommand::writeAccnos(set<string> badNames){
                exit(1);
        }
     
-}
-//******************************************************************/
-bool PcrSeqsCommand::compareDNASeq(string oligo, string seq){
-       try {
-               bool success = 1;
-               int length = oligo.length();
-               
-               for(int i=0;i<length;i++){
-                       
-                       if(oligo[i] != seq[i]){
-                               if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
-                               else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
-                               else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
-                               else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
-                               else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
-                               else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
-                               
-                               if(success == 0)        {       break;   }
-                       }
-                       else{
-                               success = 1;
-                       }
-               }
-               
-               return success;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "PcrSeqsCommand", "compareDNASeq");
-               exit(1);
-       }
-       
 }
 //***************************************************************************************************************
 int PcrSeqsCommand::readName(set<string>& names){
index 4998b3d1c4dabd18b97fb8f33e36860127e757e5..33ca1728052e4db59372fc2d8129e5bcbaf729c5 100644 (file)
@@ -34,13 +34,17 @@ QualityScores::QualityScores(ifstream& qFile){
                int score;
                seqName = getSequenceName(qFile);
                
+        if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "'\n.");  }
+        
                if (!m->control_pressed) {
             string qScoreString = m->getline(qFile);
-            //cout << qScoreString << endl;
+            
+            if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + qScoreString + "'\n.");  }
+            
             while(qFile.peek() != '>' && qFile.peek() != EOF){
                 if (m->control_pressed) { break; }
                 string temp = m->getline(qFile);
-                //cout << temp << endl;
+                if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + temp + "'\n.");  }
                 qScoreString +=  ' ' + temp;
             }
             //cout << "done reading " << endl; 
@@ -51,6 +55,8 @@ QualityScores::QualityScores(ifstream& qFile){
                 string temp;
                 qScoreStringStream >> temp;  m->gobble(qScoreStringStream);
                 
+                if (m->debug) { m->mothurOut("[DEBUG]: score " + toString(qScores.size()) + " = '" + temp + "'\n.");  }
+                
                 //check temp to make sure its a number
                 if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
                 convert(temp, score);
index 67e43aa0d4b7f6a6ff251bc45a8254ca49b8b162..0a6eae93df72e8556554bef32c5a52074a4d0a54 100644 (file)
@@ -1381,6 +1381,10 @@ int SeqErrorCommand::setLines(string filename, string qfilename, string rfilenam
                                        
                                        string sname = "";  nameStream >> sname;
                                        sname = sname.substr(1);
+                    
+                    for (int i = 0; i < sname.length(); i++) {
+                        if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
+                    }
                                        
                                        map<string, int>::iterator it = firstSeqNames.find(sname);
                                        
@@ -1441,6 +1445,10 @@ int SeqErrorCommand::setLines(string filename, string qfilename, string rfilenam
                     istringstream nameStream(input);
                     string sname = "";  nameStream >> sname;
                     
+                    for (int i = 0; i < sname.length(); i++) {
+                        if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
+                    }
+                    
                     map<string, int>::iterator it = firstSeqNamesReport.find(sname);
                 
                     if(it != firstSeqNamesReport.end()) { //this is the start of a new chunk
index a1b7066482b973bcbaeebefdc718c5c96d70aae9..0148ab19f407b91d293eebb547927d4c381fd6d0 100644 (file)
@@ -1039,7 +1039,11 @@ int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& pr
         
         if (trim) {
             if(header.clipQualRight < header.clipQualLeft){
-                seq = "NNNN";
+                if (header.clipQualRight == 0) { //don't trim right
+                    seq = seq.substr(header.clipQualLeft-1);
+                }else {
+                    seq = "NNNN";
+                }
             }
             else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
@@ -1260,7 +1264,11 @@ int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& head
                
         if (trim) {
                        if(header.clipQualRight < header.clipQualLeft){
-                               seq = "NNNN";
+                               if (header.clipQualRight == 0) { //don't trim right
+                    seq = seq.substr(header.clipQualLeft-1);
+                }else {
+                    seq = "NNNN";
+                }
                        }
                        else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
                                seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
@@ -1296,8 +1304,13 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade
                
                if (trim) {
                        if(header.clipQualRight < header.clipQualLeft){
-                               out << ">" << header.name << " xy=" << header.xy << endl;
-                               out << "0\t0\t0\t0";
+                if (header.clipQualRight == 0) { //don't trim right
+                    out << ">" << header.name << " xy=" << header.xy << " length=" << (read.qualScores.size()-header.clipQualLeft) << endl;
+                    for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';      }       
+                }else {
+                    out << ">" << header.name << " xy=" << header.xy << endl;
+                    out << "0\t0\t0\t0";
+                }
                        }
                        else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
                                out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
@@ -1325,6 +1338,8 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade
 //**********************************************************************************************************************
 int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {
        try {
+        if (header.clipQualRight == 0) { header.clipQualRight = read.flowgram.size(); }
+        
                if(header.clipQualRight > header.clipQualLeft){
                        
                        int rightIndex = 0;
index ecdca9bfe60f44c972d821b5f82c7353ee9a9561..a53f4e85388f0f7bbb6fda40c2074b4f555e29d3 100644 (file)
@@ -144,6 +144,24 @@ TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, v
                barcodes = br;
                primers = pr;
                revPrimer = r;
+        
+        maxFBarcodeLength = 0;
+        for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
+            string oligo = it->first;
+            if(oligo.length() > maxFBarcodeLength){
+                maxFBarcodeLength = oligo.length();
+            }
+        }
+        maxRBarcodeLength = maxFBarcodeLength;
+        
+        maxFPrimerLength = 0;
+        for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
+            string oligo = it->first;
+            if(oligo.length() > maxFPrimerLength){
+                maxFPrimerLength = oligo.length();
+            }
+        }
+        maxRPrimerLength = maxFPrimerLength;
        }
        catch(exception& e) {
                m->errorOut(e, "TrimOligos", "TrimOligos");
@@ -152,7 +170,135 @@ TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, v
 }
 /********************************************************************/
 TrimOligos::~TrimOligos() {}
+//********************************************************************/
+bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
+       try {
+               
+               string rawSequence = seq.getUnaligned();
+               
+               for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
+                       string oligo = it->first;
+                       
+                       if(rawSequence.length() < oligo.length()) {  break;  }
+                       
+                       //search for primer
+            int olength = oligo.length();
+            for (int j = 0; j < rawSequence.length()-olength; j++){
+                if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
+                string rawChunk = rawSequence.substr(j, olength);
+                if(compareDNASeq(oligo, rawChunk)) {
+                    primerStart = j;
+                    primerEnd = primerStart + olength;
+                    return true;
+                }
+                
+            }
+        }      
+               
+        primerStart = 0; primerEnd = 0;
+        //if you don't want to allow for diffs
+               if (pdiffs == 0) { return false;  }
+               else { //try aligning and see if you can find it
+                                               
+                       //can you find the barcode
+                       int minDiff = 1e6;
+                       int minCount = 1;
+                       
+            Alignment* alignment;
+            if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));  }
+            else{ alignment = NULL; } 
+            
+                       for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
+                
+                string prim = it->first;
+                //search for primer
+                int olength = prim.length();
+                if (rawSequence.length() < olength) {} //ignore primers too long for this seq
+                else{
+                    for (int j = 0; j < rawSequence.length()-olength; j++){
+                        
+                        string oligo = it->first;
+
+                        if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
+                               
+                        string rawChunk = rawSequence.substr(j, olength+pdiffs);
+                        
+                        //use needleman to align first primer.length()+numdiffs of sequence to each barcode
+                        alignment->align(oligo, rawChunk);
+                        oligo = alignment->getSeqAAln();
+                        string temp = alignment->getSeqBAln();
+                               
+                        int alnLength = oligo.length();
+                               
+                        for(int i=oligo.length()-1;i>=0;i--){
+                            if(oligo[i] != '-'){       alnLength = i+1;        break;  }
+                        }
+                        oligo = oligo.substr(0,alnLength);
+                        temp = temp.substr(0,alnLength);
+                               
+                        int numDiff = countDiffs(oligo, temp);
+                               
+                        if(numDiff < minDiff){
+                            minDiff = numDiff;
+                            minCount = 1;
+                            primerStart = j;
+                            primerEnd = primerStart + alnLength;
+                        }else if(numDiff == minDiff){ minCount++; }
+                    }
+                }
+                       }
+                       
+            if (alignment != NULL) {  delete alignment;  }
+            
+                       if(minDiff > pdiffs)    {       primerStart = 0; primerEnd = 0; return false;           }       //no good matches
+                       else if(minCount > 1)   {       primerStart = 0; primerEnd = 0; return false;   }       //can't tell the difference between multiple primers
+                       else{  return true; }
+               }
+       
+        primerStart = 0; primerEnd = 0;
+               return false;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimOligos", "stripForward");
+               exit(1);
+       }
+}
+//******************************************************************/
+bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
+       try {
+               
+               string rawSequence = seq.getUnaligned();
+               
+               for(int i=0;i<revPrimer.size();i++){
+                       string oligo = revPrimer[i];
+                       if(rawSequence.length() < oligo.length()) {  break;  }
+                       
+                       //search for primer
+            int olength = oligo.length();
+            for (int j = rawSequence.length()-olength; j >= 0; j--){
+                if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
+                string rawChunk = rawSequence.substr(j, olength);
+                
+                if(compareDNASeq(oligo, rawChunk)) {
+                    primerStart = j;
+                    primerEnd = primerStart + olength;
+                    return true;
+                }
+                
+            }
+               }       
+               
+        primerStart = 0; primerEnd = 0;
+               return false;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PcrSeqsCommand", "findReverse");
+               exit(1);
+       }
+}
 //*******************************************************************/
+
 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
        try {
                
index 8aa44bdd365356f195069e4f11528300844fbc7a..d72b8146d36bc659ec13ddffc28463ef4a7b8204 100644 (file)
@@ -50,6 +50,11 @@ class TrimOligos {
     
         bool stripSpacer(Sequence&);
         bool stripSpacer(Sequence&, QualityScores&);
+    
+        //seq, primerStart, primerEnd
+        bool findForward(Sequence&, int&, int&);
+        bool findReverse(Sequence&, int&, int&);
+    
                                
        
        private: