git.donarmstrong.com Git - mothur.git/commitdiff
added modify names parameter to set.dir
author: Sarah Westcott <mothur.westcott@gmail.com>
Fri, 17 May 2013 13:47:21 +0000 (09:47 -0400)
committer: Sarah Westcott <mothur.westcott@gmail.com>
Fri, 17 May 2013 13:47:21 +0000 (09:47 -0400)
13 files changed:
flowdata.cpp
makelookupcommand.cpp
mothurout.cpp
mothurout.h
parsefastaqcommand.cpp
prcseqscommand.cpp
preclustercommand.cpp
qualityscores.cpp
seqerrorcommand.cpp
sequence.cpp
setdircommand.cpp
setdircommand.h
trimseqscommand.cpp

index b2e856c28e5c939a00102e19d34a97799c11ae08..66261e33b922f17be93e3f929582ca9ea7d9208f 100644 (file)
@@ -68,9 +68,7 @@ string FlowData::getSequenceName(ifstream& flowFile) {
         flowFile >> name;
                
                if (name.length() != 0) { 
-            for (int i = 0; i < name.length(); i++) {
-                if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
-            }
+            m->checkName(name);
         }else{ m->mothurOut("Error in reading your flowfile, at position " + toString(flowFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true;  }
         
                return name;
index 59e26c7a73984c741aacdb994b497ef1362c9ed8..dd5bacfbc12aed0444fce46e3e836e4c2aea22d8 100644 (file)
@@ -40,8 +40,8 @@ string MakeLookupCommand::getHelpString(){
         helpString += "The flow parameter is used to provide the flow data. It is required.\n";
         helpString += "The error parameter is used to provide the error summary. It is required.\n";
         helpString += "The barcode parameter is used to provide the barcode sequence. Default=AACCGTGTC.\n";
-        helpString += "The key parameter is used to provide the key sequence. Default=TACG.\n";
-        helpString += "The threshold parameter is ....\n";
+        helpString += "The key parameter is used to provide the key sequence. Default=TCAG.\n";
+        helpString += "The threshold parameter is ....Default=10000.\n";
         helpString += "The order parameter options are A, B or I.  Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = 
TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
                helpString += "The make.lookup should be in the following format: make.lookup(reference=HMP_MOCK.v53.fasta, flow=H3YD4Z101.mock3.flow_450.flow, error=H3YD4Z101.mock3.flow_450.error.summary, barcode=AACCTGGC)\n";
                helpString += "new(...)\n";
index fc16d204b142e5a6f55783f56bc52aafb8e6a4d1..2900c7ec4fc8e0df952163554f6fefb7e622cd64 100644 (file)
@@ -2242,9 +2242,11 @@ map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeq
 /************************************************************/
 int MothurOut::checkName(string& name) {
     try {
-        for (int i = 0; i < name.length(); i++) {
-            if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
-        }        
+        if (modifyNames) {
+            for (int i = 0; i < name.length(); i++) {
+                if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
+            }
+        }
         return 0;
     }
        catch(exception& e) {
index 643ecd58a149aebc1ac88f500d4658de98d5e145..845e6dd79fc5a7143b238d48bbc9deb1ae401df6 100644 (file)
@@ -70,7 +70,7 @@ class MothurOut {
                vector<string> binLabelsInFile;
                vector<string> currentBinLabels;
                string saveNextLabel, argv, sharedHeaderMode, groupMode;
-               bool printedHeaders, commandInputsConvertError, changedSeqNames;
+               bool printedHeaders, commandInputsConvertError, changedSeqNames, modifyNames;
                
                //functions from mothur.h
                //file operations
@@ -265,6 +265,7 @@ class MothurOut {
                        sharedHeaderMode = "";
             groupMode = "group";
             changedSeqNames = false;
+            modifyNames = true;
                }
                ~MothurOut();
 
index 74e3e2bc9f0bdedc88f90e7f923eafdb511470d6..051c1dfb057c0148c3f0053f94707b7e6d034dc0 100644 (file)
@@ -191,7 +191,7 @@ int ParseFastaQCommand::execute(){
                        else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
                        else { 
                 name = name.substr(1); 
-                for (int i = 0; i < name.length(); i++) { if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; } }
+                m->checkName(name);
             }
                        
                        //read sequence
@@ -204,7 +204,7 @@ int ParseFastaQCommand::execute(){
                        else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
                        else { 
                 name2 = name2.substr(1);  
-                for (int i = 0; i < name2.length(); i++) { if (name2[i] == ':') { name2[i] = '_'; m->changedSeqNames = true; } }
+                m->checkName(name2);
             }
                        
                        //read quality scores
index d31d687c0af871ed4d5bbbde157c671900aa67d2..8a8db37612937ee86d7d11adf1b9973d8cba634d 100644 (file)
@@ -974,14 +974,35 @@ bool PcrSeqsCommand::readOligos(){
                         else if (c == 32 || c == 9){;} //space or tab
                                        } 
                                        primers[oligo] = primerCount; primerCount++;
+                    //cout << "for oligo = " << oligo  << endl;
                 }else if(type == "REVERSE"){
                     string oligoRC = reverseOligo(oligo);
                     revPrimer.push_back(oligoRC);
-                    //cout << "oligo = " << oligo << " reverse = " << oligoRC << endl;
+                    //cout << "rev oligo = " << oligo << " reverse = " << oligoRC << endl;
                                }else if(type == "BARCODE"){
-                                       inOligos >> group;
+                    inOligos >> group;
+                }else if(type == "PRIMER"){
+                                       m->gobble(inOligos);
+                    primers[oligo] = primerCount; primerCount++;
+                                       
+                    string roligo="";
+                    inOligos >> roligo;
+                    
+                    for(int i=0;i<roligo.length();i++){
+                        roligo[i] = toupper(roligo[i]);
+                        if(roligo[i] == 'U')   {       roligo[i] = 'T';        }
+                    }
+                    revPrimer.push_back(reverseOligo(roligo));
+                    
+                    // get rest of line in case there is a primer name
+                                       while (!inOligos.eof()) {
+                        char c = inOligos.get();
+                        if (c == 10 || c == 13 || c == -1){    break;  }
+                        else if (c == 32 || c == 9){;} //space or tab
+                                       }
+                    //cout << "prim oligo = " << oligo << " reverse = " << roligo << endl;
                                }else if((type == "LINKER")||(type == "SPACER")) {;}
-                               else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                               else{   m->mothurOut(type + " is not recognized as a valid type. Choices are primer, forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
                        }
                        m->gobble(inOligos);
                }       
index 24d22845c5a4fc33d109888e5f2007d25134672e..bd3f172c2bcaa3e13ced0b5c57d92e39c9073969 100644 (file)
@@ -903,15 +903,9 @@ void PreClusterCommand::readNameFile(){
                while (!in.eof()) {
                        in >> firstCol >> secondCol; m->gobble(in);
             
-            for (int i = 0; i < firstCol.length(); i++) {
-                if (firstCol[i] == ':') { firstCol[i] = '_'; m->changedSeqNames = true; }
-            }
-            
-            int size = 1;
-            for (int i = 0; i < secondCol.length(); i++) {
-                if (secondCol[i] == ':') { secondCol[i] = '_'; m->changedSeqNames = true; }
-                else if(secondCol[i] == ','){  size++; }
-            }
+            m->checkName(firstCol);
+            m->checkName(secondCol);
+            int size = m->getNumNames(secondCol);
             
                        names[firstCol] = secondCol;
             sizes[firstCol] = size;
index 33ca1728052e4db59372fc2d8129e5bcbaf729c5..26492245e2b9805144125052d57045a834bc3f95 100644 (file)
@@ -89,9 +89,7 @@ string QualityScores::getSequenceName(ifstream& qFile) {
             
                        name = name.substr(1); 
             
-            for (int i = 0; i < name.length(); i++) {
-                if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
-            }
+            m->checkName(name);
             
         }else{ m->mothurOut("Error in reading your qfile, at position " + toString(qFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true;  }
         
@@ -106,10 +104,7 @@ string QualityScores::getSequenceName(ifstream& qFile) {
 void QualityScores::setName(string name) {
        try {
       
-        for (int i = 0; i < name.length(); i++) {
-            if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
-        }     
-    
+        m->checkName(name);   
         seqName = name;
        }
        catch(exception& e) {
index 54d3dcc5c9b6c93f267478e6491126b3aedff772..5879241372dd3b60b34683683a0c1edbcbd64d57 100644 (file)
@@ -1412,9 +1412,7 @@ int SeqErrorCommand::setLines(string filename, string qfilename, string rfilenam
                                        string sname = "";  nameStream >> sname;
                                        sname = sname.substr(1);
                     
-                    for (int i = 0; i < sname.length(); i++) {
-                        if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
-                    }
+                    m->checkName(sname);
                                        
                                        map<string, int>::iterator it = firstSeqNames.find(sname);
                                        
@@ -1475,9 +1473,7 @@ int SeqErrorCommand::setLines(string filename, string qfilename, string rfilenam
                     istringstream nameStream(input);
                     string sname = "";  nameStream >> sname;
                     
-                    for (int i = 0; i < sname.length(); i++) {
-                        if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
-                    }
+                    m->checkName(sname);
                     
                     map<string, int>::iterator it = firstSeqNamesReport.find(sname);
                 
index ddc7d4c5a410ca99a415ee1e15d7c60b2199060f..d6073d75da3ed630b27b626bbba8615d79b58858 100644 (file)
@@ -21,9 +21,7 @@ Sequence::Sequence(string newName, string sequence) {
                initialize();   
                name = newName;
         
-        for (int i = 0; i < name.length(); i++) {
-            if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
-        }
+        m->checkName(name);
                
                //setUnaligned removes any gap characters for us
                setUnaligned(sequence);
@@ -41,9 +39,7 @@ Sequence::Sequence(string newName, string sequence, string justUnAligned) {
                initialize();   
                name = newName;
         
-        for (int i = 0; i < name.length(); i++) {
-            if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
-        }
+        m->checkName(name);
                
                //setUnaligned removes any gap characters for us
                setUnaligned(sequence);
@@ -295,9 +291,7 @@ string Sequence::getSequenceName(ifstream& fastaFile) {
             
                        name = name.substr(1); 
             
-            for (int i = 0; i < name.length(); i++) {
-                if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
-            }
+            m->checkName(name);
             
         }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true;  }
         
@@ -319,9 +313,7 @@ string Sequence::getSequenceName(istringstream& fastaFile) {
             
                        name = name.substr(1); 
             
-            for (int i = 0; i < name.length(); i++) {
-                if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
-            }
+            m->checkName(name);
             
         }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true;  }
         
index 4536d9516688fe6db1d9c10e709e789c40abe93c..dc4c9ac3baa8dcaafadc9e825ceb598ab4c1cabf 100644 (file)
@@ -14,6 +14,7 @@ vector<string> SetDirectoryCommand::setParameters(){
        try {
                CommandParameter ptempdefault("tempdefault", "String", "", "", "", "", "","",false,false); parameters.push_back(ptempdefault);
         CommandParameter pdebug("debug", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdebug);
+        CommandParameter pmodnames("modifynames", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmodnames);
                CommandParameter pinput("input", "String", "", "", "", "", "","",false,false,true); parameters.push_back(pinput);
                CommandParameter poutput("output", "String", "", "", "", "", "","",false,false,true); parameters.push_back(poutput);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
@@ -36,6 +37,7 @@ string SetDirectoryCommand::getHelpString(){
                helpString += "The set.dir command can also be used to specify the directory where your input files are located, the directory must exist.\n";
                helpString += "The set.dir command can also be used to override or set the default location mothur will look for files if it is unable to find them, the directory must exist.\n";
         helpString += "The set.dir command can also be used to run mothur in debug mode.\n";
+        helpString += "The set.dir command can also be used to set the modifynames parameter. Default=t, meaning if your sequence names contain ':' change them to '_' to aviod issues while making trees.  modifynames=F will leave sequence names as they are.\n";
                helpString += "The set.dir command parameters are input, output, tempdefault and debug and one is required.\n";
         helpString += "To run mothur in debug mode set debug=true. Default debug=false.\n";
                helpString += "To return the output to the same directory as the input files you may enter: output=clear.\n";
@@ -93,10 +95,17 @@ SetDirectoryCommand::SetDirectoryCommand(string option)  {
             else {  debug = m->isTrue(temp); }
             m->debug = debug;
             
+            bool nomod = false;
+            temp = validParameter.validFile(parameters, "modifynames", false);
+                       if (temp == "not found") {  modifyNames = true;  nomod=true; }
+            else {  modifyNames = m->isTrue(temp); }
+            m->modifyNames = modifyNames;
+            
             if (debug) { m->mothurOut("Setting [DEBUG] flag.\n"); }
+            
                                
-                       if ((input == "") && (output == "") && (tempdefault == "") && nodebug) {        
-                               m->mothurOut("You must provide either an input, output, tempdefault or debug for the set.outdir command."); m->mothurOutEndLine(); abort = true;
+                       if ((input == "") && (output == "") && (tempdefault == "") && nodebug && nomod) {
+                               m->mothurOut("You must provide either an input, output, tempdefault, debug or modifynames for the set.outdir command."); m->mothurOutEndLine(); abort = true;
                        }else if((input == "") && (output == "") && (tempdefault == "")) { debugOnly = true; }
                }
        }
index ab174c2b783eca5c3a7fc0d74a8a2f6c5abf4a70..36753edeb57f8601dd0a1f5309b9c1cf67109429 100644 (file)
@@ -37,7 +37,7 @@ public:
 private:
        CommandFactory* commandFactory;
        string output, input, tempdefault;
-       bool abort, debugOnly;
+       bool abort, debugOnly, modifyNames;
        vector<string> outputNames;
        
                
index 81b9111b17dd8bba332ad8a8267cc12bf16e235a..16d83de2596b9cf84ab4069789917878fa89559e 100644 (file)
@@ -1391,9 +1391,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) {
                         string sname = "";  nameStream >> sname;
                         sname = sname.substr(1);
                         
-                        for (int i = 0; i < sname.length(); i++) {
-                            if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
-                        }
+                        m->checkName(sname);
                         
                         map<string, int>::iterator it = firstSeqNames.find(sname);