From a8e2df1b96a57f5f29576b08361b86a96a8eff4f Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Fri, 17 May 2013 09:47:21 -0400 Subject: [PATCH] added modify names parameter to set.dir --- flowdata.cpp | 4 +--- makelookupcommand.cpp | 4 ++-- mothurout.cpp | 8 +++++--- mothurout.h | 3 ++- parsefastaqcommand.cpp | 4 ++-- prcseqscommand.cpp | 27 ++++++++++++++++++++++++--- preclustercommand.cpp | 12 +++--------- qualityscores.cpp | 9 ++------- seqerrorcommand.cpp | 8 ++------ sequence.cpp | 16 ++++------------ setdircommand.cpp | 13 +++++++++++-- setdircommand.h | 2 +- trimseqscommand.cpp | 4 +--- 13 files changed, 60 insertions(+), 54 deletions(-) diff --git a/flowdata.cpp b/flowdata.cpp index b2e856c..66261e3 100644 --- a/flowdata.cpp +++ b/flowdata.cpp @@ -68,9 +68,7 @@ string FlowData::getSequenceName(ifstream& flowFile) { flowFile >> name; if (name.length() != 0) { - for (int i = 0; i < name.length(); i++) { - if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; } - } + m->checkName(name); }else{ m->mothurOut("Error in reading your flowfile, at position " + toString(flowFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true; } return name; diff --git a/makelookupcommand.cpp b/makelookupcommand.cpp index 59e26c7..dd5bacf 100644 --- a/makelookupcommand.cpp +++ b/makelookupcommand.cpp @@ -40,8 +40,8 @@ string MakeLookupCommand::getHelpString(){ helpString += "The flow parameter is used to provide the flow data. It is required.\n"; helpString += "The error parameter is used to provide the error summary. It is required.\n"; helpString += "The barcode parameter is used to provide the barcode sequence. Default=AACCGTGTC.\n"; - helpString += "The key parameter is used to provide the key sequence. Default=TACG.\n"; - helpString += "The threshold parameter is ....\n"; + helpString += "The key parameter is used to provide the key sequence. Default=TCAG.\n"; + helpString += "The threshold parameter is ....Default=10000.\n"; helpString += "The order parameter options are A, B or I. Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"; helpString += "The make.lookup should be in the following format: make.lookup(reference=HMP_MOCK.v53.fasta, flow=H3YD4Z101.mock3.flow_450.flow, error=H3YD4Z101.mock3.flow_450.error.summary, barcode=AACCTGGC)\n"; helpString += "new(...)\n"; diff --git a/mothurout.cpp b/mothurout.cpp index fc16d20..2900c7e 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -2242,9 +2242,11 @@ map MothurOut::readNames(string namefile, unsigned long int& numSeq /************************************************************/ int MothurOut::checkName(string& name) { try { - for (int i = 0; i < name.length(); i++) { - if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; } - } + if (modifyNames) { + for (int i = 0; i < name.length(); i++) { + if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; } + } + } return 0; } catch(exception& e) { diff --git a/mothurout.h b/mothurout.h index 643ecd5..845e6dd 100644 --- a/mothurout.h +++ b/mothurout.h @@ -70,7 +70,7 @@ class MothurOut { vector binLabelsInFile; vector currentBinLabels; string saveNextLabel, argv, sharedHeaderMode, groupMode; - bool printedHeaders, commandInputsConvertError, changedSeqNames; + bool printedHeaders, commandInputsConvertError, changedSeqNames, modifyNames; //functions from mothur.h //file operations @@ -265,6 +265,7 @@ class MothurOut { sharedHeaderMode = ""; groupMode = "group"; changedSeqNames = false; + modifyNames = true; } ~MothurOut(); diff --git a/parsefastaqcommand.cpp b/parsefastaqcommand.cpp index 74e3e2b..051c1df 100644 --- a/parsefastaqcommand.cpp +++ b/parsefastaqcommand.cpp @@ -191,7 +191,7 @@ int ParseFastaQCommand::execute(){ else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; } else { name = name.substr(1); - for (int i = 0; i < name.length(); i++) { if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; } } + m->checkName(name); } //read sequence @@ -204,7 +204,7 @@ int ParseFastaQCommand::execute(){ else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; } else { name2 = name2.substr(1); - for (int i = 0; i < name2.length(); i++) { if (name2[i] == ':') { name2[i] = '_'; m->changedSeqNames = true; } } + m->checkName(name2); } //read quality scores diff --git a/prcseqscommand.cpp b/prcseqscommand.cpp index d31d687..8a8db37 100644 --- a/prcseqscommand.cpp +++ b/prcseqscommand.cpp @@ -974,14 +974,35 @@ bool PcrSeqsCommand::readOligos(){ else if (c == 32 || c == 9){;} //space or tab } primers[oligo] = primerCount; primerCount++; + //cout << "for oligo = " << oligo << endl; }else if(type == "REVERSE"){ string oligoRC = reverseOligo(oligo); revPrimer.push_back(oligoRC); - //cout << "oligo = " << oligo << " reverse = " << oligoRC << endl; + //cout << "rev oligo = " << oligo << " reverse = " << oligoRC << endl; }else if(type == "BARCODE"){ - inOligos >> group; + inOligos >> group; + }else if(type == "PRIMER"){ + m->gobble(inOligos); + primers[oligo] = primerCount; primerCount++; + + string roligo=""; + inOligos >> roligo; + + for(int i=0;imothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; } + else{ m->mothurOut(type + " is not recognized as a valid type. Choices are primer, forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; } } m->gobble(inOligos); } diff --git a/preclustercommand.cpp b/preclustercommand.cpp index 24d2284..bd3f172 100644 --- a/preclustercommand.cpp +++ b/preclustercommand.cpp @@ -903,15 +903,9 @@ void PreClusterCommand::readNameFile(){ while (!in.eof()) { in >> firstCol >> secondCol; m->gobble(in); - for (int i = 0; i < firstCol.length(); i++) { - if (firstCol[i] == ':') { firstCol[i] = '_'; m->changedSeqNames = true; } - } - - int size = 1; - for (int i = 0; i < secondCol.length(); i++) { - if (secondCol[i] == ':') { secondCol[i] = '_'; m->changedSeqNames = true; } - else if(secondCol[i] == ','){ size++; } - } + m->checkName(firstCol); + m->checkName(secondCol); + int size = m->getNumNames(secondCol); names[firstCol] = secondCol; sizes[firstCol] = size; diff --git a/qualityscores.cpp b/qualityscores.cpp index 33ca172..2649224 100644 --- a/qualityscores.cpp +++ b/qualityscores.cpp @@ -89,9 +89,7 @@ string QualityScores::getSequenceName(ifstream& qFile) { name = name.substr(1); - for (int i = 0; i < name.length(); i++) { - if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; } - } + m->checkName(name); }else{ m->mothurOut("Error in reading your qfile, at position " + toString(qFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true; } @@ -106,10 +104,7 @@ string QualityScores::getSequenceName(ifstream& qFile) { void QualityScores::setName(string name) { try { - for (int i = 0; i < name.length(); i++) { - if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; } - } - + m->checkName(name); seqName = name; } catch(exception& e) { diff --git a/seqerrorcommand.cpp b/seqerrorcommand.cpp index 54d3dcc..5879241 100644 --- a/seqerrorcommand.cpp +++ b/seqerrorcommand.cpp @@ -1412,9 +1412,7 @@ int SeqErrorCommand::setLines(string filename, string qfilename, string rfilenam string sname = ""; nameStream >> sname; sname = sname.substr(1); - for (int i = 0; i < sname.length(); i++) { - if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; } - } + m->checkName(sname); map::iterator it = firstSeqNames.find(sname); @@ -1475,9 +1473,7 @@ int SeqErrorCommand::setLines(string filename, string qfilename, string rfilenam istringstream nameStream(input); string sname = ""; nameStream >> sname; - for (int i = 0; i < sname.length(); i++) { - if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; } - } + m->checkName(sname); map::iterator it = firstSeqNamesReport.find(sname); diff --git a/sequence.cpp b/sequence.cpp index ddc7d4c..d6073d7 100644 --- a/sequence.cpp +++ b/sequence.cpp @@ -21,9 +21,7 @@ Sequence::Sequence(string newName, string sequence) { initialize(); name = newName; - for (int i = 0; i < name.length(); i++) { - if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; } - } + m->checkName(name); //setUnaligned removes any gap characters for us setUnaligned(sequence); @@ -41,9 +39,7 @@ Sequence::Sequence(string newName, string sequence, string justUnAligned) { initialize(); name = newName; - for (int i = 0; i < name.length(); i++) { - if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; } - } + m->checkName(name); //setUnaligned removes any gap characters for us setUnaligned(sequence); @@ -295,9 +291,7 @@ string Sequence::getSequenceName(ifstream& fastaFile) { name = name.substr(1); - for (int i = 0; i < name.length(); i++) { - if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; } - } + m->checkName(name); }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true; } @@ -319,9 +313,7 @@ string Sequence::getSequenceName(istringstream& fastaFile) { name = name.substr(1); - for (int i = 0; i < name.length(); i++) { - if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; } - } + m->checkName(name); }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true; } diff --git a/setdircommand.cpp b/setdircommand.cpp index 4536d95..dc4c9ac 100644 --- a/setdircommand.cpp +++ b/setdircommand.cpp @@ -14,6 +14,7 @@ vector SetDirectoryCommand::setParameters(){ try { CommandParameter ptempdefault("tempdefault", "String", "", "", "", "", "","",false,false); parameters.push_back(ptempdefault); CommandParameter pdebug("debug", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdebug); + CommandParameter pmodnames("modifynames", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmodnames); CommandParameter pinput("input", "String", "", "", "", "", "","",false,false,true); parameters.push_back(pinput); CommandParameter poutput("output", "String", "", "", "", "", "","",false,false,true); parameters.push_back(poutput); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); @@ -36,6 +37,7 @@ string SetDirectoryCommand::getHelpString(){ helpString += "The set.dir command can also be used to specify the directory where your input files are located, the directory must exist.\n"; helpString += "The set.dir command can also be used to override or set the default location mothur will look for files if it is unable to find them, the directory must exist.\n"; helpString += "The set.dir command can also be used to run mothur in debug mode.\n"; + helpString += "The set.dir command can also be used to set the modifynames parameter. Default=t, meaning if your sequence names contain ':' change them to '_' to aviod issues while making trees. modifynames=F will leave sequence names as they are.\n"; helpString += "The set.dir command parameters are input, output, tempdefault and debug and one is required.\n"; helpString += "To run mothur in debug mode set debug=true. Default debug=false.\n"; helpString += "To return the output to the same directory as the input files you may enter: output=clear.\n"; @@ -93,10 +95,17 @@ SetDirectoryCommand::SetDirectoryCommand(string option) { else { debug = m->isTrue(temp); } m->debug = debug; + bool nomod = false; + temp = validParameter.validFile(parameters, "modifynames", false); + if (temp == "not found") { modifyNames = true; nomod=true; } + else { modifyNames = m->isTrue(temp); } + m->modifyNames = modifyNames; + if (debug) { m->mothurOut("Setting [DEBUG] flag.\n"); } + - if ((input == "") && (output == "") && (tempdefault == "") && nodebug) { - m->mothurOut("You must provide either an input, output, tempdefault or debug for the set.outdir command."); m->mothurOutEndLine(); abort = true; + if ((input == "") && (output == "") && (tempdefault == "") && nodebug && nomod) { + m->mothurOut("You must provide either an input, output, tempdefault, debug or modifynames for the set.outdir command."); m->mothurOutEndLine(); abort = true; }else if((input == "") && (output == "") && (tempdefault == "")) { debugOnly = true; } } } diff --git a/setdircommand.h b/setdircommand.h index ab174c2..36753ed 100644 --- a/setdircommand.h +++ b/setdircommand.h @@ -37,7 +37,7 @@ public: private: CommandFactory* commandFactory; string output, input, tempdefault; - bool abort, debugOnly; + bool abort, debugOnly, modifyNames; vector outputNames; diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 81b9111..16d83de 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -1391,9 +1391,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) { string sname = ""; nameStream >> sname; sname = sname.substr(1); - for (int i = 0; i < sname.length(); i++) { - if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; } - } + m->checkName(sname); map::iterator it = firstSeqNames.find(sname); -- 2.39.2