X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=prcseqscommand.cpp;h=5fc9f988be038bb02a724e38cc11b2d9ddf1fa3e;hb=1a5c2356c1b955c6ec024b2baf9f46377ee7c72e;hp=d9c37769b07d24f40efbfef9e076da33f5c86c29;hpb=91a27e0483827c06c21c4fe89558923bbfe86573;p=mothur.git diff --git a/prcseqscommand.cpp b/prcseqscommand.cpp index d9c3776..5fc9f98 100644 --- a/prcseqscommand.cpp +++ b/prcseqscommand.cpp @@ -11,21 +11,21 @@ //********************************************************************************************************************** vector PcrSeqsCommand::setParameters(){ try { - CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); - CommandParameter poligos("oligos", "InputTypes", "", "", "ecolioligos", "none", "none",false,false); parameters.push_back(poligos); - CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname); - CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup); - CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptax); - CommandParameter pecoli("ecoli", "InputTypes", "", "", "ecolioligos", "none", "none",false,false); parameters.push_back(pecoli); - CommandParameter pstart("start", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pstart); - CommandParameter pend("end", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pend); - CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "",false,false); parameters.push_back(pnomatch); - CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepprimer); - CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pkeepdots); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta); + CommandParameter poligos("oligos", "InputTypes", "", "", "ecolioligos", "none", "none","",false,false,true); parameters.push_back(poligos); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount); + CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup); + CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none","taxonomy",false,false,true); parameters.push_back(ptax); + CommandParameter pecoli("ecoli", "InputTypes", "", "", "ecolioligos", "none", "none","",false,false); parameters.push_back(pecoli); + CommandParameter pstart("start", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pstart); + CommandParameter pend("end", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pend); + CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "","",false,false); parameters.push_back(pnomatch); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepprimer); + CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pkeepdots); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -40,8 +40,15 @@ vector PcrSeqsCommand::setParameters(){ string PcrSeqsCommand::getHelpString(){ try { string helpString = ""; - helpString += "The pcr.seqs command reads a fasta file ...\n"; - + helpString += "The pcr.seqs command reads a fasta file.\n"; + helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, processors, keepprimer and keepdots.\n"; + helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n"; + helpString += "The start parameter allows you to provide a starting position to trim to.\n"; + helpString += "The end parameter allows you to provide a ending position to trim from.\n"; + helpString += "The nomatch parameter allows you to decide what to do with sequences where the primer is not found. Default=reject, meaning remove from fasta file. if nomatch=true, then do nothing to sequence.\n"; + helpString += "The processors parameter allows you to use multiple processors.\n"; + helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n"; + helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n"; helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n"; helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n"; return helpString; @@ -51,8 +58,26 @@ string PcrSeqsCommand::getHelpString(){ exit(1); } } - - +//********************************************************************************************************************** +string PcrSeqsCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "fasta") { pattern = "[filename],pcr,[extension]-[filename],[tag],pcr,[extension]"; } + else if (type == "taxonomy") { pattern = "[filename],pcr,[extension]"; } + else if (type == "name") { pattern = "[filename],pcr,[extension]"; } + else if (type == "group") { pattern = "[filename],pcr,[extension]"; } + else if (type == "count") { pattern = "[filename],pcr,[extension]"; } + else if (type == "accnos") { pattern = "[filename],bad.accnos"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "PcrSeqsCommand", "getOutputPattern"); + exit(1); + } +} //********************************************************************************************************************** PcrSeqsCommand::PcrSeqsCommand(){ @@ -64,6 +89,7 @@ PcrSeqsCommand::PcrSeqsCommand(){ outputTypes["taxonomy"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["name"] = tempOutNames; + outputTypes["count"] = tempOutNames; outputTypes["accnos"] = tempOutNames; } catch(exception& e) { @@ -103,6 +129,7 @@ PcrSeqsCommand::PcrSeqsCommand(string option) { outputTypes["group"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["accnos"] = tempOutNames; + outputTypes["count"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -156,11 +183,17 @@ PcrSeqsCommand::PcrSeqsCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["group"] = inputDir + it->second; } } + + it = parameters.find("count"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["count"] = inputDir + it->second; } + } } - //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } //check for required parameters fastafile = validParameter.validFile(parameters, "fasta", true); @@ -171,7 +204,9 @@ PcrSeqsCommand::PcrSeqsCommand(string option) { }else if (fastafile == "not open") { fastafile = ""; abort = true; } else { m->setFastaFile(fastafile); } - + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastafile); } + //check for optional parameter and set defaults // ...at some point should added some additional type checking... string temp; @@ -200,14 +235,24 @@ PcrSeqsCommand::PcrSeqsCommand(string option) { else if(groupfile == "not open"){ groupfile = ""; abort = true; } else { m->setGroupFile(groupfile); } + countfile = validParameter.validFile(parameters, "count", true); + if (countfile == "not open") { countfile = ""; abort = true; } + else if (countfile == "not found") { countfile = ""; } + else { m->setCountTableFile(countfile); } + + if ((namefile != "") && (countfile != "")) { + m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true; + } + + if ((groupfile != "") && (countfile != "")) { + m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true; + } + taxfile = validParameter.validFile(parameters, "taxonomy", true); if (taxfile == "not found"){ taxfile = ""; } else if(taxfile == "not open"){ taxfile = ""; abort = true; } else { m->setTaxonomyFile(taxfile); } - - temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; } - m->mothurConvert(temp, pdiffs); - + temp = validParameter.validFile(parameters, "start", false); if (temp == "not found") { temp = "-1"; } m->mothurConvert(temp, start); @@ -239,10 +284,12 @@ PcrSeqsCommand::PcrSeqsCommand(string option) { } //check to make sure you didn't forget the name file by mistake - if (namefile == "") { - vector files; files.push_back(fastafile); - parser.getNameFile(files); - } + if (countfile == "") { + if (namefile == "") { + vector files; files.push_back(fastafile); + parser.getNameFile(files); + } + } } } @@ -262,10 +309,13 @@ int PcrSeqsCommand::execute(){ string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); } - string trimSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.fasta"; + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)); + variables["[extension]"] = m->getExtension(fastafile); + string trimSeqFile = getOutputFileName("fasta",variables); outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile); - - string badSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.scrap.fasta"; + variables["[tag]"] = "scrap"; + string badSeqFile = getOutputFileName("fasta",variables); length = 0; @@ -313,7 +363,9 @@ int PcrSeqsCommand::execute(){ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if (taxfile != "") { readTax(badNames); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } - + if (countfile != "") { readCount(badNames); } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -347,6 +399,11 @@ int PcrSeqsCommand::execute(){ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); } } + itTypes = outputTypes.find("count"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); } + } + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); @@ -854,7 +911,9 @@ int PcrSeqsCommand::writeAccnos(set badNames){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "bad.accnos"; + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)); + string outputFileName = getOutputFileName("accnos",variables); outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName); ofstream out; @@ -916,7 +975,10 @@ int PcrSeqsCommand::readName(set& names){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(namefile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pcr" + m->getExtension(namefile); + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile)); + variables["[extension]"] = m->getExtension(namefile); + string outputFileName = getOutputFileName("name", variables); ofstream out; m->openOutputFile(outputFileName, out); @@ -974,7 +1036,10 @@ int PcrSeqsCommand::readGroup(set names){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pcr" + m->getExtension(groupfile); + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)); + variables["[extension]"] = m->getExtension(groupfile); + string outputFileName = getOutputFileName("group", variables); ofstream out; m->openOutputFile(outputFileName, out); @@ -1021,7 +1086,11 @@ int PcrSeqsCommand::readTax(set names){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pcr" + m->getExtension(taxfile); + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)); + variables["[extension]"] = m->getExtension(taxfile); + string outputFileName = getOutputFileName("taxonomy", variables); + ofstream out; m->openOutputFile(outputFileName, out); @@ -1061,6 +1130,67 @@ int PcrSeqsCommand::readTax(set names){ exit(1); } } +//*************************************************************************************************************** +int PcrSeqsCommand::readCount(set badSeqNames){ + try { + ifstream in; + m->openInputFile(countfile, in); + set::iterator it; + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile)); + variables["[extension]"] = m->getExtension(countfile); + string goodCountFile = getOutputFileName("count", variables); + + outputNames.push_back(goodCountFile); outputTypes["count"].push_back(goodCountFile); + ofstream goodCountOut; m->openOutputFile(goodCountFile, goodCountOut); + + string headers = m->getline(in); m->gobble(in); + goodCountOut << headers << endl; + + string name, rest; int thisTotal, removedCount; removedCount = 0; + bool wroteSomething = false; + while (!in.eof()) { + + if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; } + + in >> name; m->gobble(in); + in >> thisTotal; m->gobble(in); + rest = m->getline(in); m->gobble(in); + + if (badSeqNames.count(name) != 0) { removedCount+=thisTotal; } + else{ + wroteSomething = true; + goodCountOut << name << '\t' << thisTotal << '\t' << rest << endl; + } + } + in.close(); + goodCountOut.close(); + + if (m->control_pressed) { m->mothurRemove(goodCountFile); } + + if (wroteSomething == false) { m->mothurOut("Your count file contains only sequences from the .accnos file."); m->mothurOutEndLine(); } + + //check for groups that have been eliminated + CountTable ct; + if (ct.testGroups(goodCountFile)) { + ct.readTable(goodCountFile); + ct.printTable(goodCountFile); + } + + if (m->control_pressed) { m->mothurRemove(goodCountFile); } + + m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine(); + + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "PcrSeqsCommand", "readCOunt"); + exit(1); + } +} /**************************************************************************************/