X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=deuniqueseqscommand.cpp;h=a6c151c38318f4a98917e5b46f81f5b8cd7cfc27;hp=214359e3353f5521aa337085ce46136187d326c0;hb=cf9987b67aa49777a4c91c2d21f96e58bf17aa82;hpb=c69e2e9749626cfbf1d6cb0125ae94f869e00b18 diff --git a/deuniqueseqscommand.cpp b/deuniqueseqscommand.cpp index 214359e..a6c151c 100644 --- a/deuniqueseqscommand.cpp +++ b/deuniqueseqscommand.cpp @@ -9,51 +9,70 @@ #include "deuniqueseqscommand.h" #include "sequence.hpp" +#include "counttable.h" //********************************************************************************************************************** -vector DeUniqueSeqsCommand::getValidParameters(){ +vector DeUniqueSeqsCommand::setParameters(){ try { - string Array[] = {"fasta", "name","outputdir","inputdir"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta); + CommandParameter pname("name", "InputTypes", "", "", "namecount", "namecount", "none","name",false,false,true); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "namecount", "namecount", "none","group",false,false,true); parameters.push_back(pcount); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { - m->errorOut(e, "DeUniqueSeqsCommand", "getValidParameters"); + m->errorOut(e, "DeUniqueSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** -DeUniqueSeqsCommand::DeUniqueSeqsCommand(){ +string DeUniqueSeqsCommand::getHelpString(){ try { - abort = true; calledHelp = true; - vector tempOutNames; - outputTypes["fasta"] = tempOutNames; + string helpString = ""; + helpString += "The deunique.seqs command reads a fastafile and namefile or countfile, and creates a fastafile containing all the sequences. It you provide a count file with group information a group file is also created.\n"; + helpString += "The deunique.seqs command parameters are fasta, name and count. Fasta is required and you must provide either a name or count file.\n"; + helpString += "The deunique.seqs command should be in the following format: \n"; + helpString += "deunique.seqs(fasta=yourFastaFile, name=yourNameFile) \n"; + helpString += "Example deunique.seqs(fasta=abrecovery.unique.fasta, name=abrecovery.names).\n"; + helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n"; + return helpString; } catch(exception& e) { - m->errorOut(e, "DeUniqueSeqsCommand", "DeconvoluteCommand"); + m->errorOut(e, "DeUniqueSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** -vector DeUniqueSeqsCommand::getRequiredParameters(){ - try { - string Array[] = {"fasta","name"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); - return myArray; - } - catch(exception& e) { - m->errorOut(e, "DeUniqueSeqsCommand", "getRequiredParameters"); - exit(1); - } +string DeUniqueSeqsCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "fasta") { pattern = "[filename],redundant.fasta"; } + else if (type == "group") { pattern = "[filename],redundant.groups"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "DeUniqueSeqsCommand", "getOutputPattern"); + exit(1); + } } //********************************************************************************************************************** -vector DeUniqueSeqsCommand::getRequiredFiles(){ +DeUniqueSeqsCommand::DeUniqueSeqsCommand(){ try { - vector myArray; - return myArray; + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["group"] = tempOutNames; } catch(exception& e) { - m->errorOut(e, "DeUniqueSeqsCommand", "getRequiredFiles"); + m->errorOut(e, "DeUniqueSeqsCommand", "DeconvoluteCommand"); exit(1); } } @@ -64,11 +83,10 @@ DeUniqueSeqsCommand::DeUniqueSeqsCommand(string option) { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { - //valid paramters for this command - string Array[] = {"fasta", "name","outputdir","inputdir"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + vector myArray = setParameters(); OptionParser parser(option); map parameters = parser.getParameters(); @@ -84,6 +102,7 @@ DeUniqueSeqsCommand::DeUniqueSeqsCommand(string option) { //initialize outputTypes vector tempOutNames; outputTypes["fasta"] = tempOutNames; + outputTypes["group"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -105,23 +124,54 @@ DeUniqueSeqsCommand::DeUniqueSeqsCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["name"] = inputDir + it->second; } } + + it = parameters.find("count"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["count"] = inputDir + it->second; } + } } //check for required parameters fastaFile = validParameter.validFile(parameters, "fasta", true); if (fastaFile == "not open") { abort = true; } - else if (fastaFile == "not found") { fastaFile = ""; m->mothurOut("fasta is a required parameter for the deunique.seqs command."); m->mothurOutEndLine(); abort = true; } + else if (fastaFile == "not found") { + fastaFile = m->getFastaFile(); + if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; } + }else { m->setFastaFile(fastaFile); } //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; - outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it } nameFile = validParameter.validFile(parameters, "name", true); if (nameFile == "not open") { abort = true; } - else if (nameFile == "not found"){ nameFile = ""; m->mothurOut("name is a required parameter for the deunique.seqs command."); m->mothurOutEndLine(); abort = true; } + else if (nameFile == "not found"){ nameFile = ""; } + else { m->setNameFile(nameFile); } + + countfile = validParameter.validFile(parameters, "count", true); + if (countfile == "not open") { abort = true; } + else if (countfile == "not found") { countfile = ""; } + else { m->setCountTableFile(countfile); } + + if ((countfile != "") && (nameFile != "")) { m->mothurOut("When executing a deunique.seqs command you must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; } + + + if ((countfile == "") && (nameFile == "")) { //look for currents + nameFile = m->getNameFile(); + if (nameFile != "") { m->mothurOut("Using " + nameFile + " as input file for the name parameter."); m->mothurOutEndLine(); } + else { + countfile = m->getCountTableFile(); + if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("[ERROR]: You have no current name or count files one is required."); m->mothurOutEndLine(); abort = true; } + } + } + } } @@ -130,24 +180,6 @@ DeUniqueSeqsCommand::DeUniqueSeqsCommand(string option) { exit(1); } } -//********************************************************************************************************************** - -void DeUniqueSeqsCommand::help(){ - try { - m->mothurOut("The deunique.seqs command reads a fastafile and namefile, and creates a fastafile containing all the sequences.\n"); - m->mothurOut("The deunique.seqs command parameters are fasta and name, both are required.\n"); - m->mothurOut("The deunique.seqs command should be in the following format: \n"); - m->mothurOut("deunique.seqs(fasta=yourFastaFile, name=yourNameFile) \n"); - m->mothurOut("Example deunique.seqs(fasta=abrecovery.unique.fasta, name=abrecovery.names).\n"); - m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n"); - - } - catch(exception& e) { - m->errorOut(e, "DeUniqueSeqsCommand", "help"); - exit(1); - } -} - /**************************************************************************************/ int DeUniqueSeqsCommand::execute() { try { @@ -156,105 +188,122 @@ int DeUniqueSeqsCommand::execute() { //prepare filenames and open files ofstream out; - string outFastaFile = m->getRootName(m->getSimpleName(fastaFile)); + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(fastaFile); } + string outFastaFile = thisOutputDir + m->getRootName(m->getSimpleName(fastaFile)); int pos = outFastaFile.find("unique"); - if (pos != string::npos) { - outFastaFile = outputDir + outFastaFile.substr(0, pos) + "redundant" + m->getExtension(fastaFile); - }else{ - outFastaFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "redundant" + m->getExtension(fastaFile); - } + if (pos != string::npos) { outFastaFile = outputDir + outFastaFile.substr(0, pos); } + else { outFastaFile = outputDir + outFastaFile; } + map variables; + variables["[filename]"] = outFastaFile; + outFastaFile = getOutputFileName("fasta", variables); m->openOutputFile(outFastaFile, out); - readNamesFile(); - if (m->control_pressed) { out.close(); outputTypes.clear(); remove(outFastaFile.c_str()); return 0; } + map nameMap; + CountTable ct; + ofstream outGroup; + string outGroupFile; + vector groups; + if (nameFile != "") { m->readNames(nameFile, nameMap); } + else { + ct.readTable(countfile, true, false); + + if (ct.hasGroupInfo()) { + thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(countfile); } + outGroupFile = thisOutputDir + m->getRootName(m->getSimpleName(countfile)); + variables["[filename]"] = outGroupFile; + outGroupFile = getOutputFileName("group", variables); + m->openOutputFile(outGroupFile, outGroup); + groups = ct.getNamesOfGroups(); + } + } + + if (m->control_pressed) { out.close(); outputTypes.clear(); m->mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); m->mothurRemove(outGroupFile); } } return 0; } ifstream in; m->openInputFile(fastaFile, in); while (!in.eof()) { - if (m->control_pressed) { in.close(); out.close(); outputTypes.clear(); remove(outFastaFile.c_str()); return 0; } + if (m->control_pressed) { in.close(); out.close(); outputTypes.clear(); m->mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); m->mothurRemove(outGroupFile); } } return 0; } Sequence seq(in); m->gobble(in); if (seq.getName() != "") { - //look for sequence name in nameMap - map::iterator it = nameMap.find(seq.getName()); - - if (it == nameMap.end()) { m->mothurOut("[ERROR]: Your namefile does not contain " + seq.getName() + ", aborting."); m->mothurOutEndLine(); m->control_pressed = true; } - else { - vector names; - m->splitAtComma(it->second, names); - - //output sequences - for (int i = 0; i < names.size(); i++) { - out << ">" << names[i] << endl; - out << seq.getAligned() << endl; - } - - //remove seq from name map so we can check for seqs in namefile not in fastafile later - nameMap.erase(it); - } + if (nameFile != "") { + //look for sequence name in nameMap + map::iterator it = nameMap.find(seq.getName()); + + if (it == nameMap.end()) { m->mothurOut("[ERROR]: Your namefile does not contain " + seq.getName() + ", aborting."); m->mothurOutEndLine(); m->control_pressed = true; } + else { + vector names; + m->splitAtComma(it->second, names); + + //output sequences + for (int i = 0; i < names.size(); i++) { + out << ">" << names[i] << endl; + out << seq.getAligned() << endl; + } + + //remove seq from name map so we can check for seqs in namefile not in fastafile later + nameMap.erase(it); + } + }else { + if (ct.hasGroupInfo()) { + vector groupCounts = ct.getGroupCounts(seq.getName()); + int count = 1; + for (int i = 0; i < groups.size(); i++) { + for (int j = 0; j < groupCounts[i]; j++) { + outGroup << seq.getName()+"_"+toString(count) << '\t' << groups[i] << endl; count++; + } + } + + } + + int numReps = ct.getNumSeqs(seq.getName()); //will report error and set m->control_pressed if not found + for (int i = 0; i < numReps; i++) { + out << ">" << seq.getName()+"_"+toString(i+1) << endl; + out << seq.getAligned() << endl; + } + } } } in.close(); - out.close(); + out.close(); + if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); } } - if (nameMap.size() != 0) { //then there are names in the namefile not in the fastafile - for (map::iterator it = nameMap.begin(); it != nameMap.end(); it++) { - m->mothurOut(it->first + " is not in your fasta file, but is in your name file. Please correct."); m->mothurOutEndLine(); - } - } - - if (m->control_pressed) { outputTypes.clear(); remove(outFastaFile.c_str()); return 0; } + + if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) { m->mothurRemove(outGroupFile); } }return 0; } + outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); + if (countfile != "") { if (ct.hasGroupInfo()) { outputNames.push_back(outGroupFile); outputTypes["group"].push_back(outGroupFile); } } + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - m->mothurOut(outFastaFile); m->mothurOutEndLine(); - outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); - m->mothurOutEndLine(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "DeUniqueSeqsCommand", "execute"); - exit(1); - } -} -//********************************************************************************************************************** -int DeUniqueSeqsCommand::readNamesFile() { - try { + for(int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); - ifstream inNames; - m->openInputFile(nameFile, inNames); - string name, names; - map::iterator it; - - while(inNames){ - - if(m->control_pressed) { break; } - - inNames >> name; m->gobble(inNames); - inNames >> names; - - it = nameMap.find(name); - - if (it == nameMap.end()) { nameMap[name] = names; } - else { m->mothurOut("[ERROR]: Your namefile already contains " + name + ", aborting."); m->mothurOutEndLine(); m->control_pressed = true; } - - m->gobble(inNames); + //set fasta file as new current fastafile + string current = ""; + itTypes = outputTypes.find("fasta"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } + } + + itTypes = outputTypes.find("group"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); } } - inNames.close(); + return 0; - } catch(exception& e) { - m->errorOut(e, "DeUniqueSeqsCommand", "readNamesFile"); + m->errorOut(e, "DeUniqueSeqsCommand", "execute"); exit(1); } } - /**************************************************************************************/