X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=splitabundcommand.cpp;h=9c514a9371ae37a24114c3eacf59424733040cfc;hb=d4429ccc354708f3c9a13c809ba9b57c22908d2b;hp=17809631f9e9cc590e510a33ec5528e493a14d3c;hpb=a76d81690125ca57d7f602ac93abad75cf9796c2;p=mothur.git diff --git a/splitabundcommand.cpp b/splitabundcommand.cpp index 1780963..9c514a9 100644 --- a/splitabundcommand.cpp +++ b/splitabundcommand.cpp @@ -9,19 +9,82 @@ #include "splitabundcommand.h" +//********************************************************************************************************************** +vector SplitAbundCommand::setParameters(){ + try { + CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); + CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname); + CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup); + CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist); + CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); + CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff); + CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); + CommandParameter paccnos("accnos", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(paccnos); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } + return myArray; + } + catch(exception& e) { + m->errorOut(e, "SplitAbundCommand", "setParameters"); + exit(1); + } +} +//********************************************************************************************************************** +string SplitAbundCommand::getHelpString(){ + try { + string helpString = ""; + helpString += "The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n"; + helpString += "The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n"; + helpString += "The fasta and a list or name parameter are required, and you must provide a cutoff value.\n"; + helpString += "The cutoff parameter is used to qualify what is abundant and rare.\n"; + helpString += "The group parameter allows you to parse a group file into rare and abundant groups.\n"; + helpString += "The label parameter is used to read specific labels in your listfile you want to use.\n"; + helpString += "The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n"; + helpString += "The groups parameter allows you to parse the files into rare and abundant files by group. \n"; + helpString += "For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files. \n"; + helpString += "If you want .abund and .rare files for all groups, set groups=all. \n"; + helpString += "The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n"; + helpString += "Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n"; + helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n"; + return helpString; + } + catch(exception& e) { + m->errorOut(e, "SplitAbundCommand", "getHelpString"); + exit(1); + } +} + +//********************************************************************************************************************** +SplitAbundCommand::SplitAbundCommand(){ + try { + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["list"] = tempOutNames; + outputTypes["name"] = tempOutNames; + outputTypes["accnos"] = tempOutNames; + outputTypes["group"] = tempOutNames; + outputTypes["fasta"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand"); + exit(1); + } +} //********************************************************************************************************************** SplitAbundCommand::SplitAbundCommand(string option) { try { - abort = false; + abort = false; calledHelp = false; allLines = 1; //allow user to run help - if(option == "help") { help(); abort = true; } + if(option == "help") { help(); abort = true; calledHelp = true; } else { - //valid paramters for this command - string Array[] = {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; // - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + vector myArray = setParameters(); OptionParser parser(option); map parameters = parser.getParameters(); @@ -33,7 +96,15 @@ SplitAbundCommand::SplitAbundCommand(string option) { for (it = parameters.begin(); it != parameters.end(); it++) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } - + + //initialize outputTypes + vector tempOutNames; + outputTypes["list"] = tempOutNames; + outputTypes["name"] = tempOutNames; + outputTypes["accnos"] = tempOutNames; + outputTypes["group"] = tempOutNames; + outputTypes["fasta"] = tempOutNames; + //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } @@ -90,7 +161,11 @@ SplitAbundCommand::SplitAbundCommand(string option) { fastafile = validParameter.validFile(parameters, "fasta", true); if (fastafile == "not open") { abort = true; } - else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true; } + else if (fastafile == "not found") { + fastafile = m->getFastaFile(); + if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; } + } groupfile = validParameter.validFile(parameters, "group", true); if (groupfile == "not open") { groupfile = ""; abort = true; } @@ -115,7 +190,15 @@ SplitAbundCommand::SplitAbundCommand(string option) { if ((groupfile == "") && (groups != "")) { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; Groups.clear(); } //do you have all files needed - if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true; } + if ((listfile == "") && (namefile == "")) { + namefile = m->getNameFile(); + if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); } + else { + listfile = m->getListFile(); + if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current list or namefile and the list or name parameter is required."); m->mothurOutEndLine(); abort = true; } + } + } //check for optional parameter and set defaults // ...at some point should added some additional type checking... @@ -143,29 +226,6 @@ SplitAbundCommand::SplitAbundCommand(string option) { } } //********************************************************************************************************************** -void SplitAbundCommand::help(){ - try { - m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n"); - m->mothurOut("The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n"); - m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n"); - m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n"); - m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n"); - m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n"); - m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n"); - m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group. \n"); - m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files. \n"); - m->mothurOut("If you want .abund and .rare files for all groups, set groups=all. \n"); - m->mothurOut("The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n"); - m->mothurOut("Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n"); - m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n"); - - } - catch(exception& e) { - m->errorOut(e, "SplitAbundCommand", "help"); - exit(1); - } -} -//********************************************************************************************************************** SplitAbundCommand::~SplitAbundCommand(){ if (groupfile != "") { delete groupMap; } } @@ -173,28 +233,10 @@ SplitAbundCommand::~SplitAbundCommand(){ int SplitAbundCommand::execute(){ try { - if (abort == true) { return 0; } + if (abort == true) { if (calledHelp) { return 0; } return 2; } if (listfile != "") { //you are using a listfile to determine abundance if (outputDir == "") { outputDir = m->hasPath(listfile); } - - //remove old files so you can append later.... - string fileroot = outputDir + m->getRootName(m->getSimpleName(listfile)); - if (Groups.size() == 0) { - remove((fileroot + "rare.list").c_str()); - remove((fileroot + "abund.list").c_str()); - - outputNames.push_back((fileroot + "rare.list")); - outputNames.push_back((fileroot + "abund.list")); - }else{ - for (int i=0; i processedLabels; @@ -290,7 +332,34 @@ int SplitAbundCommand::execute(){ if (accnos) { writeAccnos(tag); } if (fastafile != "") { parseFasta(tag); } } - + + //set fasta file as new current fastafile + string current = ""; + itTypes = outputTypes.find("fasta"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } + } + + itTypes = outputTypes.find("name"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); } + } + + itTypes = outputTypes.find("group"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); } + } + + itTypes = outputTypes.find("list"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); } + } + + itTypes = outputTypes.find("accnos"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); } + } + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -326,9 +395,11 @@ int SplitAbundCommand::splitList(ListVector* thisList) { } }//end for - writeList(thisList); string tag = thisList->getLabel() + "."; + + writeList(thisList, tag); + if (groupfile != "") { parseGroup(tag); } if (accnos) { writeAccnos(tag); } if (fastafile != "") { parseFasta(tag); } @@ -342,7 +413,7 @@ int SplitAbundCommand::splitList(ListVector* thisList) { } } /**********************************************************************************************************************/ -int SplitAbundCommand::writeList(ListVector* thisList) { +int SplitAbundCommand::writeList(ListVector* thisList, string tag) { try { map filehandles; @@ -364,13 +435,13 @@ int SplitAbundCommand::writeList(ListVector* thisList) { ofstream aout; ofstream rout; - string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + "rare.list"; - m->openOutputFileAppend(rare, rout); - //outputNames.push_back(rare); + string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "rare.list"; + m->openOutputFile(rare, rout); + outputNames.push_back(rare); outputTypes["list"].push_back(rare); - string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + "abund.list"; - m->openOutputFileAppend(abund, aout); - //outputNames.push_back(abund); + string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "abund.list"; + m->openOutputFile(abund, aout); + outputNames.push_back(abund); outputTypes["list"].push_back(abund); if (rareNames.size() != 0) { rout << thisList->getLabel() << '\t' << numRareBins << '\t'; } if (abundNames.size() != 0) { aout << thisList->getLabel() << '\t' << numAbundBins << '\t'; } @@ -406,8 +477,10 @@ int SplitAbundCommand::writeList(ListVector* thisList) { temp2 = new ofstream; filehandles[Groups[i]+".abund"] = temp2; - m->openOutputFileAppend(fileroot + Groups[i] + ".rare.list", *(filehandles[Groups[i]+".rare"])); - m->openOutputFileAppend(fileroot + Groups[i] + ".abund.list", *(filehandles[Groups[i]+".abund"])); + m->openOutputFile(fileroot + Groups[i] + tag + ".rare.list", *(filehandles[Groups[i]+".rare"])); + m->openOutputFile(fileroot + Groups[i] + tag + ".abund.list", *(filehandles[Groups[i]+".abund"])); + outputNames.push_back(fileroot + Groups[i] + tag + ".rare.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".rare.list"); + outputNames.push_back(fileroot + Groups[i] + tag + ".abund.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".abund.list"); } map groupVector; @@ -572,11 +645,11 @@ int SplitAbundCommand::writeNames() { //namefile string rare = outputDir + m->getRootName(m->getSimpleName(namefile)) + "rare.names"; m->openOutputFile(rare, rout); - outputNames.push_back(rare); + outputNames.push_back(rare); outputTypes["name"].push_back(rare); string abund = outputDir + m->getRootName(m->getSimpleName(namefile)) + "abund.names"; m->openOutputFile(abund, aout); - outputNames.push_back(abund); + outputNames.push_back(abund); outputTypes["name"].push_back(abund); if (rareNames.size() != 0) { for (set::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) { @@ -642,7 +715,7 @@ int SplitAbundCommand::writeNames() { //namefile for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(filehandles[it3->first])).close(); - outputNames.push_back(fileroot + it3->first + ".names"); + outputNames.push_back(fileroot + it3->first + ".names"); outputTypes["name"].push_back(fileroot + it3->first + ".names"); delete it3->second; } } @@ -669,7 +742,7 @@ int SplitAbundCommand::writeAccnos(string tag) { string rare = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "rare.accnos"; m->openOutputFile(rare, rout); - outputNames.push_back(rare); + outputNames.push_back(rare); outputTypes["accnos"].push_back(rare); for (set::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) { rout << (*itRare) << endl; @@ -678,7 +751,7 @@ int SplitAbundCommand::writeAccnos(string tag) { string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "abund.accnos"; m->openOutputFile(abund, aout); - outputNames.push_back(abund); + outputNames.push_back(abund); outputTypes["accnos"].push_back(abund); for (set::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) { aout << (*itAbund) << endl; @@ -723,7 +796,7 @@ int SplitAbundCommand::writeAccnos(string tag) { //close files for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(filehandles[it3->first])).close(); - outputNames.push_back(fileroot + tag + it3->first + ".accnos"); + outputNames.push_back(fileroot + tag + it3->first + ".accnos"); outputTypes["accnos"].push_back(fileroot + tag + it3->first + ".accnos"); delete it3->second; } } @@ -748,11 +821,11 @@ int SplitAbundCommand::parseGroup(string tag) { //namefile string rare = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "rare.groups"; m->openOutputFile(rare, rout); - outputNames.push_back(rare); + outputNames.push_back(rare); outputTypes["group"].push_back(rare); string abund = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "abund.groups"; m->openOutputFile(abund, aout); - outputNames.push_back(abund); + outputNames.push_back(abund); outputTypes["group"].push_back(abund); for (map::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) { vector names; @@ -817,7 +890,7 @@ int SplitAbundCommand::parseGroup(string tag) { //namefile for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(filehandles[it3->first])).close(); - outputNames.push_back(fileroot + tag + it3->first + ".groups"); + outputNames.push_back(fileroot + tag + it3->first + ".groups"); outputTypes["group"].push_back(fileroot + tag + it3->first + ".groups"); delete it3->second; } } @@ -842,11 +915,11 @@ int SplitAbundCommand::parseFasta(string tag) { //namefile string rare = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "rare.fasta"; m->openOutputFile(rare, rout); - outputNames.push_back(rare); + outputNames.push_back(rare); outputTypes["fasta"].push_back(rare); string abund = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "abund.fasta"; m->openOutputFile(abund, aout); - outputNames.push_back(abund); + outputNames.push_back(abund); outputTypes["fasta"].push_back(abund); //open input file ifstream in; @@ -937,7 +1010,7 @@ int SplitAbundCommand::parseFasta(string tag) { //namefile for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(filehandles[it3->first])).close(); - outputNames.push_back(fileroot + tag + it3->first + ".fasta"); + outputNames.push_back(fileroot + tag + it3->first + ".fasta"); outputTypes["fasta"].push_back(fileroot + tag + it3->first + ".fasta"); delete it3->second; } }