X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=splitabundcommand.cpp;h=bc1cdb3ecfaa8ca528b88c2534e40c61263aacef;hb=5b72d1cf3fa48730e5bb70d59cced1e43e1fe424;hp=891fb979e3aa533a53453628f2701e790161fbcc;hpb=83b5acbe3d6087a6cd73e80dde4b923387a01d01;p=mothur.git diff --git a/splitabundcommand.cpp b/splitabundcommand.cpp index 891fb97..bc1cdb3 100644 --- a/splitabundcommand.cpp +++ b/splitabundcommand.cpp @@ -9,19 +9,105 @@ #include "splitabundcommand.h" +//********************************************************************************************************************** +vector SplitAbundCommand::setParameters(){ + try { + CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); + CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname); + CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup); + CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist); + CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); + CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff); + CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); + CommandParameter paccnos("accnos", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(paccnos); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } + return myArray; + } + catch(exception& e) { + m->errorOut(e, "SplitAbundCommand", "setParameters"); + exit(1); + } +} +//********************************************************************************************************************** +string SplitAbundCommand::getHelpString(){ + try { + string helpString = ""; + helpString += "The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n"; + helpString += "The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n"; + helpString += "The fasta and a list or name parameter are required, and you must provide a cutoff value.\n"; + helpString += "The cutoff parameter is used to qualify what is abundant and rare.\n"; + helpString += "The group parameter allows you to parse a group file into rare and abundant groups.\n"; + helpString += "The label parameter is used to read specific labels in your listfile you want to use.\n"; + helpString += "The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n"; + helpString += "The groups parameter allows you to parse the files into rare and abundant files by group. \n"; + helpString += "For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files. \n"; + helpString += "If you want .abund and .rare files for all groups, set groups=all. \n"; + helpString += "The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n"; + helpString += "Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n"; + helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n"; + return helpString; + } + catch(exception& e) { + m->errorOut(e, "SplitAbundCommand", "getHelpString"); + exit(1); + } +} +//********************************************************************************************************************** +string SplitAbundCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "fasta") { outputFileName = "fasta"; } + else if (type == "list") { outputFileName = "list"; } + else if (type == "name") { outputFileName = "names"; } + else if (type == "group") { outputFileName = "groups"; } + else if (type == "accnos") { outputFileName = "accnos"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "SplitAbundCommand", "getOutputFileNameTag"); + exit(1); + } +} +//********************************************************************************************************************** +SplitAbundCommand::SplitAbundCommand(){ + try { + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["list"] = tempOutNames; + outputTypes["name"] = tempOutNames; + outputTypes["accnos"] = tempOutNames; + outputTypes["group"] = tempOutNames; + outputTypes["fasta"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand"); + exit(1); + } +} //********************************************************************************************************************** SplitAbundCommand::SplitAbundCommand(string option) { try { - abort = false; + abort = false; calledHelp = false; allLines = 1; //allow user to run help - if(option == "help") { help(); abort = true; } - + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { - //valid paramters for this command - string Array[] = {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; // - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + vector myArray = setParameters(); OptionParser parser(option); map parameters = parser.getParameters(); @@ -33,7 +119,15 @@ SplitAbundCommand::SplitAbundCommand(string option) { for (it = parameters.begin(); it != parameters.end(); it++) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } - + + //initialize outputTypes + vector tempOutNames; + outputTypes["list"] = tempOutNames; + outputTypes["name"] = tempOutNames; + outputTypes["accnos"] = tempOutNames; + outputTypes["group"] = tempOutNames; + outputTypes["fasta"] = tempOutNames; + //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } @@ -42,7 +136,7 @@ SplitAbundCommand::SplitAbundCommand(string option) { it = parameters.find("list"); //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["list"] = inputDir + it->second; } } @@ -50,7 +144,7 @@ SplitAbundCommand::SplitAbundCommand(string option) { it = parameters.find("group"); //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["group"] = inputDir + it->second; } } @@ -58,7 +152,7 @@ SplitAbundCommand::SplitAbundCommand(string option) { it = parameters.find("fasta"); //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["fasta"] = inputDir + it->second; } } @@ -66,7 +160,7 @@ SplitAbundCommand::SplitAbundCommand(string option) { it = parameters.find("name"); //user has given a template file if(it != parameters.end()){ - path = hasPath(it->second); + path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["name"] = inputDir + it->second; } } @@ -81,16 +175,20 @@ SplitAbundCommand::SplitAbundCommand(string option) { listfile = validParameter.validFile(parameters, "list", true); if (listfile == "not open") { abort = true; } else if (listfile == "not found") { listfile = ""; } - else{ inputFile = listfile; } + else{ inputFile = listfile; m->setListFile(listfile); } namefile = validParameter.validFile(parameters, "name", true); if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } - else{ inputFile = namefile; } + else{ inputFile = namefile; m->setNameFile(namefile); } fastafile = validParameter.validFile(parameters, "fasta", true); if (fastafile == "not open") { abort = true; } - else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true; } + else if (fastafile == "not found") { + fastafile = m->getFastaFile(); + if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; } + }else { m->setFastaFile(fastafile); } groupfile = validParameter.validFile(parameters, "group", true); if (groupfile == "not open") { groupfile = ""; abort = true; } @@ -100,40 +198,47 @@ SplitAbundCommand::SplitAbundCommand(string option) { int error = groupMap->readMap(); if (error == 1) { abort = true; } - + m->setGroupFile(groupfile); } groups = validParameter.validFile(parameters, "groups", false); if (groups == "not found") { groups = ""; } else if (groups == "all") { - if (groupfile != "") { Groups = groupMap->namesOfGroups; } + if (groupfile != "") { Groups = groupMap->getNamesOfGroups(); } else { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; } }else { - splitAtDash(groups, Groups); + m->splitAtDash(groups, Groups); } if ((groupfile == "") && (groups != "")) { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; Groups.clear(); } //do you have all files needed - if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true; } + if ((listfile == "") && (namefile == "")) { + namefile = m->getNameFile(); + if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); } + else { + listfile = m->getListFile(); + if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current list or namefile and the list or name parameter is required."); m->mothurOutEndLine(); abort = true; } + } + } //check for optional parameter and set defaults // ...at some point should added some additional type checking... label = validParameter.validFile(parameters, "label", false); if (label == "not found") { label = ""; allLines = 1; } else { - if(label != "all") { splitAtDash(label, labels); allLines = 0; } + if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } else { allLines = 1; } } string temp = validParameter.validFile(parameters, "accnos", false); if (temp == "not found") { temp = "F"; } - accnos = isTrue(temp); + accnos = m->isTrue(temp); temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0"; } - convert(temp, cutoff); + m->mothurConvert(temp, cutoff); if (cutoff == 0) { m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true; } - } } @@ -143,29 +248,6 @@ SplitAbundCommand::SplitAbundCommand(string option) { } } //********************************************************************************************************************** -void SplitAbundCommand::help(){ - try { - m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n"); - m->mothurOut("The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n"); - m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n"); - m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n"); - m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n"); - m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n"); - m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n"); - m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group. \n"); - m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files. \n"); - m->mothurOut("If you want .abund and .rare files for all groups, set groups=all. \n"); - m->mothurOut("The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n"); - m->mothurOut("Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n"); - m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n"); - - } - catch(exception& e) { - m->errorOut(e, "SplitAbundCommand", "help"); - exit(1); - } -} -//********************************************************************************************************************** SplitAbundCommand::~SplitAbundCommand(){ if (groupfile != "") { delete groupMap; } } @@ -173,28 +255,10 @@ SplitAbundCommand::~SplitAbundCommand(){ int SplitAbundCommand::execute(){ try { - if (abort == true) { return 0; } + if (abort == true) { if (calledHelp) { return 0; } return 2; } if (listfile != "") { //you are using a listfile to determine abundance - if (outputDir == "") { outputDir = hasPath(listfile); } - - //remove old files so you can append later.... - string fileroot = outputDir + getRootName(getSimpleName(listfile)); - if (Groups.size() == 0) { - remove((fileroot + "rare.list").c_str()); - remove((fileroot + "abund.list").c_str()); - - wroteListFile["rare"] = false; - wroteListFile["abund"] = false; - }else{ - for (int i=0; ihasPath(listfile); } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set processedLabels; @@ -208,11 +272,11 @@ int SplitAbundCommand::execute(){ if (namefile != "") { readNamesFile(); } else { createNameMap(list); } - if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if(allLines == 1 || labels.count(list->getLabel()) == 1){ @@ -223,7 +287,7 @@ int SplitAbundCommand::execute(){ userLabels.erase(list->getLabel()); } - if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = list->getLabel(); delete list; @@ -246,7 +310,7 @@ int SplitAbundCommand::execute(){ list = input->getListVector(); //get new list vector to process } - if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //output error messages about any remaining user labels set::iterator it; @@ -262,7 +326,7 @@ int SplitAbundCommand::execute(){ } - if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //run last label if you need to if (needToRun == true) { @@ -277,23 +341,10 @@ int SplitAbundCommand::execute(){ delete input; - for (map::iterator itBool = wroteListFile.begin(); itBool != wroteListFile.end(); itBool++) { - string filename = fileroot + itBool->first; - if ((itBool->first == "rare") || (itBool->first == "abund")) { - filename = fileroot + itBool->first + ".list"; - } - if (itBool->second) { //we wrote to this file - outputNames.push_back(filename); - }else{ - remove(filename.c_str()); - } - } - - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } - + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } }else { //you are using the namefile to determine abundance - if (outputDir == "") { outputDir = hasPath(namefile); } + if (outputDir == "") { outputDir = m->hasPath(namefile); } splitNames(); writeNames(); @@ -303,7 +354,34 @@ int SplitAbundCommand::execute(){ if (accnos) { writeAccnos(tag); } if (fastafile != "") { parseFasta(tag); } } - + + //set fasta file as new current fastafile + string current = ""; + itTypes = outputTypes.find("fasta"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } + } + + itTypes = outputTypes.find("name"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); } + } + + itTypes = outputTypes.find("group"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); } + } + + itTypes = outputTypes.find("list"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); } + } + + itTypes = outputTypes.find("accnos"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); } + } + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -329,7 +407,7 @@ int SplitAbundCommand::splitList(ListVector* thisList) { string bin = thisList->get(i); vector names; - splitAtComma(bin, names); //parses bin into individual sequence names + m->splitAtComma(bin, names); //parses bin into individual sequence names int size = names.size(); if (size <= cutoff) { @@ -339,9 +417,11 @@ int SplitAbundCommand::splitList(ListVector* thisList) { } }//end for - writeList(thisList); string tag = thisList->getLabel() + "."; + + writeList(thisList, tag); + if (groupfile != "") { parseGroup(tag); } if (accnos) { writeAccnos(tag); } if (fastafile != "") { parseFasta(tag); } @@ -355,7 +435,7 @@ int SplitAbundCommand::splitList(ListVector* thisList) { } } /**********************************************************************************************************************/ -int SplitAbundCommand::writeList(ListVector* thisList) { +int SplitAbundCommand::writeList(ListVector* thisList, string tag) { try { map filehandles; @@ -377,39 +457,39 @@ int SplitAbundCommand::writeList(ListVector* thisList) { ofstream aout; ofstream rout; - if (rareNames.size() != 0) { - string rare = outputDir + getRootName(getSimpleName(listfile)) + "rare.list"; - wroteListFile["rare"] = true; - openOutputFileAppend(rare, rout); - rout << thisList->getLabel() << '\t' << numRareBins << '\t'; - } + string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "rare." + getOutputFileNameTag("list"); + m->openOutputFile(rare, rout); + outputNames.push_back(rare); outputTypes["list"].push_back(rare); - if (abundNames.size() != 0) { - string abund = outputDir + getRootName(getSimpleName(listfile)) + "abund.list"; - wroteListFile["abund"] = true; - openOutputFileAppend(abund, aout); - aout << thisList->getLabel() << '\t' << numAbundBins << '\t'; - } + string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "abund." + getOutputFileNameTag("list"); + m->openOutputFile(abund, aout); + outputNames.push_back(abund); outputTypes["list"].push_back(abund); + + if (rareNames.size() != 0) { rout << thisList->getLabel() << '\t' << numRareBins << '\t'; } + if (abundNames.size() != 0) { aout << thisList->getLabel() << '\t' << numAbundBins << '\t'; } for (int i = 0; i < thisList->getNumBins(); i++) { if (m->control_pressed) { break; } string bin = list->get(i); - int size = getNumNames(bin); + int size = m->getNumNames(bin); if (size <= cutoff) { rout << bin << '\t'; } else { aout << bin << '\t'; } } - if (rareNames.size() != 0) { rout << endl; rout.close(); } - if (abundNames.size() != 0) { aout << endl; aout.close(); } - + if (rareNames.size() != 0) { rout << endl; } + if (abundNames.size() != 0) { aout << endl; } + + rout.close(); + aout.close(); + }else{ //parse names by abundance and group - string fileroot = outputDir + getRootName(getSimpleName(listfile)); + string fileroot = outputDir + m->getRootName(m->getSimpleName(listfile)); ofstream* temp; ofstream* temp2; - map wroteFile; + //map wroteFile; map filehandles; map::iterator it3; @@ -419,8 +499,12 @@ int SplitAbundCommand::writeList(ListVector* thisList) { temp2 = new ofstream; filehandles[Groups[i]+".abund"] = temp2; - openOutputFileAppend(fileroot + Groups[i] + ".rare.list", *(filehandles[Groups[i]+".rare"])); - openOutputFileAppend(fileroot + Groups[i] + ".abund.list", *(filehandles[Groups[i]+".abund"])); + string rareGroupFileName = fileroot + Groups[i] + tag + ".rare." + getOutputFileNameTag("list"); + string abundGroupFileName = fileroot + Groups[i] + tag + ".abund." + getOutputFileNameTag("list"); + m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"])); + m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"])); + outputNames.push_back(rareGroupFileName); outputTypes["list"].push_back(rareGroupFileName); + outputNames.push_back(abundGroupFileName); outputTypes["list"].push_back(abundGroupFileName); } map groupVector; @@ -439,7 +523,7 @@ int SplitAbundCommand::writeList(ListVector* thisList) { string bin = list->get(i); vector names; - splitAtComma(bin, names); //parses bin into individual sequence names + m->splitAtComma(bin, names); //parses bin into individual sequence names //parse bin into list of sequences in each group for (int j = 0; j < names.size(); j++) { @@ -452,7 +536,7 @@ int SplitAbundCommand::writeList(ListVector* thisList) { string group = groupMap->getGroup(names[j]); - if (inUsersGroups(group, Groups)) { //only add if this is in a group we want + if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want itGroup = groupBins.find(group+rareAbund); if(itGroup == groupBins.end()) { groupBins[group+rareAbund] = names[j]; //add first name @@ -474,7 +558,6 @@ int SplitAbundCommand::writeList(ListVector* thisList) { //end list vector for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl; // label numBins listvector for that group - wroteListFile[it3->first] = true; (*(filehandles[it3->first])).close(); delete it3->second; } @@ -497,17 +580,17 @@ int SplitAbundCommand::splitNames() { //namefile //open input file ifstream in; - openInputFile(namefile, in); + m->openInputFile(namefile, in); while (!in.eof()) { if (m->control_pressed) { break; } string firstCol, secondCol; - in >> firstCol >> secondCol; gobble(in); + in >> firstCol >> secondCol; m->gobble(in); nameMap[firstCol] = secondCol; - int size = getNumNames(secondCol); + int size = m->getNumNames(secondCol); if (size <= cutoff) { rareNames.insert(firstCol); @@ -530,13 +613,13 @@ int SplitAbundCommand::readNamesFile() { try { //open input file ifstream in; - openInputFile(namefile, in); + m->openInputFile(namefile, in); while (!in.eof()) { if (m->control_pressed) { break; } string firstCol, secondCol; - in >> firstCol >> secondCol; gobble(in); + in >> firstCol >> secondCol; m->gobble(in); nameMap[firstCol] = secondCol; } @@ -561,7 +644,7 @@ int SplitAbundCommand::createNameMap(ListVector* thisList) { string bin = thisList->get(i); vector names; - splitAtComma(bin, names); //parses bin into individual sequence names + m->splitAtComma(bin, names); //parses bin into individual sequence names for (int j = 0; j < names.size(); j++) { nameMap[names[j]] = names[j]; } }//end for @@ -584,33 +667,32 @@ int SplitAbundCommand::writeNames() { //namefile ofstream aout; ofstream rout; + string rare = outputDir + m->getRootName(m->getSimpleName(namefile)) + "rare." + getOutputFileNameTag("name"); + m->openOutputFile(rare, rout); + outputNames.push_back(rare); outputTypes["name"].push_back(rare); + + string abund = outputDir + m->getRootName(m->getSimpleName(namefile)) + "abund." + getOutputFileNameTag("name"); + m->openOutputFile(abund, aout); + outputNames.push_back(abund); outputTypes["name"].push_back(abund); + if (rareNames.size() != 0) { - string rare = outputDir + getRootName(getSimpleName(namefile)) + "rare.names"; - openOutputFile(rare, rout); - outputNames.push_back(rare); - for (set::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) { rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl; } - rout.close(); } + rout.close(); if (abundNames.size() != 0) { - string abund = outputDir + getRootName(getSimpleName(namefile)) + "abund.names"; - openOutputFile(abund, aout); - outputNames.push_back(abund); - for (set::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) { aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl; } - aout.close(); } - + aout.close(); + }else{ //parse names by abundance and group - string fileroot = outputDir + getRootName(getSimpleName(namefile)); + string fileroot = outputDir + m->getRootName(m->getSimpleName(namefile)); ofstream* temp; ofstream* temp2; - map wroteFile; map filehandles; map::iterator it3; @@ -620,16 +702,15 @@ int SplitAbundCommand::writeNames() { //namefile temp2 = new ofstream; filehandles[Groups[i]+".abund"] = temp2; - openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"])); - openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"])); - - wroteFile[Groups[i] + ".rare"] = false; - wroteFile[Groups[i] + ".abund"] = false; + string rareGroupFileName = fileroot + Groups[i] + ".rare." + getOutputFileNameTag("name"); + string abundGroupFileName = fileroot + Groups[i] + ".abund." + getOutputFileNameTag("name"); + m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"])); + m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"])); } for (map::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) { vector names; - splitAtComma(itName->second, names); //parses bin into individual sequence names + m->splitAtComma(itName->second, names); //parses bin into individual sequence names string rareAbund; if (rareNames.count(itName->first) != 0) { //you are a rare name @@ -644,7 +725,7 @@ int SplitAbundCommand::writeNames() { //namefile string group = groupMap->getGroup(names[i]); - if (inUsersGroups(group, Groups)) { //only add if this is in a group we want + if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want itout = outputStrings.find(group+rareAbund); if (itout == outputStrings.end()) { outputStrings[group+rareAbund] = names[i] + '\t' + names[i]; @@ -654,17 +735,13 @@ int SplitAbundCommand::writeNames() { //namefile } } - for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { - *(filehandles[itout->first]) << itout->second << endl; - wroteFile[itout->first] = true; - } + for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl; } } for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(filehandles[it3->first])).close(); - if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + it3->first + ".names"); } - else { remove((it3->first).c_str()); } + outputNames.push_back(fileroot + it3->first + "." + getOutputFileNameTag("name")); outputTypes["name"].push_back(fileroot + it3->first + "." + getOutputFileNameTag("name")); delete it3->second; } } @@ -688,32 +765,29 @@ int SplitAbundCommand::writeAccnos(string tag) { ofstream aout; ofstream rout; - if (rareNames.size() != 0) { - string rare = outputDir + getRootName(getSimpleName(inputFile)) + tag + "rare.accnos"; - openOutputFile(rare, rout); - outputNames.push_back(rare); - - for (set::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) { - rout << (*itRare) << endl; - } - rout.close(); + + string rare = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "rare." + getOutputFileNameTag("accnos"); + m->openOutputFile(rare, rout); + outputNames.push_back(rare); outputTypes["accnos"].push_back(rare); + + for (set::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) { + rout << (*itRare) << endl; } + rout.close(); + + string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "abund." + getOutputFileNameTag("accnos"); + m->openOutputFile(abund, aout); + outputNames.push_back(abund); outputTypes["accnos"].push_back(abund); - if (abundNames.size() != 0) { - string abund = outputDir + getRootName(getSimpleName(inputFile)) + tag + "abund.accnos"; - openOutputFile(abund, aout); - outputNames.push_back(abund); - - for (set::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) { - aout << (*itAbund) << endl; - } - aout.close(); + for (set::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) { + aout << (*itAbund) << endl; } + aout.close(); + }else{ //parse names by abundance and group - string fileroot = outputDir + getRootName(getSimpleName(inputFile)); + string fileroot = outputDir + m->getRootName(m->getSimpleName(inputFile)); ofstream* temp; ofstream* temp2; - map wroteFile; map filehandles; map::iterator it3; @@ -723,20 +797,16 @@ int SplitAbundCommand::writeAccnos(string tag) { temp2 = new ofstream; filehandles[Groups[i]+".abund"] = temp2; - openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"])); - openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"])); - - wroteFile[Groups[i] + ".rare"] = false; - wroteFile[Groups[i] + ".abund"] = false; + m->openOutputFile(fileroot + tag + Groups[i] + ".rare." + getOutputFileNameTag("accnos"), *(filehandles[Groups[i]+".rare"])); + m->openOutputFile(fileroot + tag + Groups[i] + ".abund." + getOutputFileNameTag("accnos"), *(filehandles[Groups[i]+".abund"])); } //write rare for (set::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) { string group = groupMap->getGroup(*itRare); - if (inUsersGroups(group, Groups)) { //only add if this is in a group we want + if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want *(filehandles[group+".rare"]) << *itRare << endl; - wroteFile[group+".rare"] = true; } } @@ -744,17 +814,15 @@ int SplitAbundCommand::writeAccnos(string tag) { for (set::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) { string group = groupMap->getGroup(*itAbund); - if (inUsersGroups(group, Groups)) { //only add if this is in a group we want + if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want *(filehandles[group+".abund"]) << *itAbund << endl; - wroteFile[group+".abund"] = true; } } //close files for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(filehandles[it3->first])).close(); - if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + tag + it3->first + ".accnos"); } - else { remove((fileroot + tag + it3->first + ".accnos").c_str()); } + outputNames.push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("accnos")); outputTypes["accnos"].push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("accnos")); delete it3->second; } } @@ -777,22 +845,18 @@ int SplitAbundCommand::parseGroup(string tag) { //namefile ofstream aout; ofstream rout; - if (rareNames.size() != 0) { - string rare = outputDir + getRootName(getSimpleName(groupfile)) + tag + "rare.groups"; - openOutputFile(rare, rout); - outputNames.push_back(rare); - } + string rare = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "rare." + getOutputFileNameTag("group"); + m->openOutputFile(rare, rout); + outputNames.push_back(rare); outputTypes["group"].push_back(rare); + + string abund = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "abund." + getOutputFileNameTag("group"); +; + m->openOutputFile(abund, aout); + outputNames.push_back(abund); outputTypes["group"].push_back(abund); - if (abundNames.size() != 0) { - string abund = outputDir + getRootName(getSimpleName(groupfile)) + tag + "abund.groups"; - openOutputFile(abund, aout); - outputNames.push_back(abund); - } - - for (map::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) { vector names; - splitAtComma(itName->second, names); //parses bin into individual sequence names + m->splitAtComma(itName->second, names); //parses bin into individual sequence names for (int i = 0; i < names.size(); i++) { @@ -810,14 +874,13 @@ int SplitAbundCommand::parseGroup(string tag) { //namefile } } - if (rareNames.size() != 0) { rout.close(); } - if (abundNames.size() != 0) { aout.close(); } + rout.close(); + aout.close(); }else{ //parse names by abundance and group - string fileroot = outputDir + getRootName(getSimpleName(groupfile)); + string fileroot = outputDir + m->getRootName(m->getSimpleName(groupfile)); ofstream* temp; ofstream* temp2; - map wroteFile; map filehandles; map::iterator it3; @@ -827,16 +890,13 @@ int SplitAbundCommand::parseGroup(string tag) { //namefile temp2 = new ofstream; filehandles[Groups[i]+".abund"] = temp2; - openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"])); - openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"])); - - wroteFile[Groups[i] + ".rare"] = false; - wroteFile[Groups[i] + ".abund"] = false; + m->openOutputFile(fileroot + tag + Groups[i] + ".rare." + getOutputFileNameTag("group"), *(filehandles[Groups[i]+".rare"])); + m->openOutputFile(fileroot + tag + Groups[i] + ".abund." + getOutputFileNameTag("group"), *(filehandles[Groups[i]+".abund"])); } for (map::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) { vector names; - splitAtComma(itName->second, names); //parses bin into individual sequence names + m->splitAtComma(itName->second, names); //parses bin into individual sequence names string rareAbund; if (rareNames.count(itName->first) != 0) { //you are a rare name @@ -849,17 +909,15 @@ int SplitAbundCommand::parseGroup(string tag) { //namefile string group = groupMap->getGroup(names[i]); - if (inUsersGroups(group, Groups)) { //only add if this is in a group we want + if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl; - wroteFile[group+rareAbund] = true; } } } for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(filehandles[it3->first])).close(); - if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + tag + it3->first + ".groups"); } - else { remove((fileroot + tag + it3->first + ".groups").c_str()); } + outputNames.push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("group")); outputTypes["group"].push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("group")); delete it3->second; } } @@ -882,27 +940,22 @@ int SplitAbundCommand::parseFasta(string tag) { //namefile ofstream aout; ofstream rout; - if (rareNames.size() != 0) { - string rare = outputDir + getRootName(getSimpleName(fastafile)) + tag + "rare.fasta"; - openOutputFile(rare, rout); - outputNames.push_back(rare); - } - - if (abundNames.size() != 0) { - string abund = outputDir + getRootName(getSimpleName(fastafile)) + tag + "abund.fasta"; - openOutputFile(abund, aout); - outputNames.push_back(abund); - } - - + string rare = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "rare." + getOutputFileNameTag("fasta"); + m->openOutputFile(rare, rout); + outputNames.push_back(rare); outputTypes["fasta"].push_back(rare); + + string abund = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "abund." + getOutputFileNameTag("fasta"); + m->openOutputFile(abund, aout); + outputNames.push_back(abund); outputTypes["fasta"].push_back(abund); + //open input file ifstream in; - openInputFile(fastafile, in); + m->openInputFile(fastafile, in); while (!in.eof()) { if (m->control_pressed) { break; } - Sequence seq(in); gobble(in); + Sequence seq(in); m->gobble(in); if (seq.getName() != "") { @@ -922,14 +975,13 @@ int SplitAbundCommand::parseFasta(string tag) { //namefile } } in.close(); - if (rareNames.size() != 0) { rout.close(); } - if (abundNames.size() != 0) { aout.close(); } + rout.close(); + aout.close(); }else{ //parse names by abundance and group - string fileroot = outputDir + getRootName(getSimpleName(fastafile)); + string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile)); ofstream* temp; ofstream* temp2; - map wroteFile; map filehandles; map::iterator it3; @@ -939,21 +991,18 @@ int SplitAbundCommand::parseFasta(string tag) { //namefile temp2 = new ofstream; filehandles[Groups[i]+".abund"] = temp2; - openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"])); - openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"])); - - wroteFile[Groups[i] + ".rare"] = false; - wroteFile[Groups[i] + ".abund"] = false; + m->openOutputFile(fileroot + tag + Groups[i] + ".rare." + getOutputFileNameTag("fasta"), *(filehandles[Groups[i]+".rare"])); + m->openOutputFile(fileroot + tag + Groups[i] + ".abund." + getOutputFileNameTag("fasta"), *(filehandles[Groups[i]+".abund"])); } //open input file ifstream in; - openInputFile(fastafile, in); + m->openInputFile(fastafile, in); while (!in.eof()) { if (m->control_pressed) { break; } - Sequence seq(in); gobble(in); + Sequence seq(in); m->gobble(in); if (seq.getName() != "") { map::iterator itNames = nameMap.find(seq.getName()); @@ -962,7 +1011,7 @@ int SplitAbundCommand::parseFasta(string tag) { //namefile m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine(); }else{ vector names; - splitAtComma(itNames->second, names); //parses bin into individual sequence names + m->splitAtComma(itNames->second, names); //parses bin into individual sequence names string rareAbund; if (rareNames.count(itNames->first) != 0) { //you are a rare name @@ -975,9 +1024,8 @@ int SplitAbundCommand::parseFasta(string tag) { //namefile string group = groupMap->getGroup(seq.getName()); - if (inUsersGroups(group, Groups)) { //only add if this is in a group we want + if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want seq.printSequence(*(filehandles[group+rareAbund])); - wroteFile[group+rareAbund] = true; }else if(group == "not found") { m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine(); } @@ -989,8 +1037,7 @@ int SplitAbundCommand::parseFasta(string tag) { //namefile for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(filehandles[it3->first])).close(); - if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + tag + it3->first + ".fasta"); } - else { remove((fileroot + tag + it3->first + ".fasta").c_str()); } + outputNames.push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("fasta")); outputTypes["fasta"].push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("fasta")); delete it3->second; } }