X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=getseqscommand.cpp;h=d63dc79974ac007f504f7f1928ed8d0f031dcfe3;hb=9ada98592a54c82d08f3d46c9b1d8c3e472a922d;hp=509241d87fb2207c30ce1c7db0133a2a59f357da;hpb=fe346922fe0af5b1a025beacb211078d37598fd4;p=mothur.git diff --git a/getseqscommand.cpp b/getseqscommand.cpp index 509241d..d63dc79 100644 --- a/getseqscommand.cpp +++ b/getseqscommand.cpp @@ -9,6 +9,7 @@ #include "getseqscommand.h" #include "sequence.hpp" +#include "listvector.hpp" //********************************************************************************************************************** @@ -21,19 +22,78 @@ GetSeqsCommand::GetSeqsCommand(string option){ else { //valid paramters for this command - string Array[] = {"fasta","name", "group", "alignreport", "accnos" }; + string Array[] = {"fasta","name", "group", "alignreport", "accnos", "list","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); map parameters = parser.getParameters(); ValidParameters validParameter; + map::iterator it; //check to make sure all parameters are valid for command - for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + for (it = parameters.begin(); it != parameters.end(); it++) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("alignreport"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["alignreport"] = inputDir + it->second; } + } + + it = parameters.find("fasta"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["fasta"] = inputDir + it->second; } + } + + it = parameters.find("accnos"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["accnos"] = inputDir + it->second; } + } + + it = parameters.find("list"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["list"] = inputDir + it->second; } + } + + it = parameters.find("name"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["name"] = inputDir + it->second; } + } + + it = parameters.find("group"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["group"] = inputDir + it->second; } + } + } + + //check for required parameters accnosfile = validParameter.validFile(parameters, "accnos", true); if (accnosfile == "not open") { abort = true; } @@ -55,9 +115,16 @@ GetSeqsCommand::GetSeqsCommand(string option){ if (alignfile == "not open") { abort = true; } else if (alignfile == "not found") { alignfile = ""; } - if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "")) { mothurOut("You must provide one of the following: fasta, name, group, alignreport."); mothurOutEndLine(); abort = true; } + listfile = validParameter.validFile(parameters, "list", true); + if (listfile == "not open") { abort = true; } + else if (listfile == "not found") { listfile = ""; } + + if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "")) { mothurOut("You must provide one of the following: fasta, name, group, alignreport or listfile."); mothurOutEndLine(); abort = true; } - if (parameters.size() > 2) { mothurOut("You may only enter one of the following: fasta, name, group, alignreport."); mothurOutEndLine(); abort = true; } + int okay = 2; + if (outputDir != "") { okay++; } + + if (parameters.size() > okay) { mothurOut("You may only enter one of the following: fasta, name, group, alignreport or listfile."); mothurOutEndLine(); abort = true; } } } @@ -70,9 +137,9 @@ GetSeqsCommand::GetSeqsCommand(string option){ void GetSeqsCommand::help(){ try { - mothurOut("The get.seqs command reads an .accnos file and one of the following file types: fasta, name, group or alignreport file.\n"); + mothurOut("The get.seqs command reads an .accnos file and one of the following file types: fasta, name, group, list or alignreport file.\n"); mothurOut("It outputs a file containing only the sequences in the .accnos file.\n"); - mothurOut("The get.seqs command parameters are accnos, fasta, name, group and alignreport. You must provide accnos and one of the other parameters.\n"); + mothurOut("The get.seqs command parameters are accnos, fasta, name, group, list and alignreport. You must provide accnos and one of the other parameters.\n"); mothurOut("The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n"); mothurOut("Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n"); mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n"); @@ -98,6 +165,7 @@ int GetSeqsCommand::execute(){ else if (namefile != "") { readName(); } else if (groupfile != "") { readGroup(); } else if (alignfile != "") { readAlign(); } + else if (listfile != "") { readList(); } return 0; } @@ -111,7 +179,8 @@ int GetSeqsCommand::execute(){ //********************************************************************************************************************** void GetSeqsCommand::readFasta(){ try { - string outputFileName = getRootName(fastafile) + "pick" + getExtension(fastafile); + if (outputDir == "") { outputDir += hasPath(fastafile); } + string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + "pick" + getExtension(fastafile); ofstream out; openOutputFile(outputFileName, out); @@ -125,13 +194,79 @@ void GetSeqsCommand::readFasta(){ Sequence currSeq(in); name = currSeq.getName(); - //if this name is in the accnos file - if (names.count(name) == 1) { - wroteSomething = true; + if (name != "") { + //if this name is in the accnos file + if (names.count(name) == 1) { + wroteSomething = true; + + currSeq.printSequence(out); + + names.erase(name); + } + } + gobble(in); + } + in.close(); + out.close(); + + if (wroteSomething == false) { + mothurOut("Your file does not contain any sequence from the .accnos file."); mothurOutEndLine(); + remove(outputFileName.c_str()); + } + + } + catch(exception& e) { + errorOut(e, "GetSeqsCommand", "readFasta"); + exit(1); + } +} +//********************************************************************************************************************** +void GetSeqsCommand::readList(){ + try { + if (outputDir == "") { outputDir += hasPath(listfile); } + string outputFileName = outputDir + getRootName(getSimpleName(listfile)) + "pick" + getExtension(listfile); + ofstream out; + openOutputFile(outputFileName, out); + + ifstream in; + openInputFile(listfile, in); + + bool wroteSomething = false; + + while(!in.eof()){ + //read in list vector + ListVector list(in); + + //make a new list vector + ListVector newList; + newList.setLabel(list.getLabel()); + + //for each bin + for (int i = 0; i < list.getNumBins(); i++) { + + //parse out names that are in accnos file + string binnames = list.get(i); - currSeq.printSequence(out); + string newNames = ""; + while (binnames.find_first_of(',') != -1) { + string name = binnames.substr(0,binnames.find_first_of(',')); + binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length()); + + //if that name is in the .accnos file, add it + if (names.count(name) == 1) { newNames += name + ","; } + } + + //get last name + if (names.count(binnames) == 1) { newNames += binnames; } + + //if there are names in this bin add to new list + if (newNames != "") { newList.push_back(newNames); } + } - names.erase(name); + //print new listvector + if (newList.getNumBins() != 0) { + wroteSomething = true; + newList.print(out); } gobble(in); @@ -146,16 +281,15 @@ void GetSeqsCommand::readFasta(){ } catch(exception& e) { - errorOut(e, "GetSeqsCommand", "readFasta"); + errorOut(e, "GetSeqsCommand", "readList"); exit(1); } } - //********************************************************************************************************************** void GetSeqsCommand::readName(){ try { - - string outputFileName = getRootName(namefile) + "pick" + getExtension(namefile);; + if (outputDir == "") { outputDir += hasPath(namefile); } + string outputFileName = outputDir + getRootName(getSimpleName(namefile)) + "pick" + getExtension(namefile); ofstream out; openOutputFile(outputFileName, out); @@ -186,7 +320,6 @@ void GetSeqsCommand::readName(){ for (int i = 0; i < parsedNames.size(); i++) { if (names.count(parsedNames[i]) == 1) { validSecond.push_back(parsedNames[i]); - names.erase(parsedNames[i]); } } @@ -202,7 +335,6 @@ void GetSeqsCommand::readName(){ for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; } out << validSecond[validSecond.size()-1] << endl; - names.erase(firstCol); //make first name in set you come to first column and then add the remaining names to second column }else { @@ -239,8 +371,8 @@ void GetSeqsCommand::readName(){ //********************************************************************************************************************** void GetSeqsCommand::readGroup(){ try { - - string outputFileName = getRootName(groupfile) + "pick" + getExtension(groupfile); + if (outputDir == "") { outputDir += hasPath(groupfile); } + string outputFileName = outputDir + getRootName(getSimpleName(groupfile)) + "pick" + getExtension(groupfile); ofstream out; openOutputFile(outputFileName, out); @@ -285,7 +417,8 @@ void GetSeqsCommand::readGroup(){ //alignreport file has a column header line then all other lines contain 16 columns. we just want the first column since that contains the name void GetSeqsCommand::readAlign(){ try { - string outputFileName = getRootName(getRootName(alignfile)) + "pick.align.report"; + if (outputDir == "") { outputDir += hasPath(alignfile); } + string outputFileName = outputDir + getRootName(getSimpleName(alignfile)) + "pick.align.report"; ofstream out; openOutputFile(outputFileName, out);