5 * Created by Sarah Westcott on 7/8/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "listseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
15 //**********************************************************************************************************************
// Registers every parameter list.seqs accepts in the `parameters` member and
// gathers the parameter names into a vector of strings.
16 vector<string> ListSeqsCommand::setParameters(){
// The six input-file parameters (fasta, name, group, list, taxonomy, alignreport)
// are constructed with identical arguments apart from their name.
18 CommandParameter pfasta("fasta", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pfasta);
19 CommandParameter pname("name", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pname);
20 CommandParameter pgroup("group", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pgroup);
21 CommandParameter plist("list", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(plist);
22 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
23 CommandParameter palignreport("alignreport", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(palignreport);
// inputdir and outputdir are declared as plain "String" parameters.
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Copy the name of each registered parameter, in registration order, into myArray.
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Error path: pass the exception e to m->errorOut together with the class and method name.
32 m->errorOut(e, "ListSeqsCommand", "setParameters");
36 //**********************************************************************************************************************
// Assembles the user-facing help text for the list.seqs command.
37 string ListSeqsCommand::getHelpString(){
39 string helpString = "";
// Each append adds one newline-terminated sentence: what the command reads and writes,
// the accepted parameters, the call format, an example, and a note about spacing.
40 helpString += "The list.seqs command reads a fasta, name, group, list, taxonomy or alignreport file and outputs a .accnos file containing sequence names.\n";
41 helpString += "The list.seqs command parameters are fasta, name, group, list, taxonomy and alignreport. You must provide one of these parameters.\n";
42 helpString += "The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n";
43 helpString += "Example list.seqs(fasta=amazon.fasta).\n";
44 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
// Error path: pass the exception e to m->errorOut together with the class and method name.
48 m->errorOut(e, "ListSeqsCommand", "getHelpString");
52 //**********************************************************************************************************************
// Maps an output type name to the file-name tag used for that output.
//   type      - output type requested; only "accnos" has a tag defined here.
//   inputName - not referenced in the lines shown here.
// Returns the tag ("accnos"), or an empty string when no tag is defined for `type`.
53 string ListSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
55 string outputFileName = "";
56 map<string, vector<string> >::iterator it;
58 //is this a type this command creates
59 it = outputTypes.find(type);
// Unknown to outputTypes: print an error message.
60 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
// "accnos" is the only type with a tag; any other type prints an error and sets
// m->control_pressed to true.
62 if (type == "accnos") { outputFileName = "accnos"; }
63 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
65 return outputFileName;
// Error path: pass the exception e to m->errorOut together with the class and method name.
68 m->errorOut(e, "ListSeqsCommand", "getOutputFileNameTag");
72 //**********************************************************************************************************************
// Default constructor: builds the command in an aborted state (abort and calledHelp
// both true) and registers an empty "accnos" entry in outputTypes.
73 ListSeqsCommand::ListSeqsCommand(){
75 abort = true; calledHelp = true;
// Start with an empty list of file names for the "accnos" output type.
77 vector<string> tempOutNames;
78 outputTypes["accnos"] = tempOutNames;
// Error path: pass the exception e to m->errorOut together with the class and method name.
81 m->errorOut(e, "ListSeqsCommand", "ListSeqsCommand");
85 //**********************************************************************************************************************
// Constructs the command from the user's option string.
// Handles the "help" and "citation" requests, validates the parameter names, prefixes
// path-less input file names with inputdir, resolves each input file through
// validParameter.validFile, and sets `abort` when anything is invalid.
//   option - the raw option string; parsed with OptionParser.
87 ListSeqsCommand::ListSeqsCommand(string option) {
89 abort = false; calledHelp = false;
91 //allow user to run help
92 if(option == "help") { help(); abort = true; calledHelp = true; }
93 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// myArray holds the valid parameter names; `parameters` here is a local name->value
// map produced by the parser.
95 vector<string> myArray = setParameters();
97 OptionParser parser(option);
98 map<string,string> parameters = parser.getParameters();
100 ValidParameters validParameter;
101 map<string,string>::iterator it;
103 //check to make sure all parameters are valid for command
// Note: this loop declares its own iterator `it`, separate from the one declared above.
104 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
105 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
108 //initialize outputTypes
109 vector<string> tempOutNames;
110 outputTypes["accnos"] = tempOutNames;
112 //if the user changes the output directory command factory will send this info to us in the outputdir parameter
113 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
115 //if the user changes the input directory command factory will send this info to us in the inputdir parameter
116 string inputDir = validParameter.validFile(parameters, "inputdir", false);
117 if (inputDir == "not found"){ inputDir = ""; }
// For each input-file parameter that was supplied, prepend inputDir when
// m->hasPath reports an empty path for the given value.
120 it = parameters.find("alignreport");
121 //user has given an alignreport file
122 if(it != parameters.end()){
123 path = m->hasPath(it->second);
124 //if the user has not given a path then, add inputdir. else leave path alone.
125 if (path == "") { parameters["alignreport"] = inputDir + it->second; }
128 it = parameters.find("fasta");
129 //user has given a fasta file
130 if(it != parameters.end()){
131 path = m->hasPath(it->second);
132 //if the user has not given a path then, add inputdir. else leave path alone.
133 if (path == "") { parameters["fasta"] = inputDir + it->second; }
136 it = parameters.find("list");
137 //user has given a list file
138 if(it != parameters.end()){
139 path = m->hasPath(it->second);
140 //if the user has not given a path then, add inputdir. else leave path alone.
141 if (path == "") { parameters["list"] = inputDir + it->second; }
144 it = parameters.find("name");
145 //user has given a name file
146 if(it != parameters.end()){
147 path = m->hasPath(it->second);
148 //if the user has not given a path then, add inputdir. else leave path alone.
149 if (path == "") { parameters["name"] = inputDir + it->second; }
152 it = parameters.find("group");
153 //user has given a group file
154 if(it != parameters.end()){
155 path = m->hasPath(it->second);
156 //if the user has not given a path then, add inputdir. else leave path alone.
157 if (path == "") { parameters["group"] = inputDir + it->second; }
160 it = parameters.find("taxonomy");
161 //user has given a taxonomy file
162 if(it != parameters.end()){
163 path = m->hasPath(it->second);
164 //if the user has not given a path then, add inputdir. else leave path alone.
165 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
169 //resolve each possible input file; none is individually required, but at least one must be given (checked below)
// validFile results handled here: "not open" aborts, "not found" means the parameter
// was not supplied (stored as ""), anything else is taken as the file name and, for
// every input except alignreport, recorded through the matching m->set...File call.
170 fastafile = validParameter.validFile(parameters, "fasta", true);
171 if (fastafile == "not open") { abort = true; }
172 else if (fastafile == "not found") { fastafile = ""; }
173 else { m->setFastaFile(fastafile); }
175 namefile = validParameter.validFile(parameters, "name", true);
176 if (namefile == "not open") { abort = true; }
177 else if (namefile == "not found") { namefile = ""; }
178 else { m->setNameFile(namefile); }
180 groupfile = validParameter.validFile(parameters, "group", true);
181 if (groupfile == "not open") { abort = true; }
182 else if (groupfile == "not found") { groupfile = ""; }
183 else { m->setGroupFile(groupfile); }
// NOTE(review): alignreport has no trailing else-branch calling an m->set...File
// function, unlike the other five inputs - confirm this is intentional.
185 alignfile = validParameter.validFile(parameters, "alignreport", true);
186 if (alignfile == "not open") { abort = true; }
187 else if (alignfile == "not found") { alignfile = ""; }
189 listfile = validParameter.validFile(parameters, "list", true);
190 if (listfile == "not open") { abort = true; }
191 else if (listfile == "not found") { listfile = ""; }
192 else { m->setListFile(listfile); }
194 taxfile = validParameter.validFile(parameters, "taxonomy", true);
195 if (taxfile == "not open") { abort = true; }
196 else if (taxfile == "not found") { taxfile = ""; }
197 else { m->setTaxonomyFile(taxfile); }
// At least one input file must have been supplied.
199 if ((fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == "")) { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; }
// okay is the number of parameter-map entries tolerated: it is incremented once for each
// of outputdir / inputdir that is non-empty. More entries than that is reported as
// more than one input file.
// NOTE(review): okay's declaration and starting value are outside the lines shown here - presumably 1; confirm.
202 if (outputDir != "") { okay++; }
203 if (inputDir != "") { okay++; }
205 if (parameters.size() > okay) { m->mothurOut("You may only enter one file."); m->mothurOutEndLine(); abort = true; }
// Any std::exception raised above is reported with the class and method name.
209 catch(exception& e) {
210 m->errorOut(e, "ListSeqsCommand", "ListSeqsCommand");
214 //**********************************************************************************************************************
// Runs the command: reads sequence names from whichever input file was supplied,
// sorts them, and writes them one per line to an accnos output file.
// Visible return values: 0 when help was requested or the run was interrupted via
// m->control_pressed, 2 when the command was aborted for another reason.
216 int ListSeqsCommand::execute(){
// An aborted command does no work; a help/citation request counts as a clean exit (0).
219 if (abort == true) { if (calledHelp) { return 0; } return 2; }
221 //read functions fill names vector
// Only the first non-empty input in this order is read:
// fasta, name, group, alignreport, list, taxonomy.
222 if (fastafile != "") { inputFileName = fastafile; readFasta(); }
223 else if (namefile != "") { inputFileName = namefile; readName(); }
224 else if (groupfile != "") { inputFileName = groupfile; readGroup(); }
225 else if (alignfile != "") { inputFileName = alignfile; readAlign(); }
226 else if (listfile != "") { inputFileName = listfile; readList(); }
227 else if (taxfile != "") { inputFileName = taxfile; readTax(); }
// Interrupted while reading: drop the registered output types and stop.
229 if (m->control_pressed) { outputTypes.clear(); return 0; }
231 //sort in alphabetical order
232 sort(names.begin(), names.end());
// With no outputdir given, the output goes to the path component of the input file name.
234 if (outputDir == "") { outputDir += m->hasPath(inputFileName); }
// Output name = output directory + root of the input's simple name + the "accnos" tag.
236 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + getOutputFileNameTag("accnos");
239 m->openOutputFile(outputFileName, out);
240 outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
242 //output to .accnos file
243 for (int i = 0; i < names.size(); i++) {
// Interrupted mid-write: close the stream and remove the partial output file.
245 if (m->control_pressed) { outputTypes.clear(); out.close(); m->mothurRemove(outputFileName); return 0; }
247 out << names[i] << endl;
// Interrupted after writing: remove the output file as well.
251 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); return 0; }
253 m->setAccnosFile(outputFileName);
// Report the output file name to the user.
255 m->mothurOutEndLine();
256 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
257 m->mothurOut(outputFileName); m->mothurOutEndLine();
258 m->mothurOutEndLine();
260 //set accnos file as new current accnosfile
// NOTE(review): setAccnosFile was already called with outputFileName above; this second
// call uses the first "accnos" entry of outputTypes, presumably the same name - confirm.
262 itTypes = outputTypes.find("accnos");
263 if (itTypes != outputTypes.end()) {
264 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
// Any std::exception raised above is reported with the class and method name.
270 catch(exception& e) {
271 m->errorOut(e, "ListSeqsCommand", "execute");
276 //**********************************************************************************************************************
// Reads the fasta file and appends each non-empty sequence name to `names`.
// Returns 0 when interrupted via m->control_pressed.
277 int ListSeqsCommand::readFasta(){
281 m->openInputFile(fastafile, in);
// The commented-out lines below are disabled code that would have written a renamed
// copy of the fasta file ("numsAdded.fasta"); they have no effect.
285 //string newFastaName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "numsAdded.fasta";
286 //m->openOutputFile(newFastaName, out);
288 //string lastName = "";
// Interrupted: close the input stream and stop.
292 if (m->control_pressed) { in.close(); return 0; }
// Constructing a Sequence from the stream reads the next record; only its name is used.
294 Sequence currSeq(in);
295 name = currSeq.getName();
296 //if (lastName == "") { lastName = name; }
297 //if (name != lastName) { count = 1; }
300 //Sequence newSeq(name+"_"+toString(count), currSeq.getAligned());
301 //newSeq.printSequence(out);
// Records with an empty name are skipped.
303 if (name != "") { names.push_back(name); }
// Any std::exception raised above is reported with the class and method name.
314 catch(exception& e) {
315 m->errorOut(e, "ListSeqsCommand", "readFasta");
319 //**********************************************************************************************************************
// Reads the list file and adds every sequence name found in its bins to `names`.
// Returns 0 when interrupted via m->control_pressed.
320 int ListSeqsCommand::readList(){
323 m->openInputFile(listfile, in);
326 //read in list vector
// Each bin is fetched as one string and split on commas into `names`.
330 for (int i = 0; i < list.getNumBins(); i++) {
331 string binnames = list.get(i);
// Interrupted: close the input stream and stop.
333 if (m->control_pressed) { in.close(); return 0; }
335 m->splitAtComma(binnames, names);
// Any std::exception raised above is reported with the class and method name.
343 catch(exception& e) {
344 m->errorOut(e, "ListSeqsCommand", "readList");
349 //**********************************************************************************************************************
// Reads the name file and adds the comma-separated names from its second column to `names`.
// Returns 0 when interrupted via m->control_pressed.
350 int ListSeqsCommand::readName(){
354 m->openInputFile(namefile, in);
355 string name, firstCol, secondCol;
// Interrupted: close the input stream and stop.
359 if (m->control_pressed) { in.close(); return 0; }
364 //parse second column saving each name
365 m->splitAtComma(secondCol, names);
// Any std::exception raised above is reported with the class and method name.
373 catch(exception& e) {
374 m->errorOut(e, "ListSeqsCommand", "readName");
379 //**********************************************************************************************************************
// Reads the group file and appends the first-column value of each record to `names`.
// Returns 0 when interrupted via m->control_pressed.
380 int ListSeqsCommand::readGroup(){
384 m->openInputFile(groupfile, in);
// Interrupted: close the input stream and stop.
389 if (m->control_pressed) { in.close(); return 0; }
// The group column is read to advance the stream; only the name is stored.
391 in >> name; m->gobble(in); //read from first column
392 in >> group; //read from second column
394 names.push_back(name);
// Any std::exception raised above is reported with the class and method name.
402 catch(exception& e) {
403 m->errorOut(e, "ListSeqsCommand", "readGroup");
408 //**********************************************************************************************************************
409 //alignreport file has a column header line then all other lines contain 16 columns. we just want the first column since that contains the name
// Appends the first-column value of each data record to `names`.
// Returns 0 when interrupted via m->control_pressed.
410 int ListSeqsCommand::readAlign(){
414 m->openInputFile(alignfile, in);
417 //read column headers
// Discard up to 16 whitespace-separated header tokens.
418 for (int i = 0; i < 16; i++) {
419 if (!in.eof()) { in >> junk; }
// Interrupted: close the input stream and stop.
426 if (m->control_pressed) { in.close(); return 0; }
428 in >> name; //read from first column
// Discard the remaining 15 columns of the record.
431 for (int i = 0; i < 15; i++) {
432 if (!in.eof()) { in >> junk; }
436 names.push_back(name);
// Any std::exception raised above is reported with the class and method name.
446 catch(exception& e) {
447 m->errorOut(e, "ListSeqsCommand", "readAlign");
451 //**********************************************************************************************************************
// Reads the taxonomy file and appends the value held in firstCol for each record to `names`.
// Returns 0 when interrupted via m->control_pressed.
452 int ListSeqsCommand::readTax(){
456 m->openInputFile(taxfile, in);
457 string name, firstCol, secondCol;
// Interrupted: close the input stream and stop.
461 if (m->control_pressed) { in.close(); return 0; }
// Only the first column is stored.
466 names.push_back(firstCol);
// Any std::exception raised above is reported with the class and method name.
476 catch(exception& e) {
477 m->errorOut(e, "ListSeqsCommand", "readTax");
481 //**********************************************************************************************************************