5 * Created by Sarah Westcott on 7/8/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "listseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
16 //**********************************************************************************************************************
// Declares the parameters this command accepts and collects their names.
// Seven input-file parameters (fasta, name, count, group, list, taxonomy, alignreport)
// plus inputdir and outputdir are constructed as CommandParameter objects and appended
// to the `parameters` container; the name of every entry in `parameters` is then copied
// into `myArray`.
// Errors are reported through m->errorOut with this class/method name.
// NOTE(review): the try/return/closing lines are not visible in this excerpt; the
// vector<string> return type and the caller in the option constructor suggest that
// `myArray` is what gets returned -- confirm against the full file.
17 vector<string> ListSeqsCommand::setParameters(){
// all seven file parameters share the same "FNGLT" values in the two slots after the empty defaults
19 CommandParameter pfasta("fasta", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pfasta);
20 CommandParameter pname("name", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pname);
21 CommandParameter pcount("count", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pcount);
22 CommandParameter pgroup("group", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pgroup);
23 CommandParameter plist("list", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(plist);
24 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
25 CommandParameter palignreport("alignreport", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(palignreport);
// directory overrides are plain "String" parameters
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// gather the name of every registered parameter
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
34 m->errorOut(e, "ListSeqsCommand", "setParameters");
38 //**********************************************************************************************************************
// Builds the user-facing help text for list.seqs: what the command reads and writes,
// which parameters it accepts, the expected call format and an example.
// Errors are reported through m->errorOut with this class/method name.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// `helpString` is returned -- confirm against the full file.
39 string ListSeqsCommand::getHelpString(){
41 string helpString = "";
42 helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy or alignreport file and outputs a .accnos file containing sequence names.\n";
43 helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy and alignreport. You must provide one of these parameters.\n";
44 helpString += "The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n";
45 helpString += "Example list.seqs(fasta=amazon.fasta).\n";
46 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
50 m->errorOut(e, "ListSeqsCommand", "getHelpString");
54 //**********************************************************************************************************************
// Returns the file-name tag used for an output of the given `type`.
//   type      - output type to look up; "accnos" is the only type given a tag here.
//   inputName - not referenced in the visible lines of this function.
// Returns `outputFileName`, which starts empty and is set to "accnos" for type "accnos".
// A type missing from `outputTypes` is reported with an [ERROR] message; a type with no
// tag definition is reported and also sets m->control_pressed.
// NOTE(review): the line between the lookup check and the "accnos" test is not visible
// in this excerpt, so whether the tag assignment sits in an else branch of the lookup
// check cannot be confirmed from here.
55 string ListSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
57 string outputFileName = "";
58 map<string, vector<string> >::iterator it;
60 //is this a type this command creates
61 it = outputTypes.find(type);
62 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
// map the known output type to its tag; anything else is an error that flags control_pressed
64 if (type == "accnos") { outputFileName = "accnos"; }
65 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
67 return outputFileName;
70 m->errorOut(e, "ListSeqsCommand", "getOutputFileNameTag");
74 //**********************************************************************************************************************
// Default constructor: marks the command as aborted/help-only (abort and calledHelp are
// both set to true) and registers "accnos" in `outputTypes` with an empty file list.
// Errors are reported through m->errorOut with this class/method name.
// NOTE(review): one numbered line between the flag assignment and the outputTypes setup
// is not visible in this excerpt.
75 ListSeqsCommand::ListSeqsCommand(){
77 abort = true; calledHelp = true;
// "accnos" is the one output type registered here; start with no files recorded
79 vector<string> tempOutNames;
80 outputTypes["accnos"] = tempOutNames;
83 m->errorOut(e, "ListSeqsCommand", "ListSeqsCommand");
87 //**********************************************************************************************************************
// Option-string constructor: parses and validates the user's options for list.seqs.
//   option - "help", "citation", or the option string handed to OptionParser.
// Steps visible here:
//   1. "help"/"citation" print their text and set abort/calledHelp.
//   2. Every parsed parameter is checked against the names from setParameters(); an
//      invalid one sets abort.
//   3. outputTypes["accnos"] is reset to an empty list; outputdir/inputdir are read.
//   4. For each input-file parameter supplied without a path, inputDir is prepended.
//   5. Each file parameter is validated: "not open" sets abort, "not found" becomes "",
//      otherwise the file is recorded as the current file of that type via m->set...File
//      (no such setter call is made for alignreport).
//   6. At least one file must be supplied, and the parameter count is checked against `okay`.
// Errors are reported through m->errorOut with this class/method name.
// NOTE(review): the declarations of `path` and `okay`, and the else/closing-brace lines
// that scope these steps, are not visible in this excerpt -- confirm nesting against the
// full file before relying on the step order above.
89 ListSeqsCommand::ListSeqsCommand(string option) {
91 abort = false; calledHelp = false;
93 //allow user to run help
94 if(option == "help") { help(); abort = true; calledHelp = true; }
95 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// names of all parameters this command accepts
97 vector<string> myArray = setParameters();
99 OptionParser parser(option);
// NOTE(review): this local map has the same name as the `parameters` container used in setParameters()
100 map<string,string> parameters = parser.getParameters();
102 ValidParameters validParameter;
103 map<string,string>::iterator it;
105 //check to make sure all parameters are valid for command
// the loop declares its own `it`, separate from the one declared above
106 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
107 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
110 //initialize outputTypes
111 vector<string> tempOutNames;
112 outputTypes["accnos"] = tempOutNames;
114 //if the user changes the output directory command factory will send this info to us in the output parameter
115 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
117 //if the user changes the input directory command factory will send this info to us in the output parameter
118 string inputDir = validParameter.validFile(parameters, "inputdir", false);
119 if (inputDir == "not found"){ inputDir = ""; }
// for each file parameter below: if the user gave a bare file name, prefix it with inputDir
122 it = parameters.find("alignreport");
123 //user has given an alignreport file
124 if(it != parameters.end()){
125 path = m->hasPath(it->second);
126 //if the user has not given a path then, add inputdir. else leave path alone.
127 if (path == "") { parameters["alignreport"] = inputDir + it->second; }
130 it = parameters.find("fasta");
131 //user has given a fasta file
132 if(it != parameters.end()){
133 path = m->hasPath(it->second);
134 //if the user has not given a path then, add inputdir. else leave path alone.
135 if (path == "") { parameters["fasta"] = inputDir + it->second; }
138 it = parameters.find("list");
139 //user has given a list file
140 if(it != parameters.end()){
141 path = m->hasPath(it->second);
142 //if the user has not given a path then, add inputdir. else leave path alone.
143 if (path == "") { parameters["list"] = inputDir + it->second; }
146 it = parameters.find("name");
147 //user has given a name file
148 if(it != parameters.end()){
149 path = m->hasPath(it->second);
150 //if the user has not given a path then, add inputdir. else leave path alone.
151 if (path == "") { parameters["name"] = inputDir + it->second; }
154 it = parameters.find("group");
155 //user has given a group file
156 if(it != parameters.end()){
157 path = m->hasPath(it->second);
158 //if the user has not given a path then, add inputdir. else leave path alone.
159 if (path == "") { parameters["group"] = inputDir + it->second; }
162 it = parameters.find("taxonomy");
163 //user has given a taxonomy file
164 if(it != parameters.end()){
165 path = m->hasPath(it->second);
166 //if the user has not given a path then, add inputdir. else leave path alone.
167 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
170 it = parameters.find("count");
171 //user has given a count file
172 if(it != parameters.end()){
173 path = m->hasPath(it->second);
174 //if the user has not given a path then, add inputdir. else leave path alone.
175 if (path == "") { parameters["count"] = inputDir + it->second; }
179 //check for required parameters
// each file: "not open" aborts, "not found" means the parameter was not supplied
180 fastafile = validParameter.validFile(parameters, "fasta", true);
181 if (fastafile == "not open") { abort = true; }
182 else if (fastafile == "not found") { fastafile = ""; }
183 else { m->setFastaFile(fastafile); }
185 namefile = validParameter.validFile(parameters, "name", true);
186 if (namefile == "not open") { abort = true; }
187 else if (namefile == "not found") { namefile = ""; }
188 else { m->setNameFile(namefile); }
190 groupfile = validParameter.validFile(parameters, "group", true);
191 if (groupfile == "not open") { abort = true; }
192 else if (groupfile == "not found") { groupfile = ""; }
193 else { m->setGroupFile(groupfile); }
// alignreport: validated like the others, but no current-file setter is called here
195 alignfile = validParameter.validFile(parameters, "alignreport", true);
196 if (alignfile == "not open") { abort = true; }
197 else if (alignfile == "not found") { alignfile = ""; }
199 listfile = validParameter.validFile(parameters, "list", true);
200 if (listfile == "not open") { abort = true; }
201 else if (listfile == "not found") { listfile = ""; }
202 else { m->setListFile(listfile); }
204 taxfile = validParameter.validFile(parameters, "taxonomy", true);
205 if (taxfile == "not open") { abort = true; }
206 else if (taxfile == "not found") { taxfile = ""; }
207 else { m->setTaxonomyFile(taxfile); }
209 countfile = validParameter.validFile(parameters, "count", true);
210 if (countfile == "not open") { abort = true; }
211 else if (countfile == "not found") { countfile = ""; }
212 else { m->setCountTableFile(countfile); }
// at least one input file is required
214 if ((countfile == "") && (fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == "")) { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; }
// `okay` is the number of parameters allowed; outputdir and inputdir each raise it by one
// NOTE(review): the declaration and starting value of `okay` are not visible in this excerpt
217 if (outputDir != "") { okay++; }
218 if (inputDir != "") { okay++; }
220 if (parameters.size() > okay) { m->mothurOut("You may only enter one file."); m->mothurOutEndLine(); abort = true; }
224 catch(exception& e) {
225 m->errorOut(e, "ListSeqsCommand", "ListSeqsCommand");
229 //**********************************************************************************************************************
// Runs the command: collects sequence names from the supplied input file and writes
// them, sorted, one per line, to an accnos output file.
// Returns 0 when the command was aborted after help/citation, 2 when aborted for any
// other reason, and 0 when m->control_pressed is seen (after clearing outputTypes and,
// once the output file exists, removing it).
// Input priority when several are set: fasta, name, group, alignreport, list, taxonomy, count.
// Side effects visible here: fills `names`, sets `inputFileName`, may set `outputDir`,
// records the output in outputNames/outputTypes, sets the current accnos file and
// prints the output file name.
// Errors are reported through m->errorOut with this class/method name.
// NOTE(review): the declarations of `out`, `itTypes` and `current`, the close of `out`
// after the write loop, and the final return are not visible in this excerpt.
231 int ListSeqsCommand::execute(){
234 if (abort == true) { if (calledHelp) { return 0; } return 2; }
236 //read functions fill names vector
237 if (fastafile != "") { inputFileName = fastafile; readFasta(); }
238 else if (namefile != "") { inputFileName = namefile; readName(); }
239 else if (groupfile != "") { inputFileName = groupfile; readGroup(); }
240 else if (alignfile != "") { inputFileName = alignfile; readAlign(); }
241 else if (listfile != "") { inputFileName = listfile; readList(); }
242 else if (taxfile != "") { inputFileName = taxfile; readTax(); }
243 else if (countfile != "") { inputFileName = countfile; readCount(); }
245 if (m->control_pressed) { outputTypes.clear(); return 0; }
247 //sort in alphabetical order
248 sort(names.begin(), names.end());
// with no outputdir given, write next to the input file
250 if (outputDir == "") { outputDir += m->hasPath(inputFileName); }
// output name = output directory + root of the input file name + "accnos" tag
252 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + getOutputFileNameTag("accnos");
255 m->openOutputFile(outputFileName, out);
256 outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
258 //output to .accnos file
259 for (int i = 0; i < names.size(); i++) {
// on interrupt: discard the partial output file
261 if (m->control_pressed) { outputTypes.clear(); out.close(); m->mothurRemove(outputFileName); return 0; }
263 out << names[i] << endl;
267 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); return 0; }
269 m->setAccnosFile(outputFileName);
271 m->mothurOutEndLine();
272 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
273 m->mothurOut(outputFileName); m->mothurOutEndLine();
274 m->mothurOutEndLine();
276 //set accnos file as new current accnosfile
// NOTE(review): setAccnosFile was already called above with outputFileName; this second
// call uses the first recorded accnos output and looks redundant -- confirm
278 itTypes = outputTypes.find("accnos");
279 if (itTypes != outputTypes.end()) {
280 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
286 catch(exception& e) {
287 m->errorOut(e, "ListSeqsCommand", "execute");
292 //**********************************************************************************************************************
// Reads `fastafile` and appends the name of each sequence to `names`.
// A Sequence is constructed from the input stream, its name is taken with getName(),
// and empty names are skipped. Returns 0 (after closing the stream) if
// m->control_pressed is set.
// Errors are reported through m->errorOut with this class/method name.
// NOTE(review): the declarations of `in` and `name`, the loop header, and the normal
// return path are not visible in this excerpt.
293 int ListSeqsCommand::readFasta(){
297 m->openInputFile(fastafile, in);
// the three lines below are disabled code for writing a renamed fasta file
301 //string newFastaName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "numsAdded.fasta";
302 //m->openOutputFile(newFastaName, out);
304 //string lastName = "";
308 if (m->control_pressed) { in.close(); return 0; }
// parse the next record from the stream
310 Sequence currSeq(in);
311 name = currSeq.getName();
// only keep records that produced a name
313 if (name != "") { names.push_back(name); }
324 catch(exception& e) {
325 m->errorOut(e, "ListSeqsCommand", "readFasta");
329 //**********************************************************************************************************************
// Reads `listfile` and collects sequence names from every bin of a list vector.
// For each bin index, the bin's name string is fetched with list.get(i) and handed to
// m->splitAtComma together with `names`. Returns 0 (after closing the stream) if
// m->control_pressed is set.
// Errors are reported through m->errorOut with this class/method name.
// NOTE(review): how `list` is built from the stream is not visible in this excerpt;
// presumably splitAtComma adds the comma-separated names to `names` -- confirm.
330 int ListSeqsCommand::readList(){
333 m->openInputFile(listfile, in);
336 //read in list vector
340 for (int i = 0; i < list.getNumBins(); i++) {
341 string binnames = list.get(i);
343 if (m->control_pressed) { in.close(); return 0; }
// split the bin's comma-separated names
345 m->splitAtComma(binnames, names);
353 catch(exception& e) {
354 m->errorOut(e, "ListSeqsCommand", "readList");
359 //**********************************************************************************************************************
// Reads `namefile` and collects sequence names from the second column.
// `secondCol` is handed to m->splitAtComma together with `names`. Returns 0 (after
// closing the stream) if m->control_pressed is set.
// Errors are reported through m->errorOut with this class/method name.
// NOTE(review): the loop header and the reads that fill firstCol/secondCol are not
// visible in this excerpt; `name` is declared but not used in the visible lines.
360 int ListSeqsCommand::readName(){
364 m->openInputFile(namefile, in);
365 string name, firstCol, secondCol;
369 if (m->control_pressed) { in.close(); return 0; }
374 //parse second column saving each name
375 m->splitAtComma(secondCol, names);
383 catch(exception& e) {
384 m->errorOut(e, "ListSeqsCommand", "readName");
389 //**********************************************************************************************************************
// Reads `groupfile` and appends the first-column value of each record to `names`.
// The second column is read into `group` but not used in the visible lines.
// Returns 0 (after closing the stream) if m->control_pressed is set.
// Errors are reported through m->errorOut with this class/method name.
// NOTE(review): the declarations of `in`, `name`, `group` and the loop header are not
// visible in this excerpt.
390 int ListSeqsCommand::readGroup(){
394 m->openInputFile(groupfile, in);
399 if (m->control_pressed) { in.close(); return 0; }
401 in >> name; m->gobble(in); //read from first column
402 in >> group; //read from second column
// only the sequence name is kept
404 names.push_back(name);
412 catch(exception& e) {
413 m->errorOut(e, "ListSeqsCommand", "readGroup");
417 //**********************************************************************************************************************
// Loads `countfile` through ct.readTable and replaces `names` with the result of
// ct.getNamesOfSeqs(). Returns 0 without touching `names` if m->control_pressed is
// set after the table is read.
// Errors are reported through m->errorOut with this class/method name.
// NOTE(review): the declaration of `ct` is not visible in this excerpt; presumably it
// is a count table object from counttable.h -- confirm.
418 int ListSeqsCommand::readCount(){
421 ct.readTable(countfile);
423 if (m->control_pressed) { return 0; }
// assignment, not append: any previous contents of names are overwritten
425 names = ct.getNamesOfSeqs();
430 catch(exception& e) {
431 m->errorOut(e, "ListSeqsCommand", "readCount");
435 //**********************************************************************************************************************
436 //alignreport file has a column header line then all other lines contain 16 columns. we just want the first column since that contains the name
// Reads `alignfile` and appends the first-column value of each data row to `names`.
// Sixteen whitespace-separated header tokens are consumed first; for each row the name
// is read and the next fifteen tokens are discarded into `junk`.
// Returns 0 (after closing the stream) if m->control_pressed is set.
// Errors are reported through m->errorOut with this class/method name.
// NOTE(review): the declarations of `in`, `name`, `junk` and the row loop header are
// not visible in this excerpt.
437 int ListSeqsCommand::readAlign(){
441 m->openInputFile(alignfile, in);
444 //read column headers
445 for (int i = 0; i < 16; i++) {
446 if (!in.eof()) { in >> junk; }
453 if (m->control_pressed) { in.close(); return 0; }
455 in >> name; //read from first column
// skip the remaining 15 columns of this row
458 for (int i = 0; i < 15; i++) {
459 if (!in.eof()) { in >> junk; }
463 names.push_back(name);
473 catch(exception& e) {
474 m->errorOut(e, "ListSeqsCommand", "readAlign");
478 //**********************************************************************************************************************
// Reads `taxfile` and appends `firstCol` to `names` for each record.
// Returns 0 (after closing the stream) if m->control_pressed is set.
// Errors are reported through m->errorOut with this class/method name.
// NOTE(review): the loop header and the reads that fill firstCol/secondCol are not
// visible in this excerpt; `name` and `secondCol` are not used in the visible lines.
479 int ListSeqsCommand::readTax(){
483 m->openInputFile(taxfile, in);
484 string name, firstCol, secondCol;
488 if (m->control_pressed) { in.close(); return 0; }
// first column holds the sequence name
493 names.push_back(firstCol);
503 catch(exception& e) {
504 m->errorOut(e, "ListSeqsCommand", "readTax");
508 //**********************************************************************************************************************