5 * Created by Sarah Westcott on 7/8/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "listseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
16 //**********************************************************************************************************************
// Registers every parameter list.seqs understands and gathers their names.
// Each CommandParameter is pushed onto `parameters`; afterwards the `name` of
// every registered parameter is copied, in registration order, into myArray.
// The signature declares a vector<string> return value.
// Failures are reported through m->errorOut tagged with this class/method name.
17 vector<string> ListSeqsCommand::setParameters(){
// Input-file parameters: all share the "InputTypes" type and list "accnos" as their output type.
19 CommandParameter pfasta("fasta", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pfasta);
20 CommandParameter pname("name", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pname);
21 CommandParameter pcount("count", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pcount);
22 CommandParameter pgroup("group", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pgroup);
23 CommandParameter plist("list", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(plist);
24 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(ptaxonomy);
// NOTE(review): alignreport is built without the trailing `true` argument the other file parameters pass - confirm this is intentional.
25 CommandParameter palignreport("alignreport", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false); parameters.push_back(palignreport);
// Directory overrides, declared as plain "String" parameters.
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
// Collect just the parameter names.
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
34 m->errorOut(e, "ListSeqsCommand", "setParameters");
38 //**********************************************************************************************************************
// Builds the user-facing help text for list.seqs by appending one sentence
// per line to helpString: accepted inputs, the parameter list, the call
// format, an example, and a note about spacing.
// Failures are reported through m->errorOut tagged with this class/method name.
39 string ListSeqsCommand::getHelpString(){
41 string helpString = "";
42 helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy or alignreport file and outputs a .accnos file containing sequence names.\n";
43 helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy and alignreport. You must provide one of these parameters.\n";
44 helpString += "The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n";
45 helpString += "Example list.seqs(fasta=amazon.fasta).\n";
// NOTE(review): the text below reads "(i.e.yourFasta)" with no space after "i.e." - a typo in the displayed message.
46 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
50 m->errorOut(e, "ListSeqsCommand", "getHelpString");
54 //**********************************************************************************************************************
// Maps an output type name to its file-name pattern.
//   type: the output type being requested; only "accnos" is defined here.
// For "accnos" the pattern is "[filename],accnos". Any other type logs an
// [ERROR] message and sets m->control_pressed to true.
// Failures are reported through m->errorOut tagged with this class/method name.
55 string ListSeqsCommand::getOutputPattern(string type) {
59 if (type == "accnos") { pattern = "[filename],accnos"; }
60 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
65 m->errorOut(e, "ListSeqsCommand", "getOutputPattern");
69 //**********************************************************************************************************************
// Default constructor: produces a command object that is flagged as aborted
// and as having called help, so execute() returns immediately without doing
// any work. It still registers an empty "accnos" entry in outputTypes.
// Failures are reported through m->errorOut tagged with this class/method name.
70 ListSeqsCommand::ListSeqsCommand(){
72 abort = true; calledHelp = true;
// Register the output type this command can produce, with no files yet.
74 vector<string> tempOutNames;
75 outputTypes["accnos"] = tempOutNames;
78 m->errorOut(e, "ListSeqsCommand", "ListSeqsCommand");
82 //**********************************************************************************************************************
// Constructs the command from the user's option string.
//   option: "help", "citation", or the parameter string to parse.
// Steps visible here:
//   1. "help"/"citation" run the matching routine and mark the command aborted.
//   2. The option string is parsed and every parameter name is validated
//      against the list from setParameters(); an invalid one sets abort.
//   3. File parameters given without a path are prefixed with inputDir.
//   4. Each file parameter is validated; "not open" aborts, "not found" is
//      treated as "not supplied", anything else becomes the current file of
//      that type (no such setter is called for alignreport).
//   5. The command aborts if no file was supplied, or if more parameters
//      were supplied than the `okay` count allows.
// Exceptions are reported through m->errorOut tagged with this class/method name.
84 ListSeqsCommand::ListSeqsCommand(string option) {
86 abort = false; calledHelp = false;
88 //allow user to run help
89 if(option == "help") { help(); abort = true; calledHelp = true; }
90 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Names of all parameters this command accepts.
92 vector<string> myArray = setParameters();
// Split the option string into a name -> value map.
94 OptionParser parser(option);
95 map<string,string> parameters = parser.getParameters();
97 ValidParameters validParameter;
// NOTE(review): this `it` is shadowed by the iterator declared in the for-loop below; it is the one used by the later parameters.find() calls.
98 map<string,string>::iterator it;
100 //check to make sure all parameters are valid for command
101 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
102 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
105 //initialize outputTypes
106 vector<string> tempOutNames;
107 outputTypes["accnos"] = tempOutNames;
109 //if the user changes the output directory command factory will send this info to us in the outputdir parameter
110 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
112 //if the user changes the input directory command factory will send this info to us in the inputdir parameter
113 string inputDir = validParameter.validFile(parameters, "inputdir", false);
114 if (inputDir == "not found"){ inputDir = ""; }
// For each file parameter below: if it was supplied and m->hasPath() reports
// no path component, prepend inputDir to the supplied file name.
117 it = parameters.find("alignreport");
118 //user has given an alignreport file
119 if(it != parameters.end()){
120 path = m->hasPath(it->second);
121 //if the user has not given a path then, add inputdir. else leave path alone.
122 if (path == "") { parameters["alignreport"] = inputDir + it->second; }
125 it = parameters.find("fasta");
126 //user has given a fasta file
127 if(it != parameters.end()){
128 path = m->hasPath(it->second);
129 //if the user has not given a path then, add inputdir. else leave path alone.
130 if (path == "") { parameters["fasta"] = inputDir + it->second; }
133 it = parameters.find("list");
134 //user has given a list file
135 if(it != parameters.end()){
136 path = m->hasPath(it->second);
137 //if the user has not given a path then, add inputdir. else leave path alone.
138 if (path == "") { parameters["list"] = inputDir + it->second; }
141 it = parameters.find("name");
142 //user has given a name file
143 if(it != parameters.end()){
144 path = m->hasPath(it->second);
145 //if the user has not given a path then, add inputdir. else leave path alone.
146 if (path == "") { parameters["name"] = inputDir + it->second; }
149 it = parameters.find("group");
150 //user has given a group file
151 if(it != parameters.end()){
152 path = m->hasPath(it->second);
153 //if the user has not given a path then, add inputdir. else leave path alone.
154 if (path == "") { parameters["group"] = inputDir + it->second; }
157 it = parameters.find("taxonomy");
158 //user has given a taxonomy file
159 if(it != parameters.end()){
160 path = m->hasPath(it->second);
161 //if the user has not given a path then, add inputdir. else leave path alone.
162 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
165 it = parameters.find("count");
166 //user has given a count file
167 if(it != parameters.end()){
168 path = m->hasPath(it->second);
169 //if the user has not given a path then, add inputdir. else leave path alone.
170 if (path == "") { parameters["count"] = inputDir + it->second; }
174 //check for required parameters
// validFile() results are compared against the sentinels "not open" (abort)
// and "not found" (parameter absent, stored as ""); any other value is
// recorded as the current file of that type.
175 fastafile = validParameter.validFile(parameters, "fasta", true);
176 if (fastafile == "not open") { abort = true; }
177 else if (fastafile == "not found") { fastafile = ""; }
178 else { m->setFastaFile(fastafile); }
180 namefile = validParameter.validFile(parameters, "name", true);
181 if (namefile == "not open") { abort = true; }
182 else if (namefile == "not found") { namefile = ""; }
183 else { m->setNameFile(namefile); }
185 groupfile = validParameter.validFile(parameters, "group", true);
186 if (groupfile == "not open") { abort = true; }
187 else if (groupfile == "not found") { groupfile = ""; }
188 else { m->setGroupFile(groupfile); }
// NOTE(review): unlike the other inputs, alignreport has no "set current file" call in the visible lines.
190 alignfile = validParameter.validFile(parameters, "alignreport", true);
191 if (alignfile == "not open") { abort = true; }
192 else if (alignfile == "not found") { alignfile = ""; }
194 listfile = validParameter.validFile(parameters, "list", true);
195 if (listfile == "not open") { abort = true; }
196 else if (listfile == "not found") { listfile = ""; }
197 else { m->setListFile(listfile); }
199 taxfile = validParameter.validFile(parameters, "taxonomy", true);
200 if (taxfile == "not open") { abort = true; }
201 else if (taxfile == "not found") { taxfile = ""; }
202 else { m->setTaxonomyFile(taxfile); }
204 countfile = validParameter.validFile(parameters, "count", true);
205 if (countfile == "not open") { abort = true; }
206 else if (countfile == "not found") { countfile = ""; }
207 else { m->setCountTableFile(countfile); }
// At least one input file is mandatory.
209 if ((countfile == "") && (fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == "")) { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; }
// outputdir and inputdir each raise the number of parameters allowed before
// the "only one file" check fires. The starting value of `okay` is set
// outside the visible lines - TODO confirm.
212 if (outputDir != "") { okay++; }
213 if (inputDir != "") { okay++; }
215 if (parameters.size() > okay) { m->mothurOut("You may only enter one file."); m->mothurOutEndLine(); abort = true; }
219 catch(exception& e) {
220 m->errorOut(e, "ListSeqsCommand", "ListSeqsCommand");
224 //**********************************************************************************************************************
// Runs the command: reads sequence names from the supplied input file, sorts
// them, and writes them one per line to an accnos output file.
// Returns 0 when help was called or the run was interrupted via
// m->control_pressed, and 2 when the command was aborted for another reason.
// Exceptions are reported through m->errorOut tagged with this class/method name.
226 int ListSeqsCommand::execute(){
229 if (abort == true) { if (calledHelp) { return 0; } return 2; }
231 //read functions fill names vector
// Only the first non-empty input is read, checked in this order:
// fasta, name, group, alignreport, list, taxonomy, count.
232 if (fastafile != "") { inputFileName = fastafile; readFasta(); }
233 else if (namefile != "") { inputFileName = namefile; readName(); }
234 else if (groupfile != "") { inputFileName = groupfile; readGroup(); }
235 else if (alignfile != "") { inputFileName = alignfile; readAlign(); }
236 else if (listfile != "") { inputFileName = listfile; readList(); }
237 else if (taxfile != "") { inputFileName = taxfile; readTax(); }
238 else if (countfile != "") { inputFileName = countfile; readCount(); }
// Interrupted while reading: report no outputs.
240 if (m->control_pressed) { outputTypes.clear(); return 0; }
242 //sort in alphabetical order
243 sort(names.begin(), names.end());
// No output directory given: write next to the input file.
245 if (outputDir == "") { outputDir += m->hasPath(inputFileName); }
// Build the output file name from the input file's root name and the "accnos" pattern.
247 map<string, string> variables;
248 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFileName));
249 string outputFileName = getOutputFileName("accnos", variables);
252 m->openOutputFile(outputFileName, out);
253 outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
255 //output to .accnos file
256 for (int i = 0; i < names.size(); i++) {
// Interrupted mid-write: close and delete the partial output file.
258 if (m->control_pressed) { outputTypes.clear(); out.close(); m->mothurRemove(outputFileName); return 0; }
// NOTE(review): endl flushes the stream after every name.
260 out << names[i] << endl;
264 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); return 0; }
266 m->setAccnosFile(outputFileName);
// Report the generated file to the user.
268 m->mothurOutEndLine();
269 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
270 m->mothurOut(outputFileName); m->mothurOutEndLine();
271 m->mothurOutEndLine();
273 //set accnos file as new current accnosfile
// NOTE(review): setAccnosFile was already called with outputFileName above, and
// that same name is the only entry pushed onto outputTypes["accnos"] here, so
// this second call looks redundant.
275 itTypes = outputTypes.find("accnos");
276 if (itTypes != outputTypes.end()) {
277 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
283 catch(exception& e) {
284 m->errorOut(e, "ListSeqsCommand", "execute");
289 //**********************************************************************************************************************
// Reads sequence names from the fasta file into `names`.
// A Sequence is constructed from the input stream and its name is appended
// when it is not empty. Returns 0 after closing the stream if
// m->control_pressed is set.
// Exceptions are reported through m->errorOut tagged with this class/method name.
290 int ListSeqsCommand::readFasta(){
294 m->openInputFile(fastafile, in);
// Disabled code that once wrote a second fasta file; kept for reference.
298 //string newFastaName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "numsAdded.fasta";
299 //m->openOutputFile(newFastaName, out);
301 //string lastName = "";
305 if (m->control_pressed) { in.close(); return 0; }
307 Sequence currSeq(in);
308 name = currSeq.getName();
// Records with an empty name are skipped.
310 if (name != "") { names.push_back(name); }
// Debug trace to stdout. NOTE(review): count is incremented before it is printed - confirm the starting value gives the intended numbering.
313 if (m->debug) { count++; cout << "[DEBUG]: count = " + toString(count) + ", name = " + currSeq.getName() + "\n"; }
321 catch(exception& e) {
322 m->errorOut(e, "ListSeqsCommand", "readFasta");
326 //**********************************************************************************************************************
// Reads sequence names from the list file into `names`.
// Walks every bin of `list` and splits the bin's comma-separated names with
// m->splitAtComma, passing `names` as the destination (presumably appending -
// TODO confirm against splitAtComma). Returns 0 after closing the stream if
// m->control_pressed is set.
// Exceptions are reported through m->errorOut tagged with this class/method name.
327 int ListSeqsCommand::readList(){
330 m->openInputFile(listfile, in);
333 //read in list vector
337 for (int i = 0; i < list.getNumBins(); i++) {
338 string binnames = list.get(i);
340 if (m->control_pressed) { in.close(); return 0; }
342 m->splitAtComma(binnames, names);
350 catch(exception& e) {
351 m->errorOut(e, "ListSeqsCommand", "readList");
356 //**********************************************************************************************************************
// Reads sequence names from the name file into `names`.
// The second column's comma-separated names are split with m->splitAtComma,
// passing `names` as the destination (presumably appending - TODO confirm).
// Returns 0 after closing the stream if m->control_pressed is set.
// Exceptions are reported through m->errorOut tagged with this class/method name.
357 int ListSeqsCommand::readName(){
361 m->openInputFile(namefile, in);
362 string name, firstCol, secondCol;
366 if (m->control_pressed) { in.close(); return 0; }
371 //parse second column saving each name
372 m->splitAtComma(secondCol, names);
380 catch(exception& e) {
381 m->errorOut(e, "ListSeqsCommand", "readName");
386 //**********************************************************************************************************************
// Reads sequence names from the group file into `names`.
// Each record supplies a name (first column) and a group (second column);
// only the name is kept. Returns 0 after closing the stream if
// m->control_pressed is set.
// Exceptions are reported through m->errorOut tagged with this class/method name.
387 int ListSeqsCommand::readGroup(){
391 m->openInputFile(groupfile, in);
396 if (m->control_pressed) { in.close(); return 0; }
398 in >> name; m->gobble(in); //read from first column
// The group value is read only to advance the stream; it is not stored.
399 in >> group; //read from second column
401 names.push_back(name);
409 catch(exception& e) {
410 m->errorOut(e, "ListSeqsCommand", "readGroup");
414 //**********************************************************************************************************************
// Reads sequence names from the count file into `names`.
// The table is loaded with ct.readTable and `names` is replaced by the
// result of ct.getNamesOfSeqs(). Returns 0 without touching `names` if
// m->control_pressed is set after the table is read.
// Exceptions are reported through m->errorOut tagged with this class/method name.
415 int ListSeqsCommand::readCount(){
// NOTE(review): the meaning of the second argument (false) is defined by CountTable::readTable - confirm there.
418 ct.readTable(countfile, false);
420 if (m->control_pressed) { return 0; }
422 names = ct.getNamesOfSeqs();
427 catch(exception& e) {
428 m->errorOut(e, "ListSeqsCommand", "readCount");
432 //**********************************************************************************************************************
433 //The alignreport file has a column header line, then every other line contains 16 columns. We only want the first column, since it contains the name.
// Reads sequence names from the alignreport file into `names`.
// Returns 0 after closing the stream if m->control_pressed is set.
// Exceptions are reported through m->errorOut tagged with this class/method name.
434 int ListSeqsCommand::readAlign(){
438 m->openInputFile(alignfile, in);
441 //read column headers
// Discard up to 16 header tokens, stopping early at end of file.
442 for (int i = 0; i < 16; i++) {
443 if (!in.eof()) { in >> junk; }
450 if (m->control_pressed) { in.close(); return 0; }
452 in >> name; //read from first column
// Discard the remaining 15 columns of this row.
455 for (int i = 0; i < 15; i++) {
456 if (!in.eof()) { in >> junk; }
460 names.push_back(name);
470 catch(exception& e) {
471 m->errorOut(e, "ListSeqsCommand", "readAlign");
475 //**********************************************************************************************************************
// Reads sequence names from the taxonomy file into `names`.
// The value held in firstCol is appended for each record. Returns 0 after
// closing the stream if m->control_pressed is set.
// Exceptions are reported through m->errorOut tagged with this class/method name.
476 int ListSeqsCommand::readTax(){
480 m->openInputFile(taxfile, in);
481 string name, firstCol, secondCol;
485 if (m->control_pressed) { in.close(); return 0; }
// Only the first column is kept as the sequence name.
490 names.push_back(firstCol);
500 catch(exception& e) {
501 m->errorOut(e, "ListSeqsCommand", "readTax");
505 //**********************************************************************************************************************