5 * Created by Sarah Westcott on 7/8/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "listseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
16 //**********************************************************************************************************************
// Registers every parameter that list.seqs accepts (eight input-file parameters plus
// inputdir/outputdir) in `parameters`, then collects their names into myArray.
// NOTE(review): the return statement is outside this excerpt; presumably myArray is returned — confirm.
17 vector<string> ListSeqsCommand::setParameters(){
// Input-file parameters: each is declared with type "InputTypes" and lists "accnos" as its output type.
19 CommandParameter pfastq("fastq", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pfastq);
20 CommandParameter pfasta("fasta", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pfasta);
21 CommandParameter pname("name", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pname);
22 CommandParameter pcount("count", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pcount);
23 CommandParameter pgroup("group", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pgroup);
24 CommandParameter plist("list", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(plist);
25 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(ptaxonomy);
// NOTE(review): alignreport is constructed with one fewer trailing argument than the other input parameters — confirm this is intentional.
26 CommandParameter palignreport("alignreport", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false); parameters.push_back(palignreport);
// Directory parameters, declared with type "String".
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
// Flatten the registered parameters into a list of their names.
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Error reporting for this function (the enclosing catch is outside this excerpt).
35 m->errorOut(e, "ListSeqsCommand", "setParameters");
39 //**********************************************************************************************************************
// Builds the user-facing help text for list.seqs by appending one sentence per line to helpString.
// NOTE(review): the return statement is outside this excerpt; presumably helpString is returned — confirm.
40 string ListSeqsCommand::getHelpString(){
42 string helpString = "";
// What the command does and which input types it accepts.
43 helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy, fastq or alignreport file and outputs a .accnos file containing sequence names.\n";
44 helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy, fastq and alignreport. You must provide one of these parameters.\n";
// Usage format and a concrete example.
45 helpString += "The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n";
46 helpString += "Example list.seqs(fasta=amazon.fasta).\n";
47 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
// Error reporting for this function (the enclosing catch is outside this excerpt).
51 m->errorOut(e, "ListSeqsCommand", "getHelpString");
55 //**********************************************************************************************************************
// Maps an output type name to its file-name pattern. Only "accnos" is defined here.
// NOTE(review): the declaration of `pattern` and the return statement are outside this excerpt.
56 string ListSeqsCommand::getOutputPattern(string type) {
// "accnos" output uses the pattern "[filename],accnos".
60 if (type == "accnos") { pattern = "[filename],accnos"; }
// Any other type is an error: log it and set control_pressed.
61 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
// Error reporting for this function (the enclosing catch is outside this excerpt).
66 m->errorOut(e, "ListSeqsCommand", "getOutputPattern");
70 //**********************************************************************************************************************
// Default constructor: builds a command object that is flagged as aborted with help called,
// so execute() returns 0 immediately (see the abort check there). It still registers the
// "accnos" output type with an empty file list.
71 ListSeqsCommand::ListSeqsCommand(){
73 abort = true; calledHelp = true;
// Register the "accnos" output type with no files yet.
75 vector<string> tempOutNames;
76 outputTypes["accnos"] = tempOutNames;
// Error reporting for this constructor (the enclosing catch is outside this excerpt).
79 m->errorOut(e, "ListSeqsCommand", "ListSeqsCommand");
83 //**********************************************************************************************************************
// Constructs the command from the user's option string.
// Steps visible in this excerpt: handle "help"/"citation", validate parameter names,
// prepend inputdir to any input file given without a path, open/validate each input
// file, then require that a file was provided and reject extra parameters.
// Any failure sets abort = true rather than throwing.
// NOTE(review): the brace structure and the declarations of `path` and `okay` are outside this excerpt.
85 ListSeqsCommand::ListSeqsCommand(string option) {
87 abort = false; calledHelp = false;
89 //allow user to run help
90 if(option == "help") { help(); abort = true; calledHelp = true; }
91 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Names of all parameters this command accepts.
93 vector<string> myArray = setParameters();
// Parse the option string into a parameter-name -> value map.
95 OptionParser parser(option);
96 map<string,string> parameters = parser.getParameters();
98 ValidParameters validParameter;
99 map<string,string>::iterator it;
101 //check to make sure all parameters are valid for command
// Note: the loop declares its own `it`, shadowing the iterator declared just above.
102 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
103 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
106 //initialize outputTypes
107 vector<string> tempOutNames;
108 outputTypes["accnos"] = tempOutNames;
110 //if the user changes the output directory command factory will send this info to us in the output parameter
111 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
113 //if the user changes the input directory command factory will send this info to us in the output parameter
114 string inputDir = validParameter.validFile(parameters, "inputdir", false);
115 if (inputDir == "not found"){ inputDir = ""; }
// For each input-file parameter below: if it was supplied without a path, prefix it with inputDir.
118 it = parameters.find("alignreport");
119 //user has given an alignreport file
120 if(it != parameters.end()){
121 path = m->hasPath(it->second);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { parameters["alignreport"] = inputDir + it->second; }
126 it = parameters.find("fasta");
127 //user has given a fasta file
128 if(it != parameters.end()){
129 path = m->hasPath(it->second);
130 //if the user has not given a path then, add inputdir. else leave path alone.
131 if (path == "") { parameters["fasta"] = inputDir + it->second; }
134 it = parameters.find("list");
135 //user has given a list file
136 if(it != parameters.end()){
137 path = m->hasPath(it->second);
138 //if the user has not given a path then, add inputdir. else leave path alone.
139 if (path == "") { parameters["list"] = inputDir + it->second; }
142 it = parameters.find("name");
143 //user has given a name file
144 if(it != parameters.end()){
145 path = m->hasPath(it->second);
146 //if the user has not given a path then, add inputdir. else leave path alone.
147 if (path == "") { parameters["name"] = inputDir + it->second; }
150 it = parameters.find("group");
151 //user has given a group file
152 if(it != parameters.end()){
153 path = m->hasPath(it->second);
154 //if the user has not given a path then, add inputdir. else leave path alone.
155 if (path == "") { parameters["group"] = inputDir + it->second; }
158 it = parameters.find("taxonomy");
159 //user has given a taxonomy file
160 if(it != parameters.end()){
161 path = m->hasPath(it->second);
162 //if the user has not given a path then, add inputdir. else leave path alone.
163 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
166 it = parameters.find("count");
167 //user has given a count file
168 if(it != parameters.end()){
169 path = m->hasPath(it->second);
170 //if the user has not given a path then, add inputdir. else leave path alone.
171 if (path == "") { parameters["count"] = inputDir + it->second; }
174 it = parameters.find("fastq");
175 //user has given a fastq file
176 if(it != parameters.end()){
177 path = m->hasPath(it->second);
178 //if the user has not given a path then, add inputdir. else leave path alone.
179 if (path == "") { parameters["fastq"] = inputDir + it->second; }
// Validate each input file. validFile's result is handled as: "not open" -> abort,
// "not found" -> parameter absent (store ""), anything else -> a usable file name.
// All except alignreport and fastq are also passed to the matching m->set...File call.
183 //check for required parameters
184 fastafile = validParameter.validFile(parameters, "fasta", true);
185 if (fastafile == "not open") { abort = true; }
186 else if (fastafile == "not found") { fastafile = ""; }
187 else { m->setFastaFile(fastafile); }
189 namefile = validParameter.validFile(parameters, "name", true);
190 if (namefile == "not open") { abort = true; }
191 else if (namefile == "not found") { namefile = ""; }
192 else { m->setNameFile(namefile); }
194 groupfile = validParameter.validFile(parameters, "group", true);
195 if (groupfile == "not open") { abort = true; }
196 else if (groupfile == "not found") { groupfile = ""; }
197 else { m->setGroupFile(groupfile); }
199 alignfile = validParameter.validFile(parameters, "alignreport", true);
200 if (alignfile == "not open") { abort = true; }
201 else if (alignfile == "not found") { alignfile = ""; }
203 listfile = validParameter.validFile(parameters, "list", true);
204 if (listfile == "not open") { abort = true; }
205 else if (listfile == "not found") { listfile = ""; }
206 else { m->setListFile(listfile); }
208 taxfile = validParameter.validFile(parameters, "taxonomy", true);
209 if (taxfile == "not open") { abort = true; }
210 else if (taxfile == "not found") { taxfile = ""; }
211 else { m->setTaxonomyFile(taxfile); }
213 countfile = validParameter.validFile(parameters, "count", true);
214 if (countfile == "not open") { abort = true; }
215 else if (countfile == "not found") { countfile = ""; }
216 else { m->setCountTableFile(countfile); }
218 fastqfile = validParameter.validFile(parameters, "fastq", true);
219 if (fastqfile == "not open") { abort = true; }
220 else if (fastqfile == "not found") { fastqfile = ""; }
// If every file name is empty, complain and abort.
222 if ((fastqfile == "") && (countfile == "") && (fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == "")) { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; }
// `okay` is the number of parameters the user may pass; it grows by one for each of
// outputdir/inputdir that is set. More parameters than that means more than one file was given.
// NOTE(review): the declaration and initial value of `okay` are outside this excerpt — confirm.
225 if (outputDir != "") { okay++; }
226 if (inputDir != "") { okay++; }
228 if (parameters.size() > okay) { m->mothurOut("You may only enter one file."); m->mothurOutEndLine(); abort = true; }
// Exceptions raised during construction are reported, not propagated from here as far as this excerpt shows.
232 catch(exception& e) {
233 m->errorOut(e, "ListSeqsCommand", "ListSeqsCommand");
237 //**********************************************************************************************************************
// Runs the command: fills `names` from whichever input file was provided, sorts the
// names, and writes them one per line to an .accnos file, which becomes the current accnos file.
// Returns 0 when help was called or when control_pressed is detected, 2 when aborted for another reason.
// NOTE(review): the declarations of `out`, `itTypes`, `current` and the final return are outside this excerpt.
239 int ListSeqsCommand::execute(){
// Aborted in the constructor: 0 if the user only asked for help/citation, otherwise 2.
242 if (abort == true) { if (calledHelp) { return 0; } return 2; }
// Only the first non-empty input in this fixed order is read.
244 //read functions fill names vector
245 if (fastafile != "") { inputFileName = fastafile; readFasta(); }
246 else if (fastqfile != "") { inputFileName = fastqfile; readFastq(); }
247 else if (namefile != "") { inputFileName = namefile; readName(); }
248 else if (groupfile != "") { inputFileName = groupfile; readGroup(); }
249 else if (alignfile != "") { inputFileName = alignfile; readAlign(); }
250 else if (listfile != "") { inputFileName = listfile; readList(); }
251 else if (taxfile != "") { inputFileName = taxfile; readTax(); }
252 else if (countfile != "") { inputFileName = countfile; readCount(); }
// Stop before producing any output if control_pressed was set during reading.
254 if (m->control_pressed) { outputTypes.clear(); return 0; }
256 //sort in alphabetical order
257 sort(names.begin(), names.end());
// With no outputdir given, write next to the input file.
259 if (outputDir == "") { outputDir += m->hasPath(inputFileName); }
// Build the output file name from the input file's root name and the "accnos" pattern.
261 map<string, string> variables;
262 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFileName));
263 string outputFileName = getOutputFileName("accnos", variables);
266 m->openOutputFile(outputFileName, out);
267 outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
269 //output to .accnos file
270 for (int i = 0; i < names.size(); i++) {
// If control_pressed is set mid-write, close and delete the partial file.
272 if (m->control_pressed) { outputTypes.clear(); out.close(); m->mothurRemove(outputFileName); return 0; }
274 out << names[i] << endl;
// Same clean-up after the loop.
278 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); return 0; }
280 m->setAccnosFile(outputFileName);
// Report the output file name to the user.
282 m->mothurOutEndLine();
283 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
284 m->mothurOut(outputFileName); m->mothurOutEndLine();
285 m->mothurOutEndLine();
// This sets the accnos file a second time, from the first entry recorded in outputTypes.
287 //set accnos file as new current accnosfile
289 itTypes = outputTypes.find("accnos");
290 if (itTypes != outputTypes.end()) {
291 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
297 catch(exception& e) {
298 m->errorOut(e, "ListSeqsCommand", "execute");
302 //**********************************************************************************************************************
// Reads sequence names from the fastq file into `names`.
// For each record whose first line starts with '@', the name is that line minus the '@';
// the next three lines of the record are read and discarded.
// Returns 0 early if control_pressed is set.
// NOTE(review): the stream declaration, loop header and final return are outside this excerpt.
303 int ListSeqsCommand::readFastq(){
307 m->openInputFile(fastqfile, in);
311 //string newFastaName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "numsAdded.fasta";
312 //m->openOutputFile(newFastaName, out);
314 //string lastName = "";
318 if (m->control_pressed) { in.close(); return 0; }
// First line of a record: the header line.
321 string name = m->getline(in); m->gobble(in);
323 if (name[0] == '@') {
// NOTE(review): `splits` has no visible use in this excerpt; lines between here and the substr may be elided — confirm.
324 vector<string> splits = m->splitWhiteSpace(name);
// Drop the leading '@'.
326 name = name.substr(1);
328 names.push_back(name);
// Consume the remaining three lines of the record; their content is not used.
330 name = m->getline(in); m->gobble(in);
331 name = m->getline(in); m->gobble(in);
332 name = m->getline(in); m->gobble(in);
// In debug mode, print a running count. At this point `name` holds the last line read, not the sequence name.
336 if (m->debug) { count++; cout << "[DEBUG]: count = " + toString(count) + ", name = " + name + "\n"; }
344 catch(exception& e) {
345 m->errorOut(e, "ListSeqsCommand", "readFastq");
350 //**********************************************************************************************************************
// Reads sequence names from the fasta file into `names`.
// Each Sequence is constructed from the input stream and its name is stored when non-empty.
// Returns 0 early if control_pressed is set.
// NOTE(review): the stream declaration, loop header and final return are outside this excerpt.
351 int ListSeqsCommand::readFasta(){
355 m->openInputFile(fastafile, in);
359 //string newFastaName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "numsAdded.fasta";
360 //m->openOutputFile(newFastaName, out);
362 //string lastName = "";
366 if (m->control_pressed) { in.close(); return 0; }
// Constructing a Sequence from the stream reads the next record.
368 Sequence currSeq(in);
369 name = currSeq.getName();
// Skip records that yield an empty name.
371 if (name != "") { names.push_back(name); }
// In debug mode, print a running count and the current sequence name.
374 if (m->debug) { count++; cout << "[DEBUG]: count = " + toString(count) + ", name = " + currSeq.getName() + "\n"; }
382 catch(exception& e) {
383 m->errorOut(e, "ListSeqsCommand", "readFasta");
387 //**********************************************************************************************************************
// Reads sequence names from the list file into `names`.
// Each bin of the list vector is a comma-separated string of names, split into `names`.
// Returns 0 early if control_pressed is set.
// NOTE(review): the stream declaration, the construction of `list` and the final return are outside this excerpt.
388 int ListSeqsCommand::readList(){
391 m->openInputFile(listfile, in);
394 //read in list vector
398 for (int i = 0; i < list.getNumBins(); i++) {
// One bin's content: names separated by commas.
399 string binnames = list.get(i);
401 if (m->control_pressed) { in.close(); return 0; }
// presumably splitAtComma appends to `names` rather than replacing it — verify in the helper.
403 m->splitAtComma(binnames, names);
411 catch(exception& e) {
412 m->errorOut(e, "ListSeqsCommand", "readList");
417 //**********************************************************************************************************************
// Reads sequence names from the name file into `names`.
// The names come from the comma-separated second column of each row.
// Returns 0 early if control_pressed is set.
// NOTE(review): the stream declaration, loop header, column reads and final return are outside this excerpt.
418 int ListSeqsCommand::readName(){
422 m->openInputFile(namefile, in);
423 string name, firstCol, secondCol;
427 if (m->control_pressed) { in.close(); return 0; }
432 //parse second column saving each name
433 m->splitAtComma(secondCol, names);
441 catch(exception& e) {
442 m->errorOut(e, "ListSeqsCommand", "readName");
447 //**********************************************************************************************************************
// Reads sequence names from the group file into `names`.
// Each row has a name in the first column and a group in the second; only the name is kept.
// Returns 0 early if control_pressed is set.
// NOTE(review): the stream and variable declarations, loop header and final return are outside this excerpt.
448 int ListSeqsCommand::readGroup(){
452 m->openInputFile(groupfile, in);
457 if (m->control_pressed) { in.close(); return 0; }
459 in >> name; m->gobble(in); //read from first column
// The group is read to advance the stream; its value is not used in the visible code.
460 in >> group; //read from second column
462 names.push_back(name);
470 catch(exception& e) {
471 m->errorOut(e, "ListSeqsCommand", "readGroup");
475 //**********************************************************************************************************************
// Reads sequence names from the count file into `names` by loading it into a count table
// and taking the table's sequence names. Returns 0 early if control_pressed is set after loading.
// NOTE(review): the declaration of `ct` and the final return are outside this excerpt;
// the meaning of the two `false` arguments to readTable is not shown here — check CountTable.
476 int ListSeqsCommand::readCount(){
479 ct.readTable(countfile, false, false);
481 if (m->control_pressed) { return 0; }
// Replaces the contents of `names` with the names from the table.
483 names = ct.getNamesOfSeqs();
488 catch(exception& e) {
489 m->errorOut(e, "ListSeqsCommand", "readCount");
493 //**********************************************************************************************************************
494 //The alignreport file has a column header line; every other line contains 16 columns. We only want the first column, since it contains the sequence name.
// Reads sequence names from the alignreport file into `names`.
// It skips up to 16 header tokens, then for each row keeps the first column and skips up to 15 more.
// Returns 0 early if control_pressed is set.
// NOTE(review): the stream and variable declarations, the row loop header and the final return are outside this excerpt.
495 int ListSeqsCommand::readAlign(){
499 m->openInputFile(alignfile, in);
502 //read column headers
503 for (int i = 0; i < 16; i++) {
// Stop extracting if the stream has already reached end-of-file.
504 if (!in.eof()) { in >> junk; }
511 if (m->control_pressed) { in.close(); return 0; }
513 in >> name; //read from first column
// Discard the remaining 15 columns of the row.
516 for (int i = 0; i < 15; i++) {
517 if (!in.eof()) { in >> junk; }
521 names.push_back(name);
531 catch(exception& e) {
532 m->errorOut(e, "ListSeqsCommand", "readAlign");
536 //**********************************************************************************************************************
// Reads sequence names from the taxonomy file into `names`.
// The value of firstCol is stored for each row.
// Returns 0 early if control_pressed is set.
// NOTE(review): the stream declaration, loop header, column reads and final return are outside this excerpt.
537 int ListSeqsCommand::readTax(){
541 m->openInputFile(taxfile, in);
542 string name, firstCol, secondCol;
546 if (m->control_pressed) { in.close(); return 0; }
// presumably firstCol holds the sequence name and secondCol the taxonomy string — confirm against the elided reads.
551 names.push_back(firstCol);
561 catch(exception& e) {
562 m->errorOut(e, "ListSeqsCommand", "readTax");
566 //**********************************************************************************************************************