]> git.donarmstrong.com Git - mothur.git/blob - listseqscommand.cpp
changed added group output to indicator command. a few changes to work with the guy
[mothur.git] / listseqscommand.cpp
1 /*
2  *  listseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "listseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14
15
16 //**********************************************************************************************************************
17 vector<string> ListSeqsCommand::setParameters(){        
18         try {
19                 CommandParameter pfasta("fasta", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pfasta);
20                 CommandParameter pname("name", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pname);
21         CommandParameter pcount("count", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pcount);
22                 CommandParameter pgroup("group", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pgroup);
23                 CommandParameter plist("list", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(plist);
24                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
25                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(palignreport);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "ListSeqsCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string ListSeqsCommand::getHelpString(){        
40         try {
41                 string helpString = "";
42                 helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy or alignreport file and outputs a .accnos file containing sequence names.\n";
43                 helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy and alignreport.  You must provide one of these parameters.\n";
44                 helpString += "The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n";
45                 helpString += "Example list.seqs(fasta=amazon.fasta).\n";
46                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
47                 return helpString;
48         }
49         catch(exception& e) {
50                 m->errorOut(e, "ListSeqsCommand", "getHelpString");
51                 exit(1);
52         }
53 }
54 //**********************************************************************************************************************
55 string ListSeqsCommand::getOutputFileNameTag(string type, string inputName=""){ 
56         try {
57         string outputFileName = "";
58                 map<string, vector<string> >::iterator it;
59         
60         //is this a type this command creates
61         it = outputTypes.find(type);
62         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
63         else {
64             if (type == "accnos")             {   outputFileName =  "accnos";       }
65             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
66         }
67         return outputFileName;
68         }
69         catch(exception& e) {
70                 m->errorOut(e, "ListSeqsCommand", "getOutputFileNameTag");
71                 exit(1);
72         }
73 }
74 //**********************************************************************************************************************
75 ListSeqsCommand::ListSeqsCommand(){     
76         try {
77                 abort = true; calledHelp = true; 
78                 setParameters();
79                 vector<string> tempOutNames;
80                 outputTypes["accnos"] = tempOutNames;
81         }
82         catch(exception& e) {
83                 m->errorOut(e, "ListSeqsCommand", "ListSeqsCommand");
84                 exit(1);
85         }
86 }
87 //**********************************************************************************************************************
88
89 ListSeqsCommand::ListSeqsCommand(string option)  {
90         try {
91                 abort = false; calledHelp = false;   
92                 
93                 //allow user to run help
94                 if(option == "help") { help(); abort = true; calledHelp = true; }
95                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
96                 else {
97                         vector<string> myArray = setParameters();
98                         
99                         OptionParser parser(option);
100                         map<string,string> parameters = parser.getParameters();
101                         
102                         ValidParameters validParameter;
103                         map<string,string>::iterator it;
104                         
105                         //check to make sure all parameters are valid for command
106                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
107                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
108                         }
109                         
110                         //initialize outputTypes
111                         vector<string> tempOutNames;
112                         outputTypes["accnos"] = tempOutNames;
113                         
114                         //if the user changes the output directory command factory will send this info to us in the output parameter 
115                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
116                         
117                         //if the user changes the input directory command factory will send this info to us in the output parameter 
118                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
119                         if (inputDir == "not found"){   inputDir = "";          }
120                         else {
121                                 string path;
122                                 it = parameters.find("alignreport");
123                                 //user has given a template file
124                                 if(it != parameters.end()){ 
125                                         path = m->hasPath(it->second);
126                                         //if the user has not given a path then, add inputdir. else leave path alone.
127                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
128                                 }
129                                 
130                                 it = parameters.find("fasta");
131                                 //user has given a template file
132                                 if(it != parameters.end()){ 
133                                         path = m->hasPath(it->second);
134                                         //if the user has not given a path then, add inputdir. else leave path alone.
135                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
136                                 }
137                                 
138                                 it = parameters.find("list");
139                                 //user has given a template file
140                                 if(it != parameters.end()){ 
141                                         path = m->hasPath(it->second);
142                                         //if the user has not given a path then, add inputdir. else leave path alone.
143                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
144                                 }
145                                 
146                                 it = parameters.find("name");
147                                 //user has given a template file
148                                 if(it != parameters.end()){ 
149                                         path = m->hasPath(it->second);
150                                         //if the user has not given a path then, add inputdir. else leave path alone.
151                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
152                                 }
153                                 
154                                 it = parameters.find("group");
155                                 //user has given a template file
156                                 if(it != parameters.end()){ 
157                                         path = m->hasPath(it->second);
158                                         //if the user has not given a path then, add inputdir. else leave path alone.
159                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
160                                 }
161                                 
162                                 it = parameters.find("taxonomy");
163                                 //user has given a template file
164                                 if(it != parameters.end()){ 
165                                         path = m->hasPath(it->second);
166                                         //if the user has not given a path then, add inputdir. else leave path alone.
167                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
168                                 }
169                 
170                 it = parameters.find("count");
171                                 //user has given a template file
172                                 if(it != parameters.end()){ 
173                                         path = m->hasPath(it->second);
174                                         //if the user has not given a path then, add inputdir. else leave path alone.
175                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
176                                 }
177                         }
178
179                         //check for required parameters
180                         fastafile = validParameter.validFile(parameters, "fasta", true);
181                         if (fastafile == "not open") { abort = true; }
182                         else if (fastafile == "not found") {  fastafile = "";  }
183                         else { m->setFastaFile(fastafile); }
184                         
185                         namefile = validParameter.validFile(parameters, "name", true);
186                         if (namefile == "not open") { abort = true; }
187                         else if (namefile == "not found") {  namefile = "";  }  
188                         else { m->setNameFile(namefile); }
189                         
190                         groupfile = validParameter.validFile(parameters, "group", true);
191                         if (groupfile == "not open") { abort = true; }
192                         else if (groupfile == "not found") {  groupfile = "";  }        
193                         else { m->setGroupFile(groupfile); }
194                         
195                         alignfile = validParameter.validFile(parameters, "alignreport", true);
196                         if (alignfile == "not open") { abort = true; }
197                         else if (alignfile == "not found") {  alignfile = "";  }
198                         
199                         listfile = validParameter.validFile(parameters, "list", true);
200                         if (listfile == "not open") { abort = true; }
201                         else if (listfile == "not found") {  listfile = "";  }
202                         else { m->setListFile(listfile); }
203                         
204                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
205                         if (taxfile == "not open") { abort = true; }
206                         else if (taxfile == "not found") {  taxfile = "";  }
207                         else { m->setTaxonomyFile(taxfile); }
208             
209             countfile = validParameter.validFile(parameters, "count", true);
210                         if (countfile == "not open") { abort = true; }
211                         else if (countfile == "not found") {  countfile = "";  }
212                         else { m->setCountTableFile(countfile); }
213                         
214                         if ((countfile == "") && (fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == ""))  { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; }
215                         
216                         int okay = 1;
217                         if (outputDir != "") { okay++; }
218                         if (inputDir != "") { okay++; }
219                         
220                         if (parameters.size() > okay) { m->mothurOut("You may only enter one file."); m->mothurOutEndLine(); abort = true;  }
221                 }
222
223         }
224         catch(exception& e) {
225                 m->errorOut(e, "ListSeqsCommand", "ListSeqsCommand");
226                 exit(1);
227         }
228 }
229 //**********************************************************************************************************************
230
231 int ListSeqsCommand::execute(){
232         try {
233                 
234                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
235                 
236                 //read functions fill names vector
237                 if (fastafile != "")            {       inputFileName = fastafile;      readFasta();    }
238                 else if (namefile != "")        {       inputFileName = namefile;       readName();             }
239                 else if (groupfile != "")       {       inputFileName = groupfile;      readGroup();    }
240                 else if (alignfile != "")       {       inputFileName = alignfile;      readAlign();    }
241                 else if (listfile != "")        {       inputFileName = listfile;       readList();             }
242                 else if (taxfile != "")         {       inputFileName = taxfile;        readTax();              }
243         else if (countfile != "")       {       inputFileName = countfile;      readCount();    }
244                 
245                 if (m->control_pressed) { outputTypes.clear();  return 0; }
246                 
247                 //sort in alphabetical order
248                 sort(names.begin(), names.end());
249                 
250                 if (outputDir == "") {  outputDir += m->hasPath(inputFileName);  }
251                 
252                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + getOutputFileNameTag("accnos");
253
254                 ofstream out;
255                 m->openOutputFile(outputFileName, out);
256                 outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
257                 
258                 //output to .accnos file
259                 for (int i = 0; i < names.size(); i++) {
260                         
261                         if (m->control_pressed) { outputTypes.clear(); out.close(); m->mothurRemove(outputFileName); return 0; }
262                         
263                         out << names[i] << endl;
264                 }
265                 out.close();
266                 
267                 if (m->control_pressed) { outputTypes.clear();  m->mothurRemove(outputFileName); return 0; }
268                 
269                 m->setAccnosFile(outputFileName);
270                 
271                 m->mothurOutEndLine();
272                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
273                 m->mothurOut(outputFileName); m->mothurOutEndLine();    
274                 m->mothurOutEndLine();
275                 
276                 //set accnos file as new current accnosfile
277                 string current = "";
278                 itTypes = outputTypes.find("accnos");
279                 if (itTypes != outputTypes.end()) {
280                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
281                 }
282                 
283                 return 0;               
284         }
285
286         catch(exception& e) {
287                 m->errorOut(e, "ListSeqsCommand", "execute");
288                 exit(1);
289         }
290 }
291
292 //**********************************************************************************************************************
293 int ListSeqsCommand::readFasta(){
294         try {
295                 
296                 ifstream in;
297                 m->openInputFile(fastafile, in);
298                 string name;
299                 
300                 //ofstream out;
301                 //string newFastaName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "numsAdded.fasta";
302                 //m->openOutputFile(newFastaName, out);
303                 //int count = 1;
304                 //string lastName = "";
305                 
306                 while(!in.eof()){
307                         
308                         if (m->control_pressed) { in.close(); return 0; }
309                         
310                         Sequence currSeq(in);
311                         name = currSeq.getName();
312                         
313                         if (name != "") {  names.push_back(name);  }
314                         
315                         m->gobble(in);
316                         //count++;
317                 }
318                 in.close();     
319                 //out.close();
320                 
321                 return 0;
322
323         }
324         catch(exception& e) {
325                 m->errorOut(e, "ListSeqsCommand", "readFasta");
326                 exit(1);
327         }
328 }
329 //**********************************************************************************************************************
330 int ListSeqsCommand::readList(){
331         try {
332                 ifstream in;
333                 m->openInputFile(listfile, in);
334                 
335                 if(!in.eof()){
336                         //read in list vector
337                         ListVector list(in);
338                         
339                         //for each bin
340                         for (int i = 0; i < list.getNumBins(); i++) {
341                                 string binnames = list.get(i);
342                                 
343                                 if (m->control_pressed) { in.close(); return 0; }
344                                 
345                                 m->splitAtComma(binnames, names);
346                         }
347                 }
348                 in.close();     
349                 
350                 return 0;
351                 
352         }
353         catch(exception& e) {
354                 m->errorOut(e, "ListSeqsCommand", "readList");
355                 exit(1);
356         }
357 }
358
359 //**********************************************************************************************************************
360 int ListSeqsCommand::readName(){
361         try {
362                 
363                 ifstream in;
364                 m->openInputFile(namefile, in);
365                 string name, firstCol, secondCol;
366                 
367                 while(!in.eof()){
368                 
369                         if (m->control_pressed) { in.close(); return 0; }
370
371                         in >> firstCol;                         
372                         in >> secondCol;                        
373                         
374                         //parse second column saving each name
375                         m->splitAtComma(secondCol, names);
376                         
377                         m->gobble(in);
378                 }
379                 in.close();
380                 return 0;
381                 
382         }
383         catch(exception& e) {
384                 m->errorOut(e, "ListSeqsCommand", "readName");
385                 exit(1);
386         }
387 }
388
389 //**********************************************************************************************************************
390 int ListSeqsCommand::readGroup(){
391         try {
392         
393                 ifstream in;
394                 m->openInputFile(groupfile, in);
395                 string name, group;
396                 
397                 while(!in.eof()){
398                         
399                         if (m->control_pressed) { in.close(); return 0; }
400                         
401                         in >> name;     m->gobble(in);                  //read from first column
402                         in >> group;                    //read from second column
403                         
404                         names.push_back(name);
405                                         
406                         m->gobble(in);
407                 }
408                 in.close();
409                 return 0;
410
411         }
412         catch(exception& e) {
413                 m->errorOut(e, "ListSeqsCommand", "readGroup");
414                 exit(1);
415         }
416 }
417 //**********************************************************************************************************************
418 int ListSeqsCommand::readCount(){
419         try {
420                 CountTable ct;
421                 ct.readTable(countfile);
422         
423         if (m->control_pressed) { return 0; }
424         
425         names = ct.getNamesOfSeqs();
426         
427         return 0;
428         
429         }
430         catch(exception& e) {
431                 m->errorOut(e, "ListSeqsCommand", "readCount");
432                 exit(1);
433         }
434 }
435 //**********************************************************************************************************************
436 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
437 int ListSeqsCommand::readAlign(){
438         try {
439         
440                 ifstream in;
441                 m->openInputFile(alignfile, in);
442                 string name, junk;
443                 
444                 //read column headers
445                 for (int i = 0; i < 16; i++) {  
446                         if (!in.eof())  {       in >> junk;             }
447                         else                    {       break;                  }
448                 }
449                 //m->getline(in);
450                 
451                 while(!in.eof()){
452                 
453                         if (m->control_pressed) { in.close(); return 0; }
454
455                         in >> name;                             //read from first column
456                         //m->getline(in);
457                         //read rest
458                         for (int i = 0; i < 15; i++) {  
459                                 if (!in.eof())  {       in >> junk;             }
460                                 else                    {       break;                  }
461                         }
462                         
463                         names.push_back(name);
464                                         
465                         m->gobble(in);
466                 }
467                 in.close();
468                 
469                 return 0;
470
471                 
472         }
473         catch(exception& e) {
474                 m->errorOut(e, "ListSeqsCommand", "readAlign");
475                 exit(1);
476         }
477 }
478 //**********************************************************************************************************************
479 int ListSeqsCommand::readTax(){
480         try {
481                 
482                 ifstream in;
483                 m->openInputFile(taxfile, in);
484                 string name, firstCol, secondCol;
485                 
486                 while(!in.eof()){
487                 
488                         if (m->control_pressed) { in.close(); return 0; }
489
490                         in >> firstCol;                         
491                         in >> secondCol;                        
492                         
493                         names.push_back(firstCol);
494                         
495                         m->gobble(in);
496                         
497                 }
498                 in.close();
499                 
500                 return 0;
501                 
502         }
503         catch(exception& e) {
504                 m->errorOut(e, "ListSeqsCommand", "readTax");
505                 exit(1);
506         }
507 }
508 //**********************************************************************************************************************