]> git.donarmstrong.com Git - mothur.git/blob - binsequencecommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / binsequencecommand.cpp
1 /*
2  *  binsequencecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/3/09.
6  *  Copyright 2009 Schloss Lab UMASS Amhers. All rights reserved.
7  *
8  */
9
10 #include "binsequencecommand.h"
11
12
13 //**********************************************************************************************************************
14 vector<string> BinSeqCommand::setParameters(){  
15         try {
16                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
23         
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "BinSeqCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string BinSeqCommand::getHelpString(){  
35         try {
36                 string helpString = "";
37                 helpString += "The bin.seqs command parameters are list, fasta, name, label and group.  The fasta and list are required, unless you have a valid current list and fasta file.\n";
38                 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and are separated by dashes.\n";
39                 helpString += "The bin.seqs command should be in the following format: bin.seqs(fasta=yourFastaFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n";
40                 helpString += "Example bin.seqs(fasta=amazon.fasta, group=amazon.groups, name=amazon.names).\n";
41                 helpString += "The default value for label is all lines in your inputfile.\n";
42                 helpString += "The bin.seqs command outputs a .fasta file for each distance you specify appending the OTU number to each name.\n";
43                 helpString += "If you provide a groupfile, then it also appends the sequences group to the name.\n";
44                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
45                 return helpString;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "BinSeqCommand", "getHelpString");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 string BinSeqCommand::getOutputFileNameTag(string type, string inputName=""){   
54         try {
55         string outputFileName = "";
56                 map<string, vector<string> >::iterator it;
57         
58         //is this a type this command creates
59         it = outputTypes.find(type);
60         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
61         else {
62             if (type == "fasta") {  outputFileName =  "fasta"; }
63             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
64         }
65         return outputFileName;
66         }
67         catch(exception& e) {
68                 m->errorOut(e, "BinSeqCommand", "getOutputFileNameTag");
69                 exit(1);
70         }
71 }
72 //**********************************************************************************************************************
73 BinSeqCommand::BinSeqCommand(){ 
74         try {
75                 abort = true; calledHelp = true; 
76                 setParameters();
77                 vector<string> tempOutNames;
78                 outputTypes["fasta"] = tempOutNames;
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "BinSeqCommand", "BinSeqCommand");
82                 exit(1);
83         }
84 }
85 //**********************************************************************************************************************
86 BinSeqCommand::BinSeqCommand(string option) {
87         try {
88                 abort = false; calledHelp = false;   
89                 allLines = 1;
90                 labels.clear();
91                 
92                 //allow user to run help
93                 if(option == "help") { help(); abort = true; calledHelp = true; }
94                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
95                 
96                 else {
97                         vector<string> myArray = setParameters();
98                         
99                         OptionParser parser(option);
100                         map<string, string> parameters = parser.getParameters();
101                         
102                         ValidParameters validParameter;
103                         map<string, string>::iterator it;
104                 
105                         //check to make sure all parameters are valid for command
106                         for (it = parameters.begin(); it != parameters.end(); it++) { 
107                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
108                         }
109                         
110                         //initialize outputTypes
111                         vector<string> tempOutNames;
112                         outputTypes["fasta"] = tempOutNames;
113                         
114                         //if the user changes the input directory command factory will send this info to us in the output parameter 
115                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
116                         if (inputDir == "not found"){   inputDir = "";          }
117                         else {
118                                 string path;
119                                 it = parameters.find("fasta");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
125                                 }
126                                 
127                                 it = parameters.find("list");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
133                                 }
134                                 
135                                 it = parameters.find("name");
136                                 //user has given a template file
137                                 if(it != parameters.end()){ 
138                                         path = m->hasPath(it->second);
139                                         //if the user has not given a path then, add inputdir. else leave path alone.
140                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
141                                 }
142                                 
143                                 it = parameters.find("group");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
149                                 }
150                         }
151
152                         
153                         //check for required parameters
154                         fastafile = validParameter.validFile(parameters, "fasta", true);
155                         if (fastafile == "not found") {                                 //if there is a current phylip file, use it
156                                 fastafile = m->getFastaFile(); 
157                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
158                                 else {  m->mothurOut("You have no current fasta file and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
159                         }
160                         else if (fastafile == "not open") { abort = true; }     
161                         else { m->setFastaFile(fastafile); }
162                         
163                         listfile = validParameter.validFile(parameters, "list", true);
164                         if (listfile == "not found") {                  
165                                 listfile = m->getListFile(); 
166                                 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
167                                 else {  m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
168                         }
169                         else if (listfile == "not open") { listfile = ""; abort = true; }       
170                         else { m->setListFile(listfile); }
171                         
172                         //if the user changes the output directory command factory will send this info to us in the output parameter 
173                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
174                                 outputDir = ""; 
175                                 outputDir += m->hasPath(listfile); //if user entered a file with a path then preserve it        
176                         }
177                         
178                 
179                         //check for optional parameter and set defaults
180                         // ...at some point should added some additional type checking...
181                         
182                         label = validParameter.validFile(parameters, "label", false);                   
183                         if (label == "not found") { label = ""; }
184                         else { 
185                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
186                                 else { allLines = 1;  }
187                         }
188                         
189                         namesfile = validParameter.validFile(parameters, "name", true);
190                         if (namesfile == "not open") { namesfile = ""; abort = true; }  
191                         else if (namesfile == "not found") { namesfile = ""; }
192                         else {  m->setNameFile(namesfile); }
193
194                         groupfile = validParameter.validFile(parameters, "group", true);
195                         if (groupfile == "not open") { abort = true; }
196                         else if (groupfile == "not found") { groupfile = ""; }
197                         else { m->setGroupFile(groupfile); }
198                         
199                         if (namesfile == ""){
200                                 vector<string> files; files.push_back(fastafile); 
201                                 parser.getNameFile(files);
202                         }
203                         
204                 }
205         }
206         catch(exception& e) {
207                 m->errorOut(e, "BinSeqCommand", "BinSeqCommand");
208                 exit(1);
209         }
210 }
211 //**********************************************************************************************************************
212
213 BinSeqCommand::~BinSeqCommand(){}
214 //**********************************************************************************************************************
215
216 int BinSeqCommand::execute(){
217         try {
218                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
219         
220                 int error = 0;
221                 
222                 fasta = new FastaMap();
223                 if (groupfile != "") {
224                         groupMap = new GroupMap(groupfile);
225                         groupMap->readMap();
226                 }
227                 
228                 //read fastafile
229                 fasta->readFastaFile(fastafile);
230                 
231                 //if user gave a namesfile then use it
232                 if (namesfile != "") {
233                         readNamesFile();
234                 }
235                 
236                 input = new InputData(listfile, "list");
237                 list = input->getListVector();
238                 string lastLabel = list->getLabel();
239                 
240                 if (m->control_pressed) {  delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; }
241                 
242                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
243                 set<string> processedLabels;
244                 set<string> userLabels = labels;
245
246                                 
247                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
248                         
249                         if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
250                         
251                         if(allLines == 1 || labels.count(list->getLabel()) == 1){
252                                 
253                                 error = process(list);  
254                                 if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
255                                                         
256                                 processedLabels.insert(list->getLabel());
257                                 userLabels.erase(list->getLabel());
258                         }
259                         
260                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
261                                 string saveLabel = list->getLabel();
262                                 
263                                 delete list;
264                                 list = input->getListVector(lastLabel);
265                                 
266                                 error = process(list);  
267                                 if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; }
268                                                                                                         
269                                 processedLabels.insert(list->getLabel());
270                                 userLabels.erase(list->getLabel());
271                                 
272                                 //restore real lastlabel to save below
273                                 list->setLabel(saveLabel);
274                         }
275                         
276                         lastLabel = list->getLabel();                   
277                         
278                         delete list;
279                         list = input->getListVector();
280                 }
281                 
282                 if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
283
284                 //output error messages about any remaining user labels
285                 set<string>::iterator it;
286                 bool needToRun = false;
287                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
288                         m->mothurOut("Your file does not include the label " + *it); 
289                         if (processedLabels.count(lastLabel) != 1) {
290                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
291                                 needToRun = true;
292                         }else {
293                                 m->mothurOut(". Please refer to " + lastLabel + ".");  m->mothurOutEndLine();
294                         }
295                 }
296                 
297                 //run last label if you need to
298                 if (needToRun == true)  {
299                         if (list != NULL) {             delete list;    }
300                         list = input->getListVector(lastLabel);
301                                 
302                         error = process(list);  
303                         if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; }
304                         
305                         delete list;  
306                 }
307                 
308                 delete input;  
309                 delete fasta; 
310                 if (groupfile != "") {  delete groupMap;   } 
311                 
312                 if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);                }  return 0; }  
313                 
314                 m->mothurOutEndLine();
315                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
316                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
317                 m->mothurOutEndLine();
318
319                 
320                 return 0;
321         }
322         catch(exception& e) {
323                 m->errorOut(e, "BinSeqCommand", "execute");
324                 exit(1);
325         }
326 }
327
328 //**********************************************************************************************************************
329 void BinSeqCommand::readNamesFile() {
330         try {
331                 vector<string> dupNames;
332                 m->openInputFile(namesfile, inNames);
333                 
334                 string name, names, sequence;
335         
336                 while(inNames){
337                         inNames >> name;                        //read from first column  A
338                         inNames >> names;               //read from second column  A,B,C,D
339                         
340                         dupNames.clear();
341                         
342                         //parse names into vector
343                         m->splitAtComma(names, dupNames);
344                         
345                         //store names in fasta map
346                         sequence = fasta->getSequence(name);
347                         for (int i = 0; i < dupNames.size(); i++) {
348                                 fasta->push_back(dupNames[i], sequence);
349                         }
350                 
351                         m->gobble(inNames);
352                 }
353                 inNames.close();
354
355         }
356         catch(exception& e) {
357                 m->errorOut(e, "BinSeqCommand", "readNamesFile");
358                 exit(1);
359         }
360 }
361 //**********************************************************************************************************************
362 //return 1 if error, 0 otherwise
363 int BinSeqCommand::process(ListVector* list) {
364         try {
365                                 string binnames, name, sequence;
366                                 
367         string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + list->getLabel() + getOutputFileNameTag("fasta");
368         m->openOutputFile(outputFileName, out);
369                                 
370                                 //save to output list of output file names
371                                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName);
372
373                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
374                                 
375                                 //for each bin in the list vector
376                                 for (int i = 0; i < list->size(); i++) {
377                                         
378                                         if (m->control_pressed) {  return 1; }
379                                         
380                                         binnames = list->get(i);
381                                         while (binnames.find_first_of(',') != -1) { 
382                                                 name = binnames.substr(0,binnames.find_first_of(','));
383                                                 binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
384                                                 
385                                                 //do work for that name
386                                                 sequence = fasta->getSequence(name);
387                                                 if (sequence != "not found") {
388                                                         //if you don't have groups
389                                                         if (groupfile == "") {
390                                                                 name = name + "\t" + toString(i+1);
391                                                                 out << ">" << name << endl;
392                                                                 out << sequence << endl;
393                                                         }else {//if you do have groups
394                                                                 string group = groupMap->getGroup(name);
395                                                                 if (group == "not found") {  
396                                                                         m->mothurOut(name + " is missing from your group file. Please correct. ");  m->mothurOutEndLine();
397                                                                         return 1;
398                                                                 }else{
399                                                                         name = name + "\t" + group + "\t" + toString(i+1);
400                                                                         out << ">" << name << endl;
401                                                                         out << sequence << endl;
402                                                                 }
403                                                         }
404                                                 }else { 
405                                                         m->mothurOut(name + " is missing from your fasta or name file. Please correct. "); m->mothurOutEndLine();
406                                                         return 1;
407                                                 }
408                                                 
409                                         }
410                                         
411                                         //get last name
412                                         sequence = fasta->getSequence(binnames);
413                                         if (sequence != "not found") {
414                                                 //if you don't have groups
415                                                 if (groupfile == "") {
416                                                         binnames = binnames + "\t" + toString(i+1);
417                                                         out << ">" << binnames << endl;
418                                                         out << sequence << endl;
419                                                 }else {//if you do have groups
420                                                         string group = groupMap->getGroup(binnames);
421                                                         if (group == "not found") {  
422                                                                 m->mothurOut(binnames + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
423                                                                 return 1;
424                                                         }else{
425                                                                 binnames = binnames + "\t" + group + "\t" + toString(i+1);
426                                                                 out << ">" << binnames << endl;
427                                                                 out << sequence << endl;
428                                                         }
429                                                 }
430                                         }else { 
431                                                 m->mothurOut(binnames + " is missing from your fasta or name file. Please correct. "); m->mothurOutEndLine();
432                                                 return 1;
433                                         }
434                                 }
435                                         
436                                 out.close();
437                                 return 0;
438
439         }
440         catch(exception& e) {
441                 m->errorOut(e, "BinSeqCommand", "process");
442                 exit(1);
443         }
444 }
445 //**********************************************************************************************************************
446
447