]> git.donarmstrong.com Git - mothur.git/blob - binsequencecommand.cpp
changing command name classify.shared to classifyrf.shared
[mothur.git] / binsequencecommand.cpp
1 /*
2  *  binsequencecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/3/09.
6  *  Copyright 2009 Schloss Lab UMASS Amhers. All rights reserved.
7  *
8  */
9
10 #include "binsequencecommand.h"
11
12
13 //**********************************************************************************************************************
14 vector<string> BinSeqCommand::setParameters(){  
15         try {
16                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(plist);
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
18         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
19         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
20                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
21                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
22                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
24         
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "BinSeqCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string BinSeqCommand::getHelpString(){  
36         try {
37                 string helpString = "";
38                 helpString += "The bin.seqs command parameters are list, fasta, name, count, label and group.  The fasta and list are required, unless you have a valid current list and fasta file.\n";
39                 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and are separated by dashes.\n";
40                 helpString += "The bin.seqs command should be in the following format: bin.seqs(fasta=yourFastaFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n";
41                 helpString += "Example bin.seqs(fasta=amazon.fasta, group=amazon.groups, name=amazon.names).\n";
42                 helpString += "The default value for label is all lines in your inputfile.\n";
43                 helpString += "The bin.seqs command outputs a .fasta file for each distance you specify appending the OTU number to each name.\n";
44                 helpString += "If you provide a groupfile, then it also appends the sequences group to the name.\n";
45                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
46                 return helpString;
47         }
48         catch(exception& e) {
49                 m->errorOut(e, "BinSeqCommand", "getHelpString");
50                 exit(1);
51         }
52 }
53 //**********************************************************************************************************************
54 string BinSeqCommand::getOutputPattern(string type) {
55     try {
56         string pattern = "";
57         
58         if (type == "fasta") {  pattern = "[filename],[distance],fasta"; } //makes file like: amazon.0.03.fasta
59         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
60         
61         return pattern;
62     }
63     catch(exception& e) {
64         m->errorOut(e, "BinSeqCommand", "getOutputPattern");
65         exit(1);
66     }
67 }
68 //**********************************************************************************************************************
69 BinSeqCommand::BinSeqCommand(){ 
70         try {
71                 abort = true; calledHelp = true; 
72                 setParameters();
73                 vector<string> tempOutNames;
74                 outputTypes["fasta"] = tempOutNames;
75         }
76         catch(exception& e) {
77                 m->errorOut(e, "BinSeqCommand", "BinSeqCommand");
78                 exit(1);
79         }
80 }
81 //**********************************************************************************************************************
82 BinSeqCommand::BinSeqCommand(string option) {
83         try {
84                 abort = false; calledHelp = false;   
85                 allLines = 1;
86                 labels.clear();
87                 
88                 //allow user to run help
89                 if(option == "help") { help(); abort = true; calledHelp = true; }
90                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
91                 
92                 else {
93                         vector<string> myArray = setParameters();
94                         
95                         OptionParser parser(option);
96                         map<string, string> parameters = parser.getParameters();
97                         
98                         ValidParameters validParameter;
99                         map<string, string>::iterator it;
100                 
101                         //check to make sure all parameters are valid for command
102                         for (it = parameters.begin(); it != parameters.end(); it++) { 
103                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
104                         }
105                         
106                         //initialize outputTypes
107                         vector<string> tempOutNames;
108                         outputTypes["fasta"] = tempOutNames;
109                         
110                         //if the user changes the input directory command factory will send this info to us in the output parameter 
111                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
112                         if (inputDir == "not found"){   inputDir = "";          }
113                         else {
114                                 string path;
115                                 it = parameters.find("fasta");
116                                 //user has given a template file
117                                 if(it != parameters.end()){ 
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
121                                 }
122                                 
123                                 it = parameters.find("list");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
129                                 }
130                                 
131                                 it = parameters.find("name");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
137                                 }
138                                 
139                                 it = parameters.find("group");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
145                                 }
146                 
147                 it = parameters.find("count");
148                                 //user has given a template file
149                                 if(it != parameters.end()){ 
150                                         path = m->hasPath(it->second);
151                                         //if the user has not given a path then, add inputdir. else leave path alone.
152                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
153                                 }
154                         }
155
156                         
157                         //check for required parameters
158                         fastafile = validParameter.validFile(parameters, "fasta", true);
159                         if (fastafile == "not found") {                                 //if there is a current phylip file, use it
160                                 fastafile = m->getFastaFile(); 
161                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
162                                 else {  m->mothurOut("You have no current fasta file and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
163                         }
164                         else if (fastafile == "not open") { abort = true; }     
165                         else { m->setFastaFile(fastafile); }
166                         
167                         listfile = validParameter.validFile(parameters, "list", true);
168                         if (listfile == "not found") {                  
169                                 listfile = m->getListFile(); 
170                                 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
171                                 else {  m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
172                         }
173                         else if (listfile == "not open") { listfile = ""; abort = true; }       
174                         else { m->setListFile(listfile); }
175                         
176                         //if the user changes the output directory command factory will send this info to us in the output parameter 
177                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
178                                 outputDir = ""; 
179                                 outputDir += m->hasPath(listfile); //if user entered a file with a path then preserve it        
180                         }
181                         
182                 
183                         //check for optional parameter and set defaults
184                         // ...at some point should added some additional type checking...
185                         
186                         label = validParameter.validFile(parameters, "label", false);                   
187                         if (label == "not found") { label = ""; }
188                         else { 
189                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
190                                 else { allLines = 1;  }
191                         }
192                         
193                         namesfile = validParameter.validFile(parameters, "name", true);
194                         if (namesfile == "not open") { namesfile = ""; abort = true; }  
195                         else if (namesfile == "not found") { namesfile = ""; }
196                         else {  m->setNameFile(namesfile); }
197
198                         groupfile = validParameter.validFile(parameters, "group", true);
199                         if (groupfile == "not open") { abort = true; }
200                         else if (groupfile == "not found") { groupfile = ""; }
201                         else { m->setGroupFile(groupfile); }
202             
203             countfile = validParameter.validFile(parameters, "count", true);
204                         if (countfile == "not open") { countfile = ""; abort = true; }
205                         else if (countfile == "not found") { countfile = "";  } 
206                         else { m->setCountTableFile(countfile); }
207             
208             if ((namesfile != "") && (countfile != "")) {
209                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
210             }
211                         
212             if ((groupfile != "") && (countfile != "")) {
213                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
214             }
215                         
216             if (countfile == "") {
217                 if (namesfile == ""){
218                     vector<string> files; files.push_back(fastafile); 
219                     parser.getNameFile(files);
220                 }
221             }
222                         
223                 }
224         }
225         catch(exception& e) {
226                 m->errorOut(e, "BinSeqCommand", "BinSeqCommand");
227                 exit(1);
228         }
229 }
230 //**********************************************************************************************************************
231
232 BinSeqCommand::~BinSeqCommand(){}
233 //**********************************************************************************************************************
234
235 int BinSeqCommand::execute(){
236         try {
237                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
238         
239                 int error = 0;
240                 
241                 fasta = new FastaMap();
242                 if (groupfile != "") {
243                         groupMap = new GroupMap(groupfile);
244                         groupMap->readMap();
245                 }
246                 
247                 //read fastafile
248                 fasta->readFastaFile(fastafile);
249                 
250                 //if user gave a namesfile then use it
251                 if (namesfile != "") {  readNamesFile();  }
252         if (countfile != "") {  ct.readTable(countfile, true);  }
253                 
254                 input = new InputData(listfile, "list");
255                 list = input->getListVector();
256                 string lastLabel = list->getLabel();
257                 
258                 if (m->control_pressed) {  delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; }
259                 
260                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
261                 set<string> processedLabels;
262                 set<string> userLabels = labels;
263
264                                 
265                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
266                         
267                         if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
268                         
269                         if(allLines == 1 || labels.count(list->getLabel()) == 1){
270                                 
271                                 error = process(list);  
272                                 if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
273                                                         
274                                 processedLabels.insert(list->getLabel());
275                                 userLabels.erase(list->getLabel());
276                         }
277                         
278                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
279                                 string saveLabel = list->getLabel();
280                                 
281                                 delete list;
282                                 list = input->getListVector(lastLabel);
283                                 
284                                 error = process(list);  
285                                 if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; }
286                                                                                                         
287                                 processedLabels.insert(list->getLabel());
288                                 userLabels.erase(list->getLabel());
289                                 
290                                 //restore real lastlabel to save below
291                                 list->setLabel(saveLabel);
292                         }
293                         
294                         lastLabel = list->getLabel();                   
295                         
296                         delete list;
297                         list = input->getListVector();
298                 }
299                 
300                 if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
301
302                 //output error messages about any remaining user labels
303                 set<string>::iterator it;
304                 bool needToRun = false;
305                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
306                         m->mothurOut("Your file does not include the label " + *it); 
307                         if (processedLabels.count(lastLabel) != 1) {
308                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
309                                 needToRun = true;
310                         }else {
311                                 m->mothurOut(". Please refer to " + lastLabel + ".");  m->mothurOutEndLine();
312                         }
313                 }
314                 
315                 //run last label if you need to
316                 if (needToRun == true)  {
317                         if (list != NULL) {             delete list;    }
318                         list = input->getListVector(lastLabel);
319                                 
320                         error = process(list);  
321                         if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; }
322                         
323                         delete list;  
324                 }
325                 
326                 delete input;  
327                 delete fasta; 
328                 if (groupfile != "") {  delete groupMap;   } 
329                 
330                 if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);                }  return 0; }  
331         
332         //set align file as new current fastafile
333                 string currentFasta = "";
334                 itTypes = outputTypes.find("fasta");
335                 if (itTypes != outputTypes.end()) {
336                         if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
337                 }
338                 
339                 m->mothurOutEndLine();
340                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
341                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
342                 m->mothurOutEndLine();
343
344                 
345                 return 0;
346         }
347         catch(exception& e) {
348                 m->errorOut(e, "BinSeqCommand", "execute");
349                 exit(1);
350         }
351 }
352
353 //**********************************************************************************************************************
354 void BinSeqCommand::readNamesFile() {
355         try {
356                 vector<string> dupNames;
357                 m->openInputFile(namesfile, inNames);
358                 
359                 string name, names, sequence;
360         
361                 while(inNames){
362                         inNames >> name;                        //read from first column  A
363                         inNames >> names;               //read from second column  A,B,C,D
364                         
365                         dupNames.clear();
366                         
367                         //parse names into vector
368                         m->splitAtComma(names, dupNames);
369                         
370                         //store names in fasta map
371                         sequence = fasta->getSequence(name);
372                         for (int i = 0; i < dupNames.size(); i++) {
373                                 fasta->push_back(dupNames[i], sequence);
374                         }
375                 
376                         m->gobble(inNames);
377                 }
378                 inNames.close();
379
380         }
381         catch(exception& e) {
382                 m->errorOut(e, "BinSeqCommand", "readNamesFile");
383                 exit(1);
384         }
385 }
386 //**********************************************************************************************************************
387 //return 1 if error, 0 otherwise
388 int BinSeqCommand::process(ListVector* list) {
389         try {
390         map<string, string> variables; 
391         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
392         variables["[distance]"] = list->getLabel();
393         string outputFileName = getOutputFileName("fasta", variables);
394         
395         m->openOutputFile(outputFileName, out);
396         outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName);
397         
398         m->mothurOut(list->getLabel()); m->mothurOutEndLine();
399         
400         //for each bin in the list vector
401         for (int i = 0; i < list->size(); i++) {
402             
403             if (m->control_pressed) {  return 1; }
404             
405             string binnames = list->get(i);
406             vector<string> names;
407             m->splitAtComma(binnames, names);
408             for (int j = 0; j < names.size(); j++) {
409                 string name = names[j];
410                 
411                 //do work for that name
412                 string sequence = fasta->getSequence(name);
413                 
414                 if (countfile != "") {
415                     if (sequence != "not found") {
416                         if (ct.hasGroupInfo()) {
417                             vector<string> groups = ct.getGroups(name);
418                             string groupInfo = "";
419                             for (int k = 0; k < groups.size()-1; k++) {
420                                 groupInfo += groups[k] + "-";
421                             }
422                             if (groups.size() != 0) { groupInfo += groups[groups.size()-1]; }
423                             else { groupInfo = "not found";  }
424                             name = name + "\t" + groupInfo + "\t" + toString(i+1)+ "\tNumRep=" + toString(ct.getNumSeqs(name));
425                             out << ">" << name << endl;
426                             out << sequence << endl;
427                         }else {
428                             name = name + "\t" + toString(i+1) + "\tNumRep=" + toString(ct.getNumSeqs(name));
429                             out << ">" << name << endl;
430                             out << sequence << endl;
431                         }
432                         
433                     }else { m->mothurOut(name + " is missing from your fasta. Does your list file contain all sequence names or just the uniques?"); m->mothurOutEndLine(); return 1; }
434                 }else {
435                     if (sequence != "not found") {
436                         //if you don't have groups
437                         if (groupfile == "") {
438                             name = name + "\t" + toString(i+1);
439                             out << ">" << name << endl;
440                             out << sequence << endl;
441                         }else {//if you do have groups
442                             string group = groupMap->getGroup(name);
443                             if (group == "not found") {  
444                                 m->mothurOut(name + " is missing from your group file. Please correct. ");  m->mothurOutEndLine();
445                                 return 1;
446                             }else{
447                                 name = name + "\t" + group + "\t" + toString(i+1);
448                                 out << ">" << name << endl;
449                                 out << sequence << endl;
450                             }
451                         }
452                     }else { m->mothurOut(name + " is missing from your fasta or name file. Please correct. "); m->mothurOutEndLine(); return 1; }
453                 }
454             }
455         }
456         
457         out.close();
458         return 0;
459
460         }
461         catch(exception& e) {
462                 m->errorOut(e, "BinSeqCommand", "process");
463                 exit(1);
464         }
465 }
466 //**********************************************************************************************************************
467
468