]> git.donarmstrong.com Git - mothur.git/blob - binsequencecommand.cpp
added logfile feature
[mothur.git] / binsequencecommand.cpp
1 /*
2  *  binsequencecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/3/09.
6  *  Copyright 2009 Schloss Lab UMASS Amhers. All rights reserved.
7  *
8  */
9
10 #include "binsequencecommand.h"
11
12 //**********************************************************************************************************************
13 BinSeqCommand::BinSeqCommand(string option){
14         try {
15                 globaldata = GlobalData::getInstance();
16                 abort = false;
17                 allLines = 1;
18                 lines.clear();
19                 labels.clear();
20                 
21                 //allow user to run help
22                 if(option == "help") { help(); abort = true; }
23                 
24                 else {
25                         //valid paramters for this command
26                         string AlignArray[] =  {"fasta","line","label","name", "group"};
27                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
28                         
29                         OptionParser parser(option);
30                         map<string, string> parameters = parser.getParameters();
31                         
32                         ValidParameters validParameter;
33                 
34                         //check to make sure all parameters are valid for command
35                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
36                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
37                         }
38                         
39                         //make sure the user has already run the read.otu command
40                         if (globaldata->getListFile() == "") { 
41                                 mothurOut("You must read a listfile before running the bin.seqs command."); 
42                                 mothurOutEndLine(); 
43                                 abort = true; 
44                         }
45                         
46                         
47                         //check for required parameters
48                         fastafile = validParameter.validFile(parameters, "fasta", true);
49                         if (fastafile == "not found") { mothurOut("fasta is a required parameter for the bin.seqs command.");  mothurOutEndLine(); abort = true; }
50                         else if (fastafile == "not open") { abort = true; }     
51                 
52                         //check for optional parameter and set defaults
53                         // ...at some point should added some additional type checking...
54                         line = validParameter.validFile(parameters, "line", false);                             
55                         if (line == "not found") { line = "";  }
56                         else { 
57                                 if(line != "all") {  splitAtDash(line, lines);  allLines = 0;  }
58                                 else { allLines = 1;  }
59                         }
60                         
61                         label = validParameter.validFile(parameters, "label", false);                   
62                         if (label == "not found") { label = ""; }
63                         else { 
64                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
65                                 else { allLines = 1;  }
66                         }
67                         
68                         //make sure user did not use both the line and label parameters
69                         if ((line != "") && (label != "")) { mothurOut("You cannot use both the line and label parameters at the same time. "); mothurOutEndLine(); abort = true; }
70                         //if the user has not specified any line or labels use the ones from read.otu
71                         else if ((line == "") && (label == "")) {  
72                                 allLines = globaldata->allLines; 
73                                 labels = globaldata->labels; 
74                                 lines = globaldata->lines;
75                         }
76                         
77                         namesfile = validParameter.validFile(parameters, "name", true);
78                         if (namesfile == "not open") { abort = true; }  
79                         else if (namesfile == "not found") { namesfile = ""; }
80
81                         groupfile = validParameter.validFile(parameters, "group", true);
82                         if (groupfile == "not open") { abort = true; }
83                         else if (groupfile == "not found") { groupfile = ""; }
84                         
85                         if (abort == false) { 
86 //                              openInputFile(fastafile, in);
87                                 fasta = new FastaMap();
88                                 if (groupfile != "") {
89                                         groupMap = new GroupMap(groupfile);
90                                         groupMap->readMap();
91                                 }
92                         }
93         
94                 }
95         }
96         catch(exception& e) {
97                 errorOut(e, "BinSeqCommand", "BinSeqCommand");
98                 exit(1);
99         }
100 }
101 //**********************************************************************************************************************
102
103 void BinSeqCommand::help(){
104         try {
105                 mothurOut("The bin.seqs command can only be executed after a successful read.otu command of a listfile.\n");
106                 mothurOut("The bin.seqs command parameters are fasta, name, line, label and group.  The fasta parameter is required, and you may not use line and label at the same time.\n");
107                 mothurOut("The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes.\n");
108                 mothurOut("The bin.seqs command should be in the following format: bin.seqs(fasta=yourFastaFile, name=yourNamesFile, group=yourGroupFile, line=yourLines, label=yourLabels).\n");
109                 mothurOut("Example bin.seqs(fasta=amazon.fasta, group=amazon.groups, line=1-3-5, name=amazon.names).\n");
110                 mothurOut("The default value for line and label are all lines in your inputfile.\n");
111                 mothurOut("The bin.seqs command outputs a .fasta file for each distance you specify appending the OTU number to each name.\n");
112                 mothurOut("If you provide a groupfile, then it also appends the sequences group to the name.\n");
113                 mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
114         }
115         catch(exception& e) {
116                 errorOut(e, "BinSeqCommand", "help");
117                 exit(1);
118         }
119 }
120
121 //**********************************************************************************************************************
122
123 BinSeqCommand::~BinSeqCommand(){
124         //made new in execute
125         if (abort == false) {
126                 delete input;  globaldata->ginput = NULL;
127                 delete read;
128                 globaldata->gListVector = NULL;
129                 delete fasta;
130                 if (groupfile != "") {  delete groupMap;  globaldata->gGroupmap = NULL; }
131         }
132 }
133
134 //**********************************************************************************************************************
135
136 int BinSeqCommand::execute(){
137         try {
138                 if (abort == true) {    return 0;       }
139         
140                 int count = 1;
141                 int error = 0;
142                 
143                 //read fastafile
144                 fasta->readFastaFile(fastafile);
145                 
146                 
147                 //set format to list so input can get listvector
148 //              globaldata->setFormat("list");
149                 
150                 //if user gave a namesfile then use it
151                 if (namesfile != "") {
152                         readNamesFile();
153                 }
154                 
155                 //read list file
156                 read = new ReadOTUFile(globaldata->getListFile());      
157                 read->read(&*globaldata); 
158                 
159                 input = globaldata->ginput;
160                 list = globaldata->gListVector;
161                 string lastLabel = list->getLabel();
162                 
163                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
164                 set<string> processedLabels;
165                 set<string> userLabels = labels;
166                 set<int> userLines = lines;
167
168                                 
169                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
170                         
171                         if(allLines == 1 || lines.count(count) == 1 || labels.count(list->getLabel()) == 1){
172                                 
173                                 error = process(list, count);   
174                                 if (error == 1) { return 0; }   
175                                                         
176                                 processedLabels.insert(list->getLabel());
177                                 userLabels.erase(list->getLabel());
178                                 userLines.erase(count);
179                         }
180                         
181                         if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
182                                 delete list;
183                                 list = input->getListVector(lastLabel);
184                                 
185                                 error = process(list, count);   
186                                 if (error == 1) { return 0; }
187                                                                                                         
188                                 processedLabels.insert(list->getLabel());
189                                 userLabels.erase(list->getLabel());
190                                 
191                         }
192                         
193                         lastLabel = list->getLabel();                   
194                         
195                         delete list;
196                         list = input->getListVector();
197                         count++;
198                 }
199                 
200                 
201                 //output error messages about any remaining user labels
202                 set<string>::iterator it;
203                 bool needToRun = false;
204                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
205                         mothurOut("Your file does not include the label " + *it); 
206                         if (processedLabels.count(lastLabel) != 1) {
207                                 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
208                                 needToRun = true;
209                         }else {
210                                 mothurOut(". Please refer to " + lastLabel + ".");  mothurOutEndLine();
211                         }
212                 }
213                 
214                 //run last line if you need to
215                 if (needToRun == true)  {
216                         delete list;
217                         list = input->getListVector(lastLabel);
218                                 
219                         error = process(list, count);   
220                         if (error == 1) { return 0; }
221                         
222                         delete list;  
223                 }
224                 
225                 return 0;
226         }
227         catch(exception& e) {
228                 errorOut(e, "BinSeqCommand", "execute");
229                 exit(1);
230         }
231 }
232
233 //**********************************************************************************************************************
234 void BinSeqCommand::readNamesFile() {
235         try {
236                 vector<string> dupNames;
237                 openInputFile(namesfile, inNames);
238                 
239                 string name, names, sequence;
240         
241                 while(inNames){
242                         inNames >> name;                        //read from first column  A
243                         inNames >> names;               //read from second column  A,B,C,D
244                         
245                         dupNames.clear();
246                         
247                         //parse names into vector
248                         splitAtComma(names, dupNames);
249                         
250                         //store names in fasta map
251                         sequence = fasta->getSequence(name);
252                         for (int i = 0; i < dupNames.size(); i++) {
253                                 fasta->push_back(dupNames[i], sequence);
254                         }
255                 
256                         gobble(inNames);
257                 }
258                 inNames.close();
259
260         }
261         catch(exception& e) {
262                 errorOut(e, "BinSeqCommand", "readNamesFile");
263                 exit(1);
264         }
265 }
266 //**********************************************************************************************************************
267 //return 1 if error, 0 otherwise
268 int BinSeqCommand::process(ListVector* list, int count) {
269         try {
270                                 string binnames, name, sequence;
271                                 string outputFileName = getRootName(globaldata->getListFile()) + list->getLabel() + ".fasta";
272                                 openOutputFile(outputFileName, out);
273
274                                 mothurOut(list->getLabel() + "\t" + toString(count)); mothurOutEndLine();
275                                 
276                                 //for each bin in the list vector
277                                 for (int i = 0; i < list->size(); i++) {
278
279                                         binnames = list->get(i);
280                                         while (binnames.find_first_of(',') != -1) { 
281                                                 name = binnames.substr(0,binnames.find_first_of(','));
282                                                 binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
283                                                 
284                                                 //do work for that name
285                                                 sequence = fasta->getSequence(name);
286                                                 if (sequence != "not found") {
287                                                         //if you don't have groups
288                                                         if (groupfile == "") {
289                                                                 name = name + "|" + toString(i+1);
290                                                                 out << ">" << name << endl;
291                                                                 out << sequence << endl;
292                                                         }else {//if you do have groups
293                                                                 string group = groupMap->getGroup(name);
294                                                                 if (group == "not found") {  
295                                                                         mothurOut(name + " is missing from your group file. Please correct. ");  mothurOutEndLine();
296                                                                         remove(outputFileName.c_str());
297                                                                         return 1;
298                                                                 }else{
299                                                                         name = name + "|" + group + "|" + toString(i+1);
300                                                                         out << ">" << name << endl;
301                                                                         out << sequence << endl;
302                                                                 }
303                                                         }
304                                                 }else { 
305                                                         mothurOut(name + " is missing from your fasta or name file. Please correct. "); mothurOutEndLine();
306                                                         remove(outputFileName.c_str());
307                                                         return 1;
308                                                 }
309                                                 
310                                         }
311                                         
312                                         //get last name
313                                         sequence = fasta->getSequence(binnames);
314                                         if (sequence != "not found") {
315                                                 //if you don't have groups
316                                                 if (groupfile == "") {
317                                                         binnames = binnames + "|" + toString(i+1);
318                                                         out << ">" << binnames << endl;
319                                                         out << sequence << endl;
320                                                 }else {//if you do have groups
321                                                         string group = groupMap->getGroup(binnames);
322                                                         if (group == "not found") {  
323                                                                 mothurOut(binnames + " is missing from your group file. Please correct. "); mothurOutEndLine();
324                                                                 remove(outputFileName.c_str());
325                                                                 return 1;
326                                                         }else{
327                                                                 binnames = binnames + "|" + group + "|" + toString(i+1);
328                                                                 out << ">" << binnames << endl;
329                                                                 out << sequence << endl;
330                                                         }
331                                                 }
332                                         }else { 
333                                                 mothurOut(binnames + " is missing from your fasta or name file. Please correct. "); mothurOutEndLine();
334                                                 remove(outputFileName.c_str());
335                                                 return 1;
336                                         }
337                                 }
338                                         
339                                 out.close();
340                                 return 0;
341
342         }
343         catch(exception& e) {
344                 errorOut(e, "BinSeqCommand", "process");
345                 exit(1);
346         }
347 }
348 //**********************************************************************************************************************
349
350