git.donarmstrong.com Git - mothur.git/blob - parselistscommand.cpp
changing command name classify.shared to classifyrf.shared
[mothur.git] / parselistscommand.cpp
1 /*
2  *  parselistscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 2/24/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "parselistscommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> ParseListCommand::setParameters(){       
14         try {
15                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(plist);
16         CommandParameter pcount("count", "InputTypes", "", "", "CountGroup", "CountGroup", "none","",false,false,true); parameters.push_back(pcount);
17                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "CountGroup", "none","",false,false,true); parameters.push_back(pgroup);
18                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
19                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
20                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
21                 
22                 vector<string> myArray;
23                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
24                 return myArray;
25         }
26         catch(exception& e) {
27                 m->errorOut(e, "ParseListCommand", "setParameters");
28                 exit(1);
29         }
30 }
31 //**********************************************************************************************************************
32 string ParseListCommand::getHelpString(){       
33         try {
34                 string helpString = "";
35                 helpString += "The parse.list command reads a list and group or count file and generates a list file for each group in the group or count file. \n";
36                 helpString += "The parse.list command parameters are list, group, count and label.\n";
37                 helpString += "The list and group or count parameters are required.\n";
38         helpString += "If a count file is provided, mothur assumes the list file contains only unique names.\n";
39         helpString += "If a group file is provided, mothur assumes the list file contains all names.\n";
40                 helpString += "The label parameter is used to read specific labels in your input you want to use.\n";
41                 helpString += "The parse.list command should be used in the following format: parse.list(list=yourListFile, group=yourGroupFile, label=yourLabels).\n";
42                 helpString += "Example: parse.list(list=abrecovery.fn.list, group=abrecovery.groups, label=0.03).\n";
43                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n";
44                 return helpString;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "ParseListCommand", "getHelpString");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string ParseListCommand::getOutputPattern(string type) {
53     try {
54         string pattern = "";
55         
56         if (type == "list") {  pattern = "[filename],[group],list"; } 
57         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
58         
59         return pattern;
60     }
61     catch(exception& e) {
62         m->errorOut(e, "ParseListCommand", "getOutputPattern");
63         exit(1);
64     }
65 }
66 //**********************************************************************************************************************
67 ParseListCommand::ParseListCommand(){   
68         try {
69                 abort = true; calledHelp = true; 
70                 setParameters();
71                 vector<string> tempOutNames;
72                 outputTypes["list"] = tempOutNames;
73         }
74         catch(exception& e) {
75                 m->errorOut(e, "ParseListCommand", "ParseListCommand");
76                 exit(1);
77         }
78 }
79 //**********************************************************************************************************************
80 ParseListCommand::ParseListCommand(string option)  {
81         try {
82                 abort = false; calledHelp = false;   
83                 allLines = 1;
84                         
85                 //allow user to run help
86                 if(option == "help") { help(); abort = true; calledHelp = true; }
87                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
88                 
89                 else {
90                         vector<string> myArray = setParameters();
91                         
92                         OptionParser parser(option);
93                         map<string, string> parameters = parser.getParameters();
94                         
95                         ValidParameters validParameter;
96                         map<string, string>::iterator it;
97                 
98                         //check to make sure all parameters are valid for command
99                         for (it = parameters.begin(); it != parameters.end(); it++) { 
100                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
101                         }
102                         
103                         //initialize outputTypes
104                         vector<string> tempOutNames;
105                         outputTypes["list"] = tempOutNames;                     
106                                                                                                 
107                         //if the user changes the input directory command factory will send this info to us in the output parameter 
108                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
109                         if (inputDir == "not found"){   inputDir = "";          }
110                         else {
111                                 string path;
112                                 it = parameters.find("list");
113                                 //user has given a template file
114                                 if(it != parameters.end()){ 
115                                         path = m->hasPath(it->second);
116                                         //if the user has not given a path then, add inputdir. else leave path alone.
117                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
118                                 }
119                                 
120                                 it = parameters.find("group");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
126                                 }
127                 
128                 it = parameters.find("count");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
134                                 }
135                         }
136
137                         
138                         
139
140                         //check for required parameters
141                         listfile = validParameter.validFile(parameters, "list", true);
142                         if (listfile == "not open") { abort = true; }
143                         else if (listfile == "not found") { 
144                                 listfile = m->getListFile(); 
145                                 if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
146                                 else { 
147                                         m->mothurOut("No valid current list file. You must provide a list file."); m->mothurOutEndLine(); 
148                                         abort = true;
149                                                 
150                                 }
151                         }else { m->setListFile(listfile); }     
152             
153             //if the user changes the output directory command factory will send this info to us in the output parameter 
154                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(listfile);       }
155                         
156             groupfile = validParameter.validFile(parameters, "group", true);
157                         if (groupfile == "not found") { groupfile =  "";   groupMap = NULL; }
158                         else if (groupfile == "not open") { abort = true; groupfile =  ""; groupMap = NULL; }   
159                         else {   
160                 m->setGroupFile(groupfile);
161                                 groupMap = new GroupMap(groupfile);
162                                 
163                                 int error = groupMap->readMap();
164                                 if (error == 1) { abort = true; }
165             }
166             
167             countfile = validParameter.validFile(parameters, "count", true);
168                         if (countfile == "not found") { countfile =  "";   }
169                         else if (countfile == "not open") { abort = true; countfile =  ""; }    
170                         else {   
171                 m->setCountTableFile(countfile); 
172                 ct.readTable(countfile, true);
173                 if (!ct.hasGroupInfo()) { 
174                     abort = true;
175                     m->mothurOut("[ERROR]: The parse.list command requires group info to be present in your countfile, quitting."); m->mothurOutEndLine();
176                 }
177                     
178             }
179             
180             if ((groupfile != "") && (countfile != "")) {
181                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
182             }else if ((groupfile == "") && (countfile == "")) {
183                 m->mothurOut("[ERROR]: you must provide one of the following: group or count."); m->mothurOutEndLine(); abort=true;
184             }
185                         
186                         //check for optional parameter and set defaults
187                         // ...at some point should added some additional type checking...
188                         label = validParameter.validFile(parameters, "label", false);                   
189                         if (label == "not found") { label = "";  allLines = 1; }
190                         else { 
191                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
192                                 else { allLines = 1;  }
193                         }
194                 }
195
196         }
197         catch(exception& e) {
198                 m->errorOut(e, "ParseListCommand", "ParseListCommand");
199                 exit(1);
200         }
201 }
202 //**********************************************************************************************************************
203 int ParseListCommand::execute(){
204         try {
205         
206                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
207                 
208                 //set fileroot
209                 map<string, string> variables; 
210         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
211                 
212                 //fill filehandles with neccessary ofstreams
213                 int i;
214                 ofstream* temp;
215                 vector<string> gGroups;
216         if (groupfile != "") { gGroups = groupMap->getNamesOfGroups(); }
217         else { gGroups = ct.getNamesOfGroups(); }
218         
219                 for (i=0; i<gGroups.size(); i++) {
220                         temp = new ofstream;
221                         filehandles[gGroups[i]] = temp;
222                         
223             variables["[group]"] = gGroups[i];
224                         string filename = getOutputFileName("list",variables);
225                         outputNames.push_back(filename); outputTypes["list"].push_back(filename);
226                         m->openOutputFile(filename, *temp);
227                 }
228                 
229                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
230                 set<string> processedLabels;
231                 set<string> userLabels = labels;        
232         
233                 InputData input(listfile, "list");
234                 list = input.getListVector();
235                 string lastLabel = list->getLabel();
236                 
237                 if (m->control_pressed) { 
238                         delete list; if (groupfile != "") { delete groupMap; }
239                         for (i=0; i<gGroups.size(); i++) {  (*(filehandles[gGroups[i]])).close();  delete filehandles[gGroups[i]]; } 
240                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
241                         return 0;
242                 }
243                 
244                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
245                 
246                         if (m->control_pressed) { 
247                                 delete list; if (groupfile != "") { delete groupMap; }
248                                 for (i=0; i<gGroups.size(); i++) {  (*(filehandles[gGroups[i]])).close();  delete filehandles[gGroups[i]]; } 
249                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
250                                 return 0;
251                         }
252                         
253                         if(allLines == 1 || labels.count(list->getLabel()) == 1){
254                                         
255                                         m->mothurOut(list->getLabel()); m->mothurOutEndLine();
256                                         parse(list);
257                                                                                 
258                                         processedLabels.insert(list->getLabel());
259                                         userLabels.erase(list->getLabel());
260                         }
261                         
262                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
263                                         string saveLabel = list->getLabel();
264                                         
265                                         list = input.getListVector(lastLabel); //get new list vector to process
266                                         
267                                         m->mothurOut(list->getLabel()); m->mothurOutEndLine();
268                                         parse(list);
269                                         
270                                         processedLabels.insert(list->getLabel());
271                                         userLabels.erase(list->getLabel());
272                                         
273                                         //restore real lastlabel to save below
274                                         list->setLabel(saveLabel);
275                         }
276                         
277                 
278                         lastLabel = list->getLabel();
279                                 
280                         delete list;
281                         list = input.getListVector(); //get new list vector to process
282                 }
283                 
284                 if (m->control_pressed) { 
285                         if (groupfile != "") { delete groupMap; }
286                         for (i=0; i<gGroups.size(); i++) {  (*(filehandles[gGroups[i]])).close();  delete filehandles[gGroups[i]]; } 
287                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
288                         return 0;
289                 }
290                 
291                 //output error messages about any remaining user labels
292                 set<string>::iterator it;
293                 bool needToRun = false;
294                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
295                         m->mothurOut("Your file does not include the label " + *it); 
296                         if (processedLabels.count(lastLabel) != 1) {
297                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
298                                 needToRun = true;
299                         }else {
300                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
301                         }
302
303                 }
304                 
305                 if (m->control_pressed) { 
306                         if (groupfile != "") { delete groupMap; }
307                         for (i=0; i<gGroups.size(); i++) {  (*(filehandles[gGroups[i]])).close();  delete filehandles[gGroups[i]]; } 
308                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
309                         return 0;
310                 }
311                 
312                 //run last label if you need to
313                 if (needToRun == true)  {
314                         if (list != NULL) {     delete list;    }
315                         list = input.getListVector(lastLabel); //get new list vector to process
316                         
317                         m->mothurOut(list->getLabel()); m->mothurOutEndLine();
318                         parse(list);            
319                         
320                         delete list;
321                 }
322                 
323                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
324                         (*(filehandles[it3->first])).close();
325                         delete it3->second;
326                 }
327                 
328                 if (groupfile != "") { delete groupMap; }
329                 
330                 if (m->control_pressed) { 
331                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
332                         return 0;
333                 }
334                 
335                 //set fasta file as new current fastafile
336                 string current = "";
337                 itTypes = outputTypes.find("list");
338                 if (itTypes != outputTypes.end()) {
339                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
340                 }
341                         
342                 m->mothurOutEndLine();
343                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
344                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
345                 m->mothurOutEndLine();
346                 
347                 return 0;
348         }
349         catch(exception& e) {
350                 m->errorOut(e, "ParseListCommand", "execute");
351                 exit(1);
352         }
353 }
354 /**********************************************************************************************************************/
355 int ParseListCommand::parse(ListVector* thisList) {
356         try {
357         
358                 map<string, string> groupVector;
359                 map<string, string>::iterator itGroup;
360                 map<string, int> groupNumBins;
361                 
362                 //print label
363                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
364                         groupNumBins[it3->first] = 0;
365                         groupVector[it3->first] = "";
366                 }
367
368                 
369                 for (int i = 0; i < thisList->getNumBins(); i++) {
370                         if (m->control_pressed) { return 0; }
371                         
372                         map<string, string> groupBins;
373                         string bin = list->get(i); 
374                         
375                         vector<string> names;
376                         m->splitAtComma(bin, names);  //parses bin into individual sequence names
377                         
378                         //parse bin into list of sequences in each group
379                         for (int j = 0; j < names.size(); j++) {
380                 if (groupfile != "") {
381                     string group = groupMap->getGroup(names[j]);
382                                 
383                     if (group == "not found") { m->mothurOut(names[j] + " is not in your groupfile. please correct."); m->mothurOutEndLine(); exit(1); }
384                                 
385                     itGroup = groupBins.find(group);
386                     if(itGroup == groupBins.end()) {
387                         groupBins[group] = names[j];  //add first name
388                         groupNumBins[group]++;
389                     }else{ //add another name
390                         groupBins[group] = groupBins[group] + "," + names[j];
391                     }
392                 }else{
393                     vector<string> thisSeqsGroups = ct.getGroups(names[j]);
394                     
395                     for (int k = 0; k < thisSeqsGroups.size(); k++) {
396                         string group = thisSeqsGroups[k];
397                         itGroup = groupBins.find(group);
398                         if(itGroup == groupBins.end()) {
399                             groupBins[group] = names[j];  //add first name
400                             groupNumBins[group]++;
401                         }else{ //add another name
402                             groupBins[group] = groupBins[group] + "," + names[j];
403                         }
404
405                     }
406                 }
407                         }
408                         
409                         //print parsed bin info to files
410                         for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
411                                 groupVector[itGroup->first] +=  itGroup->second + '\t'; 
412                         }
413                 
414                 }
415                 
416                 //end list vector
417                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
418                         (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
419                 }
420                 
421                 return 0;
422
423         }
424         catch(exception& e) {
425                 m->errorOut(e, "ParseListCommand", "parse");
426                 exit(1);
427         }
428 }
429
430 /**********************************************************************************************************************/
431
432