]> git.donarmstrong.com Git - mothur.git/blob - parselistscommand.cpp
fixes while testing 1.33.0
[mothur.git] / parselistscommand.cpp
1 /*
2  *  parselistscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 2/24/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "parselistscommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> ParseListCommand::setParameters(){       
14         try {
15                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(plist);
16         CommandParameter pcount("count", "InputTypes", "", "", "CountGroup", "CountGroup", "none","",false,false,true); parameters.push_back(pcount);
17                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "CountGroup", "none","",false,false,true); parameters.push_back(pgroup);
18                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
19                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
20                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
21                 
22                 vector<string> myArray;
23                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
24                 return myArray;
25         }
26         catch(exception& e) {
27                 m->errorOut(e, "ParseListCommand", "setParameters");
28                 exit(1);
29         }
30 }
31 //**********************************************************************************************************************
32 string ParseListCommand::getHelpString(){       
33         try {
34                 string helpString = "";
35                 helpString += "The parse.list command reads a list and group or count file and generates a list file for each group in the group or count file. \n";
36                 helpString += "The parse.list command parameters are list, group, count and label.\n";
37                 helpString += "The list and group or count parameters are required.\n";
38         helpString += "If a count file is provided, mothur assumes the list file contains only unique names.\n";
39         helpString += "If a group file is provided, mothur assumes the list file contains all names.\n";
40                 helpString += "The label parameter is used to read specific labels in your input you want to use.\n";
41                 helpString += "The parse.list command should be used in the following format: parse.list(list=yourListFile, group=yourGroupFile, label=yourLabels).\n";
42                 helpString += "Example: parse.list(list=abrecovery.fn.list, group=abrecovery.groups, label=0.03).\n";
43                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n";
44                 return helpString;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "ParseListCommand", "getHelpString");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string ParseListCommand::getOutputPattern(string type) {
53     try {
54         string pattern = "";
55         
56         if (type == "list") {  pattern = "[filename],[group],[distance],list"; } 
57         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
58         
59         return pattern;
60     }
61     catch(exception& e) {
62         m->errorOut(e, "ParseListCommand", "getOutputPattern");
63         exit(1);
64     }
65 }
66 //**********************************************************************************************************************
67 ParseListCommand::ParseListCommand(){   
68         try {
69                 abort = true; calledHelp = true; 
70                 setParameters();
71                 vector<string> tempOutNames;
72                 outputTypes["list"] = tempOutNames;
73         }
74         catch(exception& e) {
75                 m->errorOut(e, "ParseListCommand", "ParseListCommand");
76                 exit(1);
77         }
78 }
79 //**********************************************************************************************************************
80 ParseListCommand::ParseListCommand(string option)  {
81         try {
82                 abort = false; calledHelp = false;   
83                 allLines = 1;
84                         
85                 //allow user to run help
86                 if(option == "help") { help(); abort = true; calledHelp = true; }
87                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
88                 
89                 else {
90                         vector<string> myArray = setParameters();
91                         
92                         OptionParser parser(option);
93                         map<string, string> parameters = parser.getParameters();
94                         
95                         ValidParameters validParameter;
96                         map<string, string>::iterator it;
97                 
98                         //check to make sure all parameters are valid for command
99                         for (it = parameters.begin(); it != parameters.end(); it++) { 
100                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
101                         }
102                         
103                         //initialize outputTypes
104                         vector<string> tempOutNames;
105                         outputTypes["list"] = tempOutNames;                     
106                                                                                                 
107                         //if the user changes the input directory command factory will send this info to us in the output parameter 
108                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
109                         if (inputDir == "not found"){   inputDir = "";          }
110                         else {
111                                 string path;
112                                 it = parameters.find("list");
113                                 //user has given a template file
114                                 if(it != parameters.end()){ 
115                                         path = m->hasPath(it->second);
116                                         //if the user has not given a path then, add inputdir. else leave path alone.
117                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
118                                 }
119                                 
120                                 it = parameters.find("group");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
126                                 }
127                 
128                 it = parameters.find("count");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
134                                 }
135                         }
136
137                         
138                         
139
140                         //check for required parameters
141                         listfile = validParameter.validFile(parameters, "list", true);
142                         if (listfile == "not open") { abort = true; }
143                         else if (listfile == "not found") { 
144                                 listfile = m->getListFile(); 
145                                 if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
146                                 else { 
147                                         m->mothurOut("No valid current list file. You must provide a list file."); m->mothurOutEndLine(); 
148                                         abort = true;
149                                                 
150                                 }
151                         }else { m->setListFile(listfile); }     
152             
153             //if the user changes the output directory command factory will send this info to us in the output parameter 
154                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(listfile);       }
155                         
156             groupfile = validParameter.validFile(parameters, "group", true);
157                         if (groupfile == "not found") { groupfile =  "";   groupMap = NULL; }
158                         else if (groupfile == "not open") { abort = true; groupfile =  ""; groupMap = NULL; }   
159                         else {   
160                 m->setGroupFile(groupfile);
161                                 groupMap = new GroupMap(groupfile);
162                                 
163                                 int error = groupMap->readMap();
164                                 if (error == 1) { abort = true; }
165             }
166             
167             countfile = validParameter.validFile(parameters, "count", true);
168                         if (countfile == "not found") { countfile =  "";   }
169                         else if (countfile == "not open") { abort = true; countfile =  ""; }    
170                         else {   
171                 m->setCountTableFile(countfile); 
172                 ct.readTable(countfile, true, false);
173                 if (!ct.hasGroupInfo()) { 
174                     abort = true;
175                     m->mothurOut("[ERROR]: The parse.list command requires group info to be present in your countfile, quitting."); m->mothurOutEndLine();
176                 }
177                     
178             }
179             
180             if ((groupfile != "") && (countfile != "")) {
181                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
182             }else if ((groupfile == "") && (countfile == "")) {
183                 m->mothurOut("[ERROR]: you must provide one of the following: group or count."); m->mothurOutEndLine(); abort=true;
184             }
185                         
186                         //check for optional parameter and set defaults
187                         // ...at some point should added some additional type checking...
188                         label = validParameter.validFile(parameters, "label", false);                   
189                         if (label == "not found") { label = "";  allLines = 1; }
190                         else { 
191                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
192                                 else { allLines = 1;  }
193                         }
194                 }
195
196         }
197         catch(exception& e) {
198                 m->errorOut(e, "ParseListCommand", "ParseListCommand");
199                 exit(1);
200         }
201 }
202 //**********************************************************************************************************************
203 int ParseListCommand::execute(){
204         try {
205         
206                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
207                 
208                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
209                 set<string> processedLabels;
210                 set<string> userLabels = labels;        
211         
212                 InputData input(listfile, "list");
213                 list = input.getListVector();
214                 string lastLabel = list->getLabel();
215                 
216                 if (m->control_pressed) { 
217                         delete list; if (groupfile != "") { delete groupMap; }
218                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
219                 }
220                 
221                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
222                 
223                         if (m->control_pressed) { 
224                                 delete list; if (groupfile != "") { delete groupMap; }
225                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
226                                 return 0;
227                         }
228                         
229                         if(allLines == 1 || labels.count(list->getLabel()) == 1){
230                                         
231                                         m->mothurOut(list->getLabel()); m->mothurOutEndLine();
232                                         parse(list);
233                                                                                 
234                                         processedLabels.insert(list->getLabel());
235                                         userLabels.erase(list->getLabel());
236                         }
237                         
238                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
239                                         string saveLabel = list->getLabel();
240                                         
241                                         list = input.getListVector(lastLabel); //get new list vector to process
242                                         
243                                         m->mothurOut(list->getLabel()); m->mothurOutEndLine();
244                                         parse(list);
245                                         
246                                         processedLabels.insert(list->getLabel());
247                                         userLabels.erase(list->getLabel());
248                                         
249                                         //restore real lastlabel to save below
250                                         list->setLabel(saveLabel);
251                         }
252                         
253                 
254                         lastLabel = list->getLabel();
255                                 
256                         delete list;
257                         list = input.getListVector(); //get new list vector to process
258                 }
259                 
260                 if (m->control_pressed) { 
261                         if (groupfile != "") { delete groupMap; }
262                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
263                         return 0;
264                 }
265                 
266                 //output error messages about any remaining user labels
267                 set<string>::iterator it;
268                 bool needToRun = false;
269                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
270                         m->mothurOut("Your file does not include the label " + *it); 
271                         if (processedLabels.count(lastLabel) != 1) {
272                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
273                                 needToRun = true;
274                         }else {
275                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
276                         }
277
278                 }
279                 
280                 if (m->control_pressed) { 
281                         if (groupfile != "") { delete groupMap; }
282                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
283                         return 0;
284                 }
285                 
286                 //run last label if you need to
287                 if (needToRun == true)  {
288                         if (list != NULL) {     delete list;    }
289                         list = input.getListVector(lastLabel); //get new list vector to process
290                         
291                         m->mothurOut(list->getLabel()); m->mothurOutEndLine();
292                         parse(list);            
293                         
294                         delete list;
295                 }
296                 
297                 if (groupfile != "") { delete groupMap; }
298                 
299                 if (m->control_pressed) { 
300                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
301                         return 0;
302                 }
303                 
304                 //set fasta file as new current fastafile
305                 string current = "";
306                 itTypes = outputTypes.find("list");
307                 if (itTypes != outputTypes.end()) {
308                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
309                 }
310                         
311                 m->mothurOutEndLine();
312                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
313                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
314                 m->mothurOutEndLine();
315                 
316                 return 0;
317         }
318         catch(exception& e) {
319                 m->errorOut(e, "ParseListCommand", "execute");
320                 exit(1);
321         }
322 }
323 /**********************************************************************************************************************/
324 int ParseListCommand::parse(ListVector* thisList) {
325         try {
326         map<string, ofstream*> filehandles;
327         map<string, ofstream*>::iterator it3;
328         
329         //set fileroot
330                 map<string, string> variables;
331         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
332         variables["[distance]"] = thisList->getLabel();
333                 
334                 //fill filehandles with neccessary ofstreams
335                 ofstream* temp;
336                 vector<string> gGroups;
337         if (groupfile != "") { gGroups = groupMap->getNamesOfGroups(); }
338         else { gGroups = ct.getNamesOfGroups(); }
339         
340                 for (int i=0; i<gGroups.size(); i++) {
341                         temp = new ofstream;
342                         filehandles[gGroups[i]] = temp;
343                         
344             variables["[group]"] = gGroups[i];
345                         string filename = getOutputFileName("list",variables);
346                         m->openOutputFile(filename, *temp);
347             outputNames.push_back(filename); outputTypes["list"].push_back(filename);
348                 }
349
350         
351                 map<string, string> groupVector;
352         map<string, string> groupLabels;
353                 map<string, string>::iterator itGroup;
354                 map<string, int> groupNumBins;
355                 
356                 //print label
357                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
358                         groupNumBins[it3->first] = 0;
359                         groupVector[it3->first] = "";
360             groupLabels[it3->first] = "label\tnumOtus\t";
361                 }
362
363                 vector<string> binLabels = thisList->getLabels();
364                 for (int i = 0; i < thisList->getNumBins(); i++) {
365                         if (m->control_pressed) { break; }
366                         
367                         map<string, string> groupBins;
368                         string bin = list->get(i); 
369                         
370                         vector<string> names;
371                         m->splitAtComma(bin, names);  //parses bin into individual sequence names
372                         
373                         //parse bin into list of sequences in each group
374                         for (int j = 0; j < names.size(); j++) {
375                 if (groupfile != "") {
376                     string group = groupMap->getGroup(names[j]);
377                                 
378                     if (group == "not found") { m->mothurOut(names[j] + " is not in your groupfile. please correct."); m->mothurOutEndLine(); exit(1); }
379                                 
380                     itGroup = groupBins.find(group);
381                     if(itGroup == groupBins.end()) {
382                         groupBins[group] = names[j];  //add first name
383                         groupNumBins[group]++;
384                     }else{ //add another name
385                         groupBins[group] = groupBins[group] + "," + names[j];
386                     }
387                 }else{
388                     vector<string> thisSeqsGroups = ct.getGroups(names[j]);
389                     
390                     for (int k = 0; k < thisSeqsGroups.size(); k++) {
391                         string group = thisSeqsGroups[k];
392                         itGroup = groupBins.find(group);
393                         if(itGroup == groupBins.end()) {
394                             groupBins[group] = names[j];  //add first name
395                             groupNumBins[group]++;
396                         }else{ //add another name
397                             groupBins[group] = groupBins[group] + "," + names[j];
398                         }
399
400                     }
401                 }
402                         }
403                         
404                         //print parsed bin info to files
405                         for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
406                                 groupVector[itGroup->first] +=  itGroup->second + '\t';
407                 groupLabels[itGroup->first] +=  binLabels[i] + '\t';
408                         }
409                 
410                 }
411                 
412         if (m->control_pressed) {
413             for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
414                 (*(filehandles[it3->first])).close();
415                 delete it3->second;
416             }
417             return 0;
418         }
419         
420                 //end list vector
421                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
422             (*(filehandles[it3->first])) << groupLabels[it3->first] << endl;
423                         (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
424             (*(filehandles[it3->first])).close();
425             delete it3->second;
426                 }
427                 
428                 return 0;
429
430         }
431         catch(exception& e) {
432                 m->errorOut(e, "ParseListCommand", "parse");
433                 exit(1);
434         }
435 }
436
437 /**********************************************************************************************************************/
438
439