]> git.donarmstrong.com Git - mothur.git/blob - catchallcommand.cpp
added summary output to catchall command
[mothur.git] / catchallcommand.cpp
1 /*
2  *  catchallcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/11/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "catchallcommand.h"
11 #include "globaldata.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> CatchAllCommand::getValidParameters(){   
15         try {
16                 string AlignArray[] =  {"sabund","label","inputdir","outputdir"};
17                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "CatchAllCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 CatchAllCommand::CatchAllCommand(){     
27         try {
28                 //initialize outputTypes
29                 vector<string> tempOutNames;
30                 outputTypes["csv"] = tempOutNames;
31                 outputTypes["summary"] = tempOutNames;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "CatchAllCommand", "CatchAllCommand");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 vector<string> CatchAllCommand::getRequiredParameters(){        
40         try {
41                 string AlignArray[] =  {"sabund"};
42                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "CatchAllCommand", "getRequiredParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 vector<string> CatchAllCommand::getRequiredFiles(){     
52         try {
53                 vector<string> myArray;
54                 return myArray;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "CatchAllCommand", "getRequiredFiles");
58                 exit(1);
59         }
60 }
61 /**************************************************************************************/
62 CatchAllCommand::CatchAllCommand(string option)  {      
63         try {
64                 globaldata = GlobalData::getInstance();
65                 abort = false;
66                 allLines = 1;
67                 
68                 //allow user to run help
69                 if(option == "help") { help(); abort = true; }
70                 
71                 else {
72                         //valid paramters for this command
73                         string Array[] =  {"sabund","label","inputdir","outputdir"};
74                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
75                         
76                         OptionParser parser(option);
77                         map<string,string> parameters = parser.getParameters();
78                         
79                         ValidParameters validParameter;
80                         map<string, string>::iterator it;
81                 
82                         //check to make sure all parameters are valid for command
83                         for (it = parameters.begin(); it != parameters.end(); it++) { 
84                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
85                         }
86                         
87                         //initialize outputTypes
88                         vector<string> tempOutNames;
89                         outputTypes["csv"] = tempOutNames;
90                         outputTypes["summary"] = tempOutNames;
91                         
92                         //if the user changes the input directory command factory will send this info to us in the output parameter 
93                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
94                         if (inputDir == "not found"){   inputDir = "";          }
95                         else {
96                                 string path;
97                                 it = parameters.find("sabund");
98                                 //user has given a template file
99                                 if(it != parameters.end()){ 
100                                         path = m->hasPath(it->second);
101                                         //if the user has not given a path then, add inputdir. else leave path alone.
102                                         if (path == "") {       parameters["sabund"] = inputDir + it->second;           }
103                                 }
104                         }
105
106                         //check for required parameters
107                         sabundfile = validParameter.validFile(parameters, "sabund", true);
108                         if (sabundfile == "not open") { sabundfile = ""; abort = true; }
109                         else if (sabundfile == "not found") { sabundfile = "";  m->mothurOut("You must provide a sabund file for the catchall command."); m->mothurOutEndLine(); abort=true; }
110                         else { globaldata->setSabundFile(sabundfile); globaldata->setFormat("sabund"); }
111                         
112                         string label = validParameter.validFile(parameters, "label", false);                    
113                         if (label == "not found") { label = ""; }
114                         else { 
115                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
116                                 else { allLines = 1;  }
117                         }
118                 
119
120                         //if the user changes the output directory command factory will send this info to us in the output parameter 
121                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sabundfile);     }
122                 }
123
124         }
125         catch(exception& e) {
126                 m->errorOut(e, "CatchAllCommand", "CatchAllCommand");
127                 exit(1);
128         }
129 }
130 //**********************************************************************************************************************
131
132 void CatchAllCommand::help(){
133         try {
134                 m->mothurOut("The catchall command interfaces mothur with the catchall program written by Linda Woodard, Sean Connolly and John Bunge.\n");
135                 m->mothurOut("For more information about catchall refer to http://www.northeastern.edu/catchall/index.html \n");
136                 m->mothurOut("The catchall executable must be in a folder called catchall in the same folder as your mothur executable, similar to mothur's requirements for using blast. \n");
137                 m->mothurOut("If you are a MAC or Linux user you must also have installed mono, a link to mono is on the webpage. \n");
138                 m->mothurOut("The catchall command parameters are sabund and label, sabund is required. \n");
139                 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
140                 m->mothurOut("The catchall command should be in the following format: \n");
141                 m->mothurOut("catchall(sabund=yourSabundFile) \n");
142                 m->mothurOut("Example: catchall(sabund=abrecovery.fn.sabund) \n");      
143         }
144         catch(exception& e) {
145                 m->errorOut(e, "CatchAllCommand", "help");
146                 exit(1);
147         }
148 }
149
150 /**************************************************************************************/
151 int CatchAllCommand::execute() {        
152         try {
153                 
154                 if (abort == true) { return 0; }
155                 
156                 //prepare full output directory
157                 outputDir = m->getFullPathName(outputDir);
158                 
159                 //get location of catchall
160                 GlobalData* globaldata = GlobalData::getInstance();
161                 path = globaldata->argv;
162                 path = path.substr(0, (path.find_last_of('m')));
163                 path = m->getFullPathName(path);
164
165                 string catchAllCommandExe = ""; 
166                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
167                         catchAllCommandExe += "mono " + path + "catchall/CatchAllcmdL.exe ";
168                 #else
169                         catchAllCommandExe += path + "catchall/CatchAllcmdW.exe ";
170                 #endif
171                 
172                 read = new ReadOTUFile(sabundfile);     
173                 read->read(&*globaldata); 
174                 
175                 SAbundVector* sabund = globaldata->sabund;
176                 string lastLabel = sabund->getLabel();
177                 input = globaldata->ginput;
178                                                 
179                 set<string> processedLabels;
180                 set<string> userLabels = labels;
181                 
182                 string summaryfilename = outputDir + m->getRootName(m->getSimpleName(sabundfile)) + "catchall.summary";
183                 summaryfilename = m->getFullPathName(summaryfilename);
184                 outputNames.push_back(summaryfilename); outputTypes["summary"].push_back(summaryfilename);
185                 
186                 ofstream out;
187                 m->openOutputFile(summaryfilename, out);        
188                 
189                 out << "label\tmodel\testimate\tlci\tuci" << endl;
190                 
191                 //for each label the user selected
192                 while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
193
194                                         
195                         if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
196                                         m->mothurOut(sabund->getLabel());  m->mothurOutEndLine();
197                                         
198                                         //create catchall input file from mothur's inputfile
199                                         string filename = process(sabund);
200                                         string outputPath = m->getPathName(filename);
201                                 
202                                         //create system command
203                                         string catchAllCommand = catchAllCommandExe + filename + " " + outputPath + " 1";
204                                 
205                                         //run catchall
206                                         system(catchAllCommand.c_str());
207                                 
208                                         remove(filename.c_str());
209                                 
210                                         filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
211                                 
212                                         outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
213                                         outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
214                                         outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
215                                         outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
216                                 
217                                         createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
218                                                                                 
219                                         if (m->control_pressed) { out.close(); for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str());    } delete read;  delete input; globaldata->ginput = NULL; delete sabund;  return 0; }
220
221                                         processedLabels.insert(sabund->getLabel());
222                                         userLabels.erase(sabund->getLabel());
223                         }
224                         
225                         if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
226                                         string saveLabel = sabund->getLabel();
227                                         
228                                         delete sabund;          
229                                         sabund = (input->getSAbundVector(lastLabel));
230                                         
231                                         m->mothurOut(sabund->getLabel());  m->mothurOutEndLine();
232                                         
233
234                                         //create catchall input file from mothur's inputfile
235                                         string filename = process(sabund);
236                                         string outputPath = m->getPathName(filename);
237                                         
238                                         //create system command
239                                         string catchAllCommand = catchAllCommandExe + filename + " " + outputPath + " 1";
240
241                                         //run catchall
242                                         system(catchAllCommand.c_str());
243                                 
244                                         remove(filename.c_str());
245                                 
246                                         filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
247                                 
248                                         outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
249                                         outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
250                                         outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
251                                         outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
252                                 
253                                         createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
254                                 
255                                         if (m->control_pressed) { out.close(); for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str());    } delete read;  delete input; globaldata->ginput = NULL; delete sabund;  return 0; }
256
257                                         processedLabels.insert(sabund->getLabel());
258                                         userLabels.erase(sabund->getLabel());
259                                         
260                                         //restore real lastlabel to save below
261                                         sabund->setLabel(saveLabel);
262                         }
263                         
264                         
265                         lastLabel = sabund->getLabel(); 
266                         
267                         delete sabund;          
268                         sabund = (input->getSAbundVector());
269                 }
270                 
271                 //output error messages about any remaining user labels
272                 set<string>::iterator it;
273                 bool needToRun = false;
274                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
275                         m->mothurOut("Your file does not include the label " + *it); 
276                         if (processedLabels.count(lastLabel) != 1) {
277                                 m->mothurOut(". I will use " + lastLabel + ".");  m->mothurOutEndLine();
278                                 needToRun = true;
279                         }else {
280                                 m->mothurOut(". Please refer to " + lastLabel + ".");  m->mothurOutEndLine();
281                         }
282                 }
283                 
284                 //run last label if you need to
285                 if (needToRun == true)  {
286                         if (sabund != NULL) {   delete sabund;  }
287                         sabund = (input->getSAbundVector(lastLabel));
288                         
289                         m->mothurOut(sabund->getLabel());  m->mothurOutEndLine();
290                         
291                         //create catchall input file from mothur's inputfile
292                         string filename = process(sabund);
293                         string outputPath = m->getPathName(filename);
294                         
295                         //create system command
296                         string catchAllCommand = catchAllCommandExe + filename + " " + outputPath + " 1";
297                         
298                         //run catchall
299                         system(catchAllCommand.c_str());
300                         
301                         remove(filename.c_str());
302                         
303                         filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
304                         
305                         outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
306                         outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
307                         outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
308                         outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");        
309                         
310                         createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
311                         
312                         delete sabund;
313                 }
314                 
315                 out.close();
316                 delete read;
317                 delete input; globaldata->ginput = NULL;
318                 
319                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } return 0; }
320                 
321                 m->mothurOutEndLine();
322                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
323                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
324                 m->mothurOutEndLine();
325                 
326
327                 return 0;
328         }
329         catch(exception& e) {
330                 m->errorOut(e, "CatchAllCommand", "execute");
331                 exit(1);
332         }
333 }
334 //**********************************************************************************************************************
335 string CatchAllCommand::process(SAbundVector* sabund) {
336         try {
337                 string filename = outputDir + m->getRootName(m->getSimpleName(sabundfile)) + sabund->getLabel() + ".csv";
338                 filename = m->getFullPathName(filename);
339         
340                 ofstream out;
341                 m->openOutputFile(filename, out);
342                 
343                 for (int i = 1; i <= sabund->getMaxRank(); i++) {
344                         int temp = sabund->get(i);
345                         
346                         if (temp != 0) {
347                                 out << i << "," << temp << endl;
348                         }
349                 }
350                 out.close();
351                 
352                 return filename;
353         
354         }
355         catch(exception& e) {
356                 m->errorOut(e, "CatchAllCommand", "process");
357                 exit(1);
358         }
359 }
360 //**********************************************************************************************************************
361 int CatchAllCommand::createSummaryFile(string file1, string label, ofstream& out) {
362         try {
363                 
364                 ifstream in;
365                 m->openInputFile(file1, in);
366                 
367                 if (!in.eof()) {
368                         
369                         string header = m->getline(in); m->gobble(in);
370                         
371                         int pos = header.find("Total Number of Observed Species =");
372                         string numString = "";
373                         
374                         
375                         if (pos == string::npos) { m->mothurOut("[ERROR]: cannot parse " + file1); m->mothurOutEndLine(); }
376                         else {
377                                 //pos will be the position of the T in total, so we want to count to the position of =
378                                 pos += 34;
379                                 char c=header[pos];
380                                 while (c != ','){
381                                         if (c != ' ') {
382                                                 numString += c;
383                                         }
384                                         pos++;
385                                         c=header[pos];
386                                         
387                                         //sanity check
388                                         if (pos > header.length()) { m->mothurOut("Cannot find number of OTUs in " + file1); m->mothurOutEndLine(); in.close(); return 0; }
389                                 }
390                         }
391                                                                                                                           
392                         string firstline = m->getline(in); m->gobble(in);
393                         vector<string> values;
394                         m->splitAtComma(firstline, values);
395                         
396                         values.pop_back(); //last value is always a blank string since the last character in the line is always a ','
397                         
398                         if (values.size() == 1) { //grab next line if firstline didn't have what you wanted
399                                 string secondline = m->getline(in); m->gobble(in);
400                                 values.clear();
401                                 m->splitAtComma(secondline, values);
402                                 
403                                 values.pop_back(); //last value is always a blank string since the last character in the line is always a ','
404                         }
405                         
406                         if (values.size() == 1) { //still not what we wanted fill values with numOTUs
407                                 values.resize(8, "");
408                                 values[1] = "Sobs";
409                                 values[4] = numString;
410                                 values[6] = numString;
411                                 values[7] = numString;
412                         }
413                         
414                         if (values.size() < 8) { values.resize(8, ""); }
415                         
416                         out << label << '\t' << values[1] << '\t' << values[4] << '\t' << values[6] << '\t' << values[7] << endl;
417                 }
418                 
419                 in.close();
420                 
421                 return 0;
422                 
423         }
424         catch(exception& e) {
425                 m->errorOut(e, "CatchAllCommand", "createSummaryFile");
426                 exit(1);
427         }
428 }
429 /**************************************************************************************/
430
431
432