5 * Created by westcott on 5/11/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "catchallcommand.h"
12 //**********************************************************************************************************************
// Declares the parameters accepted by the catchall command and collects their names.
// Every CommandParameter built here is appended to `parameters`; the names are then copied,
// in registration order, into myArray (presumably the value handed back to the caller - TODO confirm).
13 vector<string> CatchAllCommand::setParameters(){
15 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
16 //can choose shared or sabund not both, so put them in the same chooseOnlyOneGroup
17 CommandParameter pshared("shared", "InputTypes", "", "", "catchallInputs", "catchallInputs", "none",false,false); parameters.push_back(pshared);
18 CommandParameter psabund("sabund", "InputTypes", "", "", "catchallInputs", "catchallInputs", "none",false,false); parameters.push_back(psabund);
19 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
20 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// gather only the name of each parameter registered above
22 vector<string> myArray;
23 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// passes e plus the class and method name to m->errorOut
27 m->errorOut(e, "CatchAllCommand", "setParameters");
31 //**********************************************************************************************************************
// Builds the user-facing help text for the catchall command by appending one
// sentence at a time to helpString (presumably the returned value - TODO confirm).
// The text covers: what catchall is, where the executable must live, the mono
// requirement on Mac/Linux, the accepted parameters, and an example invocation.
32 string CatchAllCommand::getHelpString(){
34 string helpString = "";
35 helpString += "The catchall command interfaces mothur with the catchall program written by Linda Woodard, Sean Connolly and John Bunge.\n";
36 helpString += "For more information about catchall refer to http://www.northeastern.edu/catchall/index.html \n";
37 helpString += "The catchall executable must be in the same folder as your mothur executable. \n";
38 helpString += "If you are a MAC or Linux user you must also have installed mono, a link to mono is on the webpage. \n";
39 helpString += "The catchall command parameters are shared, sabund and label. shared or sabund is required. \n";
40 helpString += "The label parameter is used to analyze specific labels in your input.\n";
41 helpString += "The catchall command should be in the following format: \n";
42 helpString += "catchall(sabund=yourSabundFile) \n";
43 helpString += "Example: catchall(sabund=abrecovery.fn.sabund) \n";
// passes e plus the class and method name to m->errorOut
47 m->errorOut(e, "CatchAllCommand", "getHelpString");
51 //**********************************************************************************************************************
// Default constructor: marks the command as aborted with help requested (so
// execute() returns immediately) and registers the two output categories,
// "csv" and "summary", each with an empty file list.
52 CatchAllCommand::CatchAllCommand(){
54 abort = true; calledHelp = true;
56 //initialize outputTypes
57 vector<string> tempOutNames;
58 outputTypes["csv"] = tempOutNames;
59 outputTypes["summary"] = tempOutNames;
// passes e plus the class and method name to m->errorOut
62 m->errorOut(e, "CatchAllCommand", "CatchAllCommand");
66 /**************************************************************************************/
// Constructs the command from the user's option string.
// Steps, in order: handle "help"; parse and validate the options against
// setParameters(); register the "csv"/"summary" output categories; prefix
// path-less sabund/shared file names with inputdir; open-check the sabund and
// shared files; read the label selection; fall back to mothur's current
// shared/sabund file when neither was supplied; and choose the output directory.
// `abort` is set whenever an option is invalid or a required file cannot be opened.
67 CatchAllCommand::CatchAllCommand(string option) {
70 abort = false; calledHelp = false;
73 //allow user to run help
74 if(option == "help") { help(); abort = true; calledHelp = true; }
77 vector<string> myArray = setParameters();
79 OptionParser parser(option);
// NOTE(review): this local map has the same name as the `parameters` container filled by setParameters()
80 map<string,string> parameters = parser.getParameters();
82 ValidParameters validParameter;
83 map<string, string>::iterator it;
85 //check to make sure all parameters are valid for command
86 for (it = parameters.begin(); it != parameters.end(); it++) {
87 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
90 //initialize outputTypes
91 vector<string> tempOutNames;
92 outputTypes["csv"] = tempOutNames;
93 outputTypes["summary"] = tempOutNames;
95 //if the user changes the input directory command factory will send this info to us in the output parameter
96 string inputDir = validParameter.validFile(parameters, "inputdir", false);
97 if (inputDir == "not found"){ inputDir = ""; }
100 it = parameters.find("sabund");
101 //user has given a sabund file
102 if(it != parameters.end()){
103 path = m->hasPath(it->second);
104 //if the user has not given a path then, add inputdir. else leave path alone.
105 if (path == "") { parameters["sabund"] = inputDir + it->second; }
108 it = parameters.find("shared");
109 //user has given a shared file
110 if(it != parameters.end()){
111 path = m->hasPath(it->second);
112 //if the user has not given a path then, add inputdir. else leave path alone.
113 if (path == "") { parameters["shared"] = inputDir + it->second; }
117 //check for required parameters
// "not open" means the file was named but could not be opened (abort); "not found" means it was not supplied
118 sabundfile = validParameter.validFile(parameters, "sabund", true);
119 if (sabundfile == "not open") { sabundfile = ""; abort = true; }
120 else if (sabundfile == "not found") { sabundfile = ""; }
122 sharedfile = validParameter.validFile(parameters, "shared", true);
123 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
124 else if (sharedfile == "not found") { sharedfile = ""; }
126 string label = validParameter.validFile(parameters, "label", false);
127 if (label == "not found") { label = ""; }
// any value other than "all" is split on dashes into the `labels` set; "all" selects every label
129 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
130 else { allLines = 1; }
// neither input was supplied on the command line: try mothur's current files
133 if ((sharedfile == "") && (sabundfile == "")) {
134 //is there a current file available for either of these?
135 //give priority to shared, then sabund
136 //if there is a current shared file, use it
137 sharedfile = m->getSharedFile();
138 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
140 sabundfile = m->getSabundFile();
141 if (sabundfile != "") { m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter."); m->mothurOutEndLine(); }
143 m->mothurOut("No valid current files. You must provide a sabund or shared file before you can use the catchall command."); m->mothurOutEndLine();
149 //if the user changes the output directory command factory will send this info to us in the output parameter
150 outputDir = validParameter.validFile(parameters, "outputdir", false);
// no outputdir given: default to the directory of the input file (sabund preferred over shared)
151 if (outputDir == "not found"){
152 if (sabundfile != "") { outputDir = m->hasPath(sabundfile); }
153 else { outputDir = m->hasPath(sharedfile); }
158 catch(exception& e) {
159 m->errorOut(e, "CatchAllCommand", "CatchAllCommand");
163 /**************************************************************************************/
// Runs the external catchall program once for every selected label of every input file.
// For a shared file the input is first split into one sabund file per group (parseSharedFile);
// otherwise the single sabund file is used directly. For each processed label the method writes
// a catchall input csv (process), launches catchall through system(), records the four csv files
// catchall is expected to produce, and appends one row to a per-input "catchall.summary" file
// (createSummaryFile). Per-group summaries are merged with combineSummmary when a shared file was used.
// Returns 0 when help was requested or when m->control_pressed is set, 2 when the command was aborted.
// NOTE(review): the "build command / system() / record outputs / summarize" sequence below appears
// three times with identical statements - candidate for a private helper.
164 int CatchAllCommand::execute() {
167 if (abort == true) { if (calledHelp) { return 0; } return 2; }
169 //get location of catchall
// NOTE(review): find_last_of returns the last position of ANY character in "othur", not of the
// substring; the -5 offset assumes that match is the final 'r' of "mothur" - verify for other paths.
171 path = path.substr(0, (path.find_last_of("othur")-5));
172 path = m->getFullPathName(path);
// remember the directory as the user gave it; outputDir itself is rewritten to a full path below
174 savedOutputDir = outputDir;
175 string catchAllCommandExe = "";
// Apple/Linux branch: the Linux build of catchall is launched through mono
176 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
177 catchAllCommandExe += "mono " + path + "CatchAllcmdL.exe ";
178 if (outputDir == "") { outputDir = "./"; } //force full pathname to be created for catchall, this is necessary because if catchall is in the path it will look for input file wherever the exe is and not the cwd.
// other-platform branch: the executable path is wrapped in double quotes
180 catchAllCommandExe += "\"" + path + "CatchAllcmdW.exe\"" + " ";
181 if (outputDir == "") { outputDir = ".\\"; } //force full pathname to be created for catchall, this is necessary because if catchall is in the path it will look for input file wherever the exe is and not the cwd.
184 //prepare full output directory
185 outputDir = m->getFullPathName(outputDir);
// one input file per group for a shared file, otherwise just the sabund file
187 vector<string> inputFileNames;
188 if (sharedfile != "") { inputFileNames = parseSharedFile(sharedfile); }
189 else { inputFileNames.push_back(sabundfile); }
191 for (int p = 0; p < inputFileNames.size(); p++) {
192 if (inputFileNames.size() > 1) {
193 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
// NOTE(review): input is allocated with new; it is deleted on the control_pressed exits below -
// confirm it is also released on the normal path.
196 InputData* input = new InputData(inputFileNames[p], "sabund");
197 SAbundVector* sabund = input->getSAbundVector();
198 string lastLabel = sabund->getLabel();
// processedLabels: labels already run; userLabels: requested labels not yet matched
200 set<string> processedLabels;
201 set<string> userLabels = labels;
203 string summaryfilename = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "catchall.summary";
204 summaryfilename = m->getFullPathName(summaryfilename);
207 m->openOutputFile(summaryfilename, out);
209 out << "label\tmodel\testimate\tlci\tuci" << endl;
211 //for each label the user selected
212 while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
// case 1: the current label is one the user asked for (or all labels were requested)
215 if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
216 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
218 //create catchall input file from mothur's inputfile
219 string filename = process(sabund, inputFileNames[p]);
220 string outputPath = m->getPathName(filename);
222 //create system command
223 string catchAllCommand = "";
// NOTE(review): in this branch filename and outputPath are not quoted, unlike the branch below -
// paths containing spaces would presumably be split by the shell; confirm.
224 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
225 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
// drop the last character of outputPath before quoting it
227 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
228 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
229 //wrap entire string in ""
230 catchAllCommand = "\"" + catchAllCommand + "\"";
// NOTE(review): the return value of system() is ignored; failure is only noticed later when
// createSummaryFile cannot open the expected csv.
234 system(catchAllCommand.c_str());
// the temporary catchall input csv is no longer needed
236 remove(filename.c_str());
238 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
239 if (savedOutputDir == "") { filename = m->getSimpleName(filename); }
// record the four csv files named after the input root
241 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
242 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
243 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
244 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
246 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
// on cancel (also set by createSummaryFile on failure): remove every recorded output and stop
248 if (m->control_pressed) { out.close(); for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } delete input; delete sabund; return 0; }
250 processedLabels.insert(sabund->getLabel());
251 userLabels.erase(sabund->getLabel());
// case 2: m->anyLabelsToProcess reports true for the current label and the previous label has
// not been run yet, so go back and run the previous label (lastLabel) instead
254 if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
255 string saveLabel = sabund->getLabel();
258 sabund = (input->getSAbundVector(lastLabel));
260 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
263 //create catchall input file from mothur's inputfile
264 string filename = process(sabund, inputFileNames[p]);
265 string outputPath = m->getPathName(filename);
267 //create system command
268 string catchAllCommand = "";
269 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
270 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
272 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
273 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
274 catchAllCommand = "\"" + catchAllCommand + "\"";
278 system(catchAllCommand.c_str());
280 remove(filename.c_str());
282 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
283 if (savedOutputDir == "") { filename = m->getSimpleName(filename); }
285 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
286 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
287 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
288 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
290 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
292 if (m->control_pressed) { out.close(); for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } delete input; delete sabund; return 0; }
294 processedLabels.insert(sabund->getLabel());
295 userLabels.erase(sabund->getLabel());
297 //restore real lastlabel to save below
298 sabund->setLabel(saveLabel);
// remember this label, then advance to the next vector in the input
302 lastLabel = sabund->getLabel();
305 sabund = (input->getSAbundVector());
308 //output error messages about any remaining user labels
309 set<string>::iterator it;
310 bool needToRun = false;
311 for (it = userLabels.begin(); it != userLabels.end(); it++) {
312 m->mothurOut("Your file does not include the label " + *it);
313 if (processedLabels.count(lastLabel) != 1) {
314 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
317 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
321 //run last label if you need to
322 if (needToRun == true) {
323 if (sabund != NULL) { delete sabund; }
324 sabund = (input->getSAbundVector(lastLabel));
326 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
328 //create catchall input file from mothur's inputfile
329 string filename = process(sabund, inputFileNames[p]);
330 string outputPath = m->getPathName(filename);
332 //create system command
333 string catchAllCommand = "";
334 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
335 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
337 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
338 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
339 catchAllCommand = "\"" + catchAllCommand + "\"";
343 system(catchAllCommand.c_str());
345 remove(filename.c_str());
347 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
348 if (savedOutputDir == "") { filename = m->getSimpleName(filename); }
350 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
351 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
352 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
353 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
355 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
363 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } return 0; }
// sabund input: record the single summary file; shared input: merge the per-group summaries
367 if (sharedfile == "") {
368 string summaryfilename = savedOutputDir + m->getRootName(m->getSimpleName(inputFileNames[0])) + "catchall.summary";
369 summaryfilename = m->getFullPathName(summaryfilename);
370 outputNames.push_back(summaryfilename); outputTypes["summary"].push_back(summaryfilename);
371 }else { //combine summaries
372 vector<string> sumNames;
373 for (int i = 0; i < inputFileNames.size(); i++) {
374 sumNames.push_back(m->getFullPathName(outputDir + m->getRootName(m->getSimpleName(inputFileNames[i])) + "catchall.summary"));
376 string summaryfilename = combineSummmary(sumNames);
377 outputNames.push_back(summaryfilename); outputTypes["summary"].push_back(summaryfilename);
// list every recorded output file for the user
380 m->mothurOutEndLine();
381 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
382 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
383 m->mothurOutEndLine();
388 catch(exception& e) {
389 m->errorOut(e, "CatchAllCommand", "execute");
393 //**********************************************************************************************************************
// Writes the catchall input csv for one SAbundVector.
// The file is named <outputDir><root of file1><label>.csv (converted to a full path) and
// receives rows of the form "i,sabund->get(i)" for i from 1 up to sabund->getMaxRank().
// execute() uses the returned string as the path of that file.
394 string CatchAllCommand::process(SAbundVector* sabund, string file1) {
396 string filename = outputDir + m->getRootName(m->getSimpleName(file1)) + sabund->getLabel() + ".csv";
397 filename = m->getFullPathName(filename);
400 m->openOutputFile(filename, out);
// indices start at 1, not 0
402 for (int i = 1; i <= sabund->getMaxRank(); i++) {
403 int temp = sabund->get(i);
406 out << i << "," << temp << endl;
414 catch(exception& e) {
415 m->errorOut(e, "CatchAllCommand", "process");
419 //*********************************************************************************************************************
// Merges the per-group summary files listed in outputNames into one file named
// <savedOutputDir><root of sharedfile>catchall.summary and returns that name.
// Each input line gets the group name (groups[i]) inserted before its second field, matching
// the combined header "label, group, model, estimate, lci, uci". Every per-group file is removed
// after it has been read. Output is interleaved: line k of every file, then line k+1, and so on.
// NOTE(review): the parameter appears to share its name with the outputNames container used in
// execute(); inside this method the name refers to the parameter.
420 string CatchAllCommand::combineSummmary(vector<string>& outputNames) {
424 string combineFileName = savedOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "catchall.summary";
427 m->openOutputFile(combineFileName, out);
429 out << "label\tgroup\tmodel\testimate\tlci\tuci" << endl;
431 //open each groups summary file
432 string newLabel = "";
// maps each summary file name to the rewritten lines read from it
434 map<string, vector<string> > files;
435 for (int i=0; i<outputNames.size(); i++) {
436 vector<string> thisFilesLines;
439 m->openInputFile(outputNames[i], temp);
441 //read through first line - labels
446 while (!temp.eof()) {
448 string thisLine = "";
// five fields per line; the group name is inserted in front of field index 1
451 for (int j = 0; j < 5; j++) {
455 if (j == 1) { thisLine += groups[i] + "\t" + tempLabel + "\t"; }
456 else{ thisLine += tempLabel + "\t"; }
461 thisFilesLines.push_back(thisLine);
466 files[outputNames[i]] = thisFilesLines;
// NOTE(review): numLines is overwritten for every file, so the loop below uses the line count of
// the last file read; files[...][k] is unchecked and would go out of range if an earlier file
// has fewer lines - confirm all group summaries always have the same length.
468 numLines = thisFilesLines.size();
471 remove(outputNames[i].c_str());
475 for (int k = 0; k < numLines; k++) {
477 //grab summary data for each group
478 for (int i=0; i<outputNames.size(); i++) {
479 out << files[outputNames[i]][k];
486 //return combine file name
487 return combineFileName;
490 catch(exception& e) {
491 m->errorOut(e, "CatchAllCommand", "combineSummmary");
495 //**********************************************************************************************************************
// Appends one summary row for `label` to `out`, taken from catchall's
// _BestModelsAnalysis.csv file (file1).
// If file1 cannot be opened, an error is printed, m->control_pressed is set to true and 0 is returned.
// Otherwise the header line is searched for "Total Number of Observed Species =" to obtain
// numString, and the first data line is split on commas (the second line is tried if the first
// yields a single value). If that still yields a single value, the estimate and both bounds are
// filled with numString. The row written is: label, values[1], values[4], values[6], values[7].
496 int CatchAllCommand::createSummaryFile(string file1, string label, ofstream& out) {
500 int able = m->openInputFile(file1, in, "noerror");
502 if (able == 1) { m->mothurOut("[ERROR]: the catchall program did not run properly. Please check to make sure it is located in the same folder as your mothur executable.");m->mothurOutEndLine(); m->control_pressed = true; return 0; }
506 string header = m->getline(in); m->gobble(in);
// NOTE(review): find() returns string::size_type; storing it in an int and comparing against
// string::npos / header.length() below relies on implicit signed/unsigned conversion - confirm intent.
508 int pos = header.find("Total Number of Observed Species =");
509 string numString = "";
512 if (pos == string::npos) { m->mothurOut("[ERROR]: cannot parse " + file1); m->mothurOutEndLine(); }
514 //pos will be the position of the T in total, so we want to count to the position of =
525 if (pos > header.length()) { m->mothurOut("Cannot find number of OTUs in " + file1); m->mothurOutEndLine(); in.close(); return 0; }
529 string firstline = m->getline(in); m->gobble(in);
530 vector<string> values;
531 m->splitAtComma(firstline, values);
// NOTE(review): pop_back() on an empty vector is undefined behavior - presumably splitAtComma
// always yields at least one element; verify.
533 values.pop_back(); //last value is always a blank string since the last character in the line is always a ','
535 if (values.size() == 1) { //grab next line if firstline didn't have what you wanted
536 string secondline = m->getline(in); m->gobble(in);
538 m->splitAtComma(secondline, values);
540 values.pop_back(); //last value is always a blank string since the last character in the line is always a ','
543 if (values.size() == 1) { //still not what we wanted fill values with numOTUs
544 values.resize(8, "");
// estimate, lower bound and upper bound all fall back to the observed count
546 values[4] = numString;
547 values[6] = numString;
548 values[7] = numString;
// guarantee indices 1, 4, 6 and 7 exist before they are read
551 if (values.size() < 8) { values.resize(8, ""); }
553 out << label << '\t' << values[1] << '\t' << values[4] << '\t' << values[6] << '\t' << values[7] << endl;
561 catch(exception& e) {
562 m->errorOut(e, "CatchAllCommand", "createSummaryFile");
566 //**********************************************************************************************************************
// Splits a shared file into one sabund file per group.
// Output files are named <outputDir><root of filename><group>.sabund. Any existing file of that
// name is removed first, because the files are opened in append mode while the shared file is read.
// Group names are appended to `groups` in the same order as the file names pushed onto `filenames`
// (presumably the returned vector - TODO confirm).
567 vector<string> CatchAllCommand::parseSharedFile(string filename) {
569 vector<string> filenames;
// NOTE(review): input is allocated with new; confirm it is deleted before this method returns.
572 InputData* input = new InputData(filename, "sharedfile");
573 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
575 string sharedFileRoot = outputDir + m->getRootName(m->getSimpleName(filename));
577 //clears file before we start to write to it below
578 for (int i=0; i<lookup.size(); i++) {
579 remove((sharedFileRoot + lookup[i]->getGroup() + ".sabund").c_str());
580 filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".sabund"));
581 groups.push_back(lookup[i]->getGroup());
// NOTE(review): lookup[0] is read without checking that lookup is non-empty - verify that
// getSharedRAbundVectors never returns an empty vector.
584 while(lookup[0] != NULL) {
// convert each group's vector and append it to that group's sabund file
586 for (int i = 0; i < lookup.size(); i++) {
587 SAbundVector sav = lookup[i]->getSAbundVector();
589 m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".sabund", out);
// release the vectors just processed, then read the next set from the shared file
594 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
595 lookup = input->getSharedRAbundVectors();
602 catch(exception& e) {
603 m->errorOut(e, "CatchAllCommand", "parseSharedFile");
607 /**************************************************************************************/