5 * Created by westcott on 5/11/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "catchallcommand.h"
11 #include "globaldata.hpp"
13 //**********************************************************************************************************************
// Declares the parameters accepted by the catchall command (label, shared,
// sabund, inputdir, outputdir), appends each one to `parameters`, and builds
// a vector<string> holding their names in registration order.
// NOTE(review): this excerpt has gaps in its embedded line numbering, so the
// try/return/closing-brace lines of this method are not visible here.
14 vector<string> CatchAllCommand::setParameters(){
16 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
17 //can choose shared or sabund not both, so put them in the same chooseOnlyOneGroup
18 CommandParameter pshared("shared", "InputTypes", "", "", "catchallInputs", "catchallInputs", "none",false,false); parameters.push_back(pshared);
19 CommandParameter psabund("sabund", "InputTypes", "", "", "catchallInputs", "catchallInputs", "none",false,false); parameters.push_back(psabund);
20 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
21 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Copy only the parameter names out of the CommandParameter objects.
23 vector<string> myArray;
24 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Error report for this method, tagged with the class and method name
// (the enclosing catch clause is on a line not visible in this excerpt).
28 m->errorOut(e, "CatchAllCommand", "setParameters");
32 //**********************************************************************************************************************
// Assembles the multi-line help text for the catchall command by appending
// one sentence per statement to `helpString`: what the command wraps, where
// the executable must live, the mono requirement, the accepted parameters,
// and a usage example.
// NOTE(review): the return statement and try/catch lines are not visible in
// this excerpt.
33 string CatchAllCommand::getHelpString(){
35 string helpString = "";
36 helpString += "The catchall command interfaces mothur with the catchall program written by Linda Woodard, Sean Connolly and John Bunge.\n";
37 helpString += "For more information about catchall refer to http://www.northeastern.edu/catchall/index.html \n";
38 helpString += "The catchall executable must be in the same folder as your mothur executable. \n";
39 helpString += "If you are a MAC or Linux user you must also have installed mono, a link to mono is on the webpage. \n";
40 helpString += "The catchall command parameters are shared, sabund and label. shared or sabund is required. \n";
41 helpString += "The label parameter is used to analyze specific labels in your input.\n";
42 helpString += "The catchall command should be in the following format: \n";
43 helpString += "catchall(sabund=yourSabundFile) \n";
44 helpString += "Example: catchall(sabund=abrecovery.fn.sabund) \n";
// Error report for this method, tagged with the class and method name.
48 m->errorOut(e, "CatchAllCommand", "getHelpString");
52 //**********************************************************************************************************************
// Default constructor: marks the command as aborted with help requested
// (abort = true, calledHelp = true), so execute() returns 0 immediately,
// and registers the two output type keys, "csv" and "summary", with empty
// file lists.
53 CatchAllCommand::CatchAllCommand(){
55 abort = true; calledHelp = true;
57 //initialize outputTypes
58 vector<string> tempOutNames;
59 outputTypes["csv"] = tempOutNames;
60 outputTypes["summary"] = tempOutNames;
// Error report for this constructor, tagged with the class and method name.
63 m->errorOut(e, "CatchAllCommand", "CatchAllCommand");
67 /**************************************************************************************/
// Constructs the command from a user option string.
// Parses and validates the options, resolves the sabund/shared input files
// (falling back to mothur's current files when neither is given), parses the
// label selection, and chooses the output directory. Sets `abort` when an
// option is invalid or a named input file cannot be opened.
// NOTE(review): this excerpt has gaps in its embedded line numbering; some
// braces, else-branches and declarations (e.g. of `path`) are not visible.
68 CatchAllCommand::CatchAllCommand(string option) {
71 abort = false; calledHelp = false;
74 //allow user to run help
75 if(option == "help") { help(); abort = true; calledHelp = true; }
// Names of the parameters this command accepts, used for validation below.
78 vector<string> myArray = setParameters();
80 OptionParser parser(option);
// Local map of option name -> value; it shadows the `parameters` that
// setParameters() fills.
81 map<string,string> parameters = parser.getParameters();
83 ValidParameters validParameter;
84 map<string, string>::iterator it;
86 //check to make sure all parameters are valid for command
87 for (it = parameters.begin(); it != parameters.end(); it++) {
88 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
91 //initialize outputTypes
92 vector<string> tempOutNames;
93 outputTypes["csv"] = tempOutNames;
94 outputTypes["summary"] = tempOutNames;
96 //if the user changes the input directory command factory will send this info to us in the output parameter
97 string inputDir = validParameter.validFile(parameters, "inputdir", false);
98 if (inputDir == "not found"){ inputDir = ""; }
101 it = parameters.find("sabund");
102 //user has given a sabund file
103 if(it != parameters.end()){
104 path = m->hasPath(it->second);
105 //if the user has not given a path then, add inputdir. else leave path alone.
106 if (path == "") { parameters["sabund"] = inputDir + it->second; }
109 it = parameters.find("shared");
110 //user has given a shared file
111 if(it != parameters.end()){
112 path = m->hasPath(it->second);
113 //if the user has not given a path then, add inputdir. else leave path alone.
114 if (path == "") { parameters["shared"] = inputDir + it->second; }
118 //check for required parameters
// "not open" means a file was named but could not be opened, which aborts the
// command; "not found" means the parameter was not supplied, which becomes "".
119 sabundfile = validParameter.validFile(parameters, "sabund", true);
120 if (sabundfile == "not open") { sabundfile = ""; abort = true; }
121 else if (sabundfile == "not found") { sabundfile = ""; }
123 sharedfile = validParameter.validFile(parameters, "shared", true);
124 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
125 else if (sharedfile == "not found") { sharedfile = ""; }
127 string label = validParameter.validFile(parameters, "label", false);
128 if (label == "not found") { label = ""; }
// Any value other than "all" is split at dashes into the `labels` set and
// allLines is cleared; "all" sets allLines so every label is processed.
130 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
131 else { allLines = 1; }
// Neither input was supplied: fall back to mothur's current shared file,
// then to the current sabund file. The last message reports that no current
// file is available (the if/else nesting between these statements is on
// lines not visible in this excerpt).
134 if ((sharedfile == "") && (sabundfile == "")) {
135 //is there a current file available for either of these?
136 //give priority to shared, then sabund
137 //if there is a current shared file, use it
138 sharedfile = m->getSharedFile();
139 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
141 sabundfile = m->getSabundFile();
142 if (sabundfile != "") { m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter."); m->mothurOutEndLine(); }
144 m->mothurOut("No valid current files. You must provide a sabund or shared file before you can use the catchall command."); m->mothurOutEndLine();
150 //if the user changes the output directory command factory will send this info to us in the output parameter
151 outputDir = validParameter.validFile(parameters, "outputdir", false);
// No outputdir given: default to the directory part of the input file,
// preferring the sabund file over the shared file.
152 if (outputDir == "not found"){
153 if (sabundfile != "") { outputDir = m->hasPath(sabundfile); }
154 else { outputDir = m->hasPath(sharedfile); }
159 catch(exception& e) {
160 m->errorOut(e, "CatchAllCommand", "CatchAllCommand");
164 /**************************************************************************************/
// Runs the external catchall program once per selected label of each input.
//
// For every input file (one sabund file, or one per group when a shared file
// was split by parseSharedFile) it:
//   1. writes a catchall input CSV via process(),
//   2. builds a platform-specific command line and runs it with system(),
//   3. removes the temporary CSV and records the four catchall CSV outputs,
//   4. appends one line to a per-input summary via createSummaryFile().
// Afterwards the summary file(s) are registered and all output names printed.
//
// Returns 0 when help was requested or the run was interrupted
// (m->control_pressed), and 2 when the constructor flagged an abort for any
// other reason. The normal-completion return is on a line not visible in
// this excerpt.
// NOTE(review): the run-catchall sequence appears three times below (selected
// label, skipped-label fallback, final lastLabel run) with the same
// statements; a shared helper would remove the duplication.
// NOTE(review): this excerpt has gaps in its embedded line numbering; `try`,
// `#else`/`#endif`, several braces and declarations are not visible.
165 int CatchAllCommand::execute() {
// Aborted in the constructor: 0 if the user only asked for help, else 2.
168 if (abort == true) { if (calledHelp) { return 0; } return 2; }
170 //get location of catchall
// NOTE(review): std::string::find_last_of returns the index of the last
// character that equals ANY of 'o','t','h','u','r'; it does not search for
// the substring "othur". The value `path` holds before this line is assigned
// on a line not visible here - confirm this trims what is intended (rfind on
// the executable name may have been meant).
172 path = path.substr(0, (path.find_last_of("othur")-5));
173 path = m->getFullPathName(path);
// Remember the output directory as the user gave it, before it is replaced
// by a full path below; it is used later when naming outputs.
175 savedOutputDir = outputDir;
176 string catchAllCommandExe = "";
// Apple/Linux: run the "L" executable through mono.
177 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
178 catchAllCommandExe += "mono " + path + "CatchAllcmdL.exe ";
179 if (outputDir == "") { outputDir = "./"; } //force full pathname to be created for catchall, this is necessary because if catchall is in the path it will look for input file whereever the exe is and not the cwd.
// Presumably the #else branch (directive not visible): the "W" executable,
// with its path wrapped in double quotes.
181 catchAllCommandExe += "\"" + path + "CatchAllcmdW.exe\"" + " ";
182 if (outputDir == "") { outputDir = ".\\"; } //force full pathname to be created for catchall, this is necessary because if catchall is in the path it will look for input file whereever the exe is and not the cwd.
185 //prepare full output directory
186 outputDir = m->getFullPathName(outputDir);
// A shared file is split into one sabund file per group; otherwise the single
// sabund file is the only input.
188 vector<string> inputFileNames;
189 if (sharedfile != "") { inputFileNames = parseSharedFile(sharedfile); }
190 else { inputFileNames.push_back(sabundfile); }
192 for (int p = 0; p < inputFileNames.size(); p++) {
193 if (inputFileNames.size() > 1) {
194 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
197 InputData* input = new InputData(inputFileNames[p], "sabund");
198 SAbundVector* sabund = input->getSAbundVector();
// NOTE(review): sabund is dereferenced here before the NULL check in the
// while condition below - confirm getSAbundVector() cannot return NULL for
// the first read.
199 string lastLabel = sabund->getLabel();
// processedLabels: labels already run; userLabels: requested labels not yet
// seen (entries are erased as they are processed).
201 set<string> processedLabels;
202 set<string> userLabels = labels;
// Per-input summary file; `out` is declared on a line not visible here.
204 string summaryfilename = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "catchall.summary";
205 summaryfilename = m->getFullPathName(summaryfilename);
208 m->openOutputFile(summaryfilename, out);
210 out << "label\tmodel\testimate\tlci\tuci" << endl;
212 //for each label the user selected
213 while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
// Case 1: this label was requested (or all labels are being processed).
216 if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
217 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
219 //create catchall input file from mothur's inputfile
220 string filename = process(sabund, inputFileNames[p]);
221 string outputPath = m->getPathName(filename);
223 //create system command
224 string catchAllCommand = "";
// NOTE(review): in this branch filename and outputPath are concatenated
// into the shell command without quoting, so paths containing spaces or
// shell metacharacters would be misparsed by system().
225 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
226 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
// Other branch: drop the last character of outputPath, quote both arguments,
// then wrap the whole command in quotes.
228 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
229 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
230 //wrap entire string in ""
231 catchAllCommand = "\"" + catchAllCommand + "\"";
235 system(catchAllCommand.c_str());
// The CSV written by process() is only an intermediate input; delete it.
237 remove(filename.c_str());
// Use the input name without its extension as the prefix for catchall's
// outputs; when the user gave no output directory, record names without a path.
239 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
240 if (savedOutputDir == "") { filename = m->getSimpleName(filename); }
242 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
243 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
244 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
245 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
// Append this label's line to the summary from the best-models file.
247 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
// Interrupted (createSummaryFile also sets control_pressed on failure):
// close the summary, delete recorded outputs, free the reader and vector.
249 if (m->control_pressed) { out.close(); for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } delete input; delete sabund; return 0; }
251 processedLabels.insert(sabund->getLabel());
252 userLabels.erase(sabund->getLabel());
// Case 2: anyLabelsToProcess reports a pending requested label and lastLabel
// has not been processed, so lastLabel is re-read and run in its place
// (presumably a requested label that was skipped - verify against
// anyLabelsToProcess).
255 if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
256 string saveLabel = sabund->getLabel();
259 sabund = (input->getSAbundVector(lastLabel));
261 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
264 //create catchall input file from mothur's inputfile
265 string filename = process(sabund, inputFileNames[p]);
266 string outputPath = m->getPathName(filename);
268 //create system command
269 string catchAllCommand = "";
270 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
271 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
273 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
274 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
275 catchAllCommand = "\"" + catchAllCommand + "\"";
279 system(catchAllCommand.c_str());
281 remove(filename.c_str());
283 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
284 if (savedOutputDir == "") { filename = m->getSimpleName(filename); }
286 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
287 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
288 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
289 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
291 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
293 if (m->control_pressed) { out.close(); for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } delete input; delete sabund; return 0; }
295 processedLabels.insert(sabund->getLabel());
296 userLabels.erase(sabund->getLabel());
298 //restore real lastlabel to save below
299 sabund->setLabel(saveLabel);
// Remember this label, then advance to the next vector in the file.
303 lastLabel = sabund->getLabel();
306 sabund = (input->getSAbundVector());
309 //output error messages about any remaining user labels
310 set<string>::iterator it;
// needToRun starts false; the statement that sets it to true is on a line
// not visible in this excerpt (presumably in the first branch below).
311 bool needToRun = false;
312 for (it = userLabels.begin(); it != userLabels.end(); it++) {
313 m->mothurOut("Your file does not include the label " + *it);
314 if (processedLabels.count(lastLabel) != 1) {
315 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
318 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
322 //run last label if you need to
323 if (needToRun == true) {
324 if (sabund != NULL) { delete sabund; }
325 sabund = (input->getSAbundVector(lastLabel));
327 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
329 //create catchall input file from mothur's inputfile
330 string filename = process(sabund, inputFileNames[p]);
331 string outputPath = m->getPathName(filename);
333 //create system command
334 string catchAllCommand = "";
335 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
336 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
338 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
339 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
340 catchAllCommand = "\"" + catchAllCommand + "\"";
344 system(catchAllCommand.c_str());
346 remove(filename.c_str());
348 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
349 if (savedOutputDir == "") { filename = m->getSimpleName(filename); }
351 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
352 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
353 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
354 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
356 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
// Interrupted: delete everything recorded so far and stop.
364 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } return 0; }
// Register the summary output: the single per-input summary for a sabund
// run, or one combined summary (built from the per-group summaries) for a
// shared run.
368 if (sharedfile == "") {
369 string summaryfilename = savedOutputDir + m->getRootName(m->getSimpleName(inputFileNames[0])) + "catchall.summary";
370 summaryfilename = m->getFullPathName(summaryfilename);
371 outputNames.push_back(summaryfilename); outputTypes["summary"].push_back(summaryfilename);
372 }else { //combine summaries
373 vector<string> sumNames;
374 for (int i = 0; i < inputFileNames.size(); i++) {
375 sumNames.push_back(m->getFullPathName(outputDir + m->getRootName(m->getSimpleName(inputFileNames[i])) + "catchall.summary"));
377 string summaryfilename = combineSummmary(sumNames);
378 outputNames.push_back(summaryfilename); outputTypes["summary"].push_back(summaryfilename);
// Print the list of files produced by this run.
381 m->mothurOutEndLine();
382 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
383 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
384 m->mothurOutEndLine();
389 catch(exception& e) {
390 m->errorOut(e, "CatchAllCommand", "execute");
394 //**********************************************************************************************************************
// Writes the catchall input file for one SAbundVector.
// The file is named <outputDir><root of file1><label>.csv (made into a full
// path) and receives one "rank,count" line per rank from 1 to getMaxRank().
// execute() uses the returned string as that file's name; the return
// statement itself is on a line not visible in this excerpt.
// NOTE(review): lines between reading `temp` and writing it are missing
// here, so any filtering of ranks (e.g. skipping zero counts) cannot be
// confirmed from this view.
395 string CatchAllCommand::process(SAbundVector* sabund, string file1) {
397 string filename = outputDir + m->getRootName(m->getSimpleName(file1)) + sabund->getLabel() + ".csv";
398 filename = m->getFullPathName(filename);
// `out` is declared on a line not visible in this excerpt.
401 m->openOutputFile(filename, out);
// Ranks are 1-based: i runs from 1 through getMaxRank() inclusive.
403 for (int i = 1; i <= sabund->getMaxRank(); i++) {
404 int temp = sabund->get(i);
407 out << i << "," << temp << endl;
415 catch(exception& e) {
416 m->errorOut(e, "CatchAllCommand", "process");
420 //*********************************************************************************************************************
// Merges the per-group summary files into one combined summary.
// outputNames: the per-group summary file names, indexed in parallel with
// `groups`. Each file is read, its group name is inserted into every data
// line, and the file is then deleted. The combined file is written to
// <savedOutputDir><root of sharedfile>catchall.summary with lines interleaved
// by line index (line k of every group, then line k+1, ...).
// Returns the combined file's name.
// NOTE(review): this excerpt has gaps in its embedded line numbering; the
// declarations of `out`, `temp`, `tempLabel` and `numLines` are not visible.
421 string CatchAllCommand::combineSummmary(vector<string>& outputNames) {
425 string combineFileName = savedOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "catchall.summary";
428 m->openOutputFile(combineFileName, out);
// Same columns as the per-group summaries, with "group" added second.
430 out << "label\tgroup\tmodel\testimate\tlci\tuci" << endl;
432 //open each groups summary file
433 string newLabel = "";
// Maps each summary file name to its rewritten data lines.
435 map<string, vector<string> > files;
436 for (int i=0; i<outputNames.size(); i++) {
437 vector<string> thisFilesLines;
440 m->openInputFile(outputNames[i], temp);
442 //read through first line - labels
447 while (!temp.eof()) {
449 string thisLine = "";
// Five fields per line; the group name is inserted immediately before the
// field at index 1. `tempLabel` is read on a line not visible here.
452 for (int j = 0; j < 5; j++) {
456 if (j == 1) { thisLine += groups[i] + "\t" + tempLabel + "\t"; }
457 else{ thisLine += tempLabel + "\t"; }
462 thisFilesLines.push_back(thisLine);
467 files[outputNames[i]] = thisFilesLines;
// NOTE(review): numLines is overwritten for every file, so it ends up as
// the line count of the LAST file; the interleave loop below indexes every
// file's vector with k < numLines and therefore assumes all group summaries
// have the same number of lines - confirm.
469 numLines = thisFilesLines.size();
// The per-group summary is no longer needed once its lines are in memory.
472 remove(outputNames[i].c_str());
476 for (int k = 0; k < numLines; k++) {
478 //grab summary data for each group
479 for (int i=0; i<outputNames.size(); i++) {
480 out << files[outputNames[i]][k];
487 //return combine file name
488 return combineFileName;
491 catch(exception& e) {
492 m->errorOut(e, "CatchAllCommand", "combineSummmary");
496 //**********************************************************************************************************************
// Extracts one summary line from a catchall "_BestModelsAnalysis.csv" file.
// file1: the catchall output file to read.
// label: the label written in the first column of the summary line.
// out:   the open summary stream to append to.
// Writes "label, values[1], values[4], values[6], values[7]" (tab separated)
// taken from the first comma-separated data line that has more than one
// value. Returns 0 on the visible early exits, after setting
// m->control_pressed when file1 cannot be opened; the normal-path return is
// on a line not visible here.
// NOTE(review): this excerpt has gaps in its embedded line numbering; the
// declaration of `in` and the code that fills `numString` are not visible.
497 int CatchAllCommand::createSummaryFile(string file1, string label, ofstream& out) {
// "noerror" is passed to the opener; a result of 1 is treated as failure
// below and reported here as catchall not having run.
501 int able = m->openInputFile(file1, in, "noerror");
503 if (able == 1) { m->mothurOut("[ERROR]: the catchall program did not run properly. Please check to make sure it is located in the same folder as your mothur executable.");m->mothurOutEndLine(); m->control_pressed = true; return 0; }
// First line of the file is expected to contain the observed-species count.
507 string header = m->getline(in); m->gobble(in);
// NOTE(review): find() returns size_t but is stored in an int; the
// comparisons with string::npos and header.length() below rely on implicit
// signed/unsigned conversion.
509 int pos = header.find("Total Number of Observed Species =");
510 string numString = "";
// NOTE(review): no return is visible on this line after the error message -
// confirm execution is meant to continue when the marker is missing.
513 if (pos == string::npos) { m->mothurOut("[ERROR]: cannot parse " + file1); m->mothurOutEndLine(); }
515 //pos will be the position of the T in total, so we want to count to the position of =
526 if (pos > header.length()) { m->mothurOut("Cannot find number of OTUs in " + file1); m->mothurOutEndLine(); in.close(); return 0; }
// Split the first data line into comma-separated values.
530 string firstline = m->getline(in); m->gobble(in);
531 vector<string> values;
532 m->splitAtComma(firstline, values);
534 values.pop_back(); //last value is always a blank string since the last character in the line is always a ','
536 if (values.size() == 1) { //grab next line if firstline didn't have what you wanted
537 string secondline = m->getline(in); m->gobble(in);
539 m->splitAtComma(secondline, values);
541 values.pop_back(); //last value is always a blank string since the last character in the line is always a ','
// Neither line had usable values: write numString into indices 4, 6 and 7,
// the positions output below after the label and values[1].
544 if (values.size() == 1) { //still not what we wanted fill values with numOTUs
545 values.resize(8, "");
547 values[4] = numString;
548 values[6] = numString;
549 values[7] = numString;
// Pad short rows so the indexed reads below stay in range.
552 if (values.size() < 8) { values.resize(8, ""); }
554 out << label << '\t' << values[1] << '\t' << values[4] << '\t' << values[6] << '\t' << values[7] << endl;
562 catch(exception& e) {
563 m->errorOut(e, "CatchAllCommand", "createSummaryFile");
567 //**********************************************************************************************************************
// Splits a shared file into one ".sabund" file per group.
// filename: the shared file to read.
// For each group in the first set of vectors it removes any existing
// <outputDir><root of filename><group>.sabund file, records that file name in
// `filenames`, and appends the group name to the member `groups`. It then
// walks every set of vectors in the shared file, opening each group's file
// in append mode for that group's SAbundVector.
// execute() uses the result as the list of per-group input files; the return
// statement is on a line not visible in this excerpt.
// NOTE(review): this excerpt has gaps in its embedded line numbering; the
// declaration of `out` and the statement that writes `sav` are not visible.
// NOTE(review): `input` is allocated with new and no matching delete is
// visible in this excerpt - confirm it is released.
568 vector<string> CatchAllCommand::parseSharedFile(string filename) {
570 vector<string> filenames;
573 InputData* input = new InputData(filename, "sharedfile");
574 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
// Common prefix for every per-group output file.
576 string sharedFileRoot = outputDir + m->getRootName(m->getSimpleName(filename));
578 //clears file before we start to write to it below
579 for (int i=0; i<lookup.size(); i++) {
580 remove((sharedFileRoot + lookup[i]->getGroup() + ".sabund").c_str());
581 filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".sabund"));
582 groups.push_back(lookup[i]->getGroup());
// The loop ends when the first entry of the lookup vector is NULL.
585 while(lookup[0] != NULL) {
587 for (int i = 0; i < lookup.size(); i++) {
588 SAbundVector sav = lookup[i]->getSAbundVector();
// Append mode, so successive sets of vectors accumulate in the same group file.
590 m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".sabund", out);
// Free this set of vectors before reading the next one.
595 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
596 lookup = input->getSharedRAbundVectors();
603 catch(exception& e) {
604 m->errorOut(e, "CatchAllCommand", "parseSharedFile");
608 /**************************************************************************************/