5 * Created by westcott on 5/11/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "catchallcommand.h"
12 //**********************************************************************************************************************
// Registers the parameters this command accepts (label, shared, sabund, inputdir, outputdir)
// by appending one CommandParameter per option to `parameters`, then collects their names
// into a vector<string>.
// NOTE(review): this view of the file omits some lines of the function (the try/catch
// scaffolding, the return statement and the closing braces are not visible here); the
// comments only describe the statements that are shown.
13 vector<string> CatchAllCommand::setParameters(){
15 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
16 //can choose shared or sabund not both, so put them in the same chooseOnlyOneGroup
// shared and sabund are both declared as "InputTypes" and both carry the same
// "catchallInputs" tag, which is what ties them together as alternatives.
17 CommandParameter pshared("shared", "InputTypes", "", "", "catchallInputs", "catchallInputs", "none",false,false); parameters.push_back(pshared);
18 CommandParameter psabund("sabund", "InputTypes", "", "", "catchallInputs", "catchallInputs", "none",false,false); parameters.push_back(psabund);
19 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
20 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Flatten the registered parameters into a list of their names.
// (presumably this list is the return value; the return statement is not visible here)
22 vector<string> myArray;
23 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Error report tagged with class and method name; `e` is presumably the exception caught
// by a handler whose header is not visible in this view.
27 m->errorOut(e, "CatchAllCommand", "setParameters");
31 //**********************************************************************************************************************
// Builds the user-facing help text for the catchall command by appending one sentence per
// line to `helpString`. The text covers what catchall is, where its executable must live,
// the mono requirement on Mac/Linux, the accepted parameters and an example invocation.
// NOTE(review): the return statement and try/catch scaffolding are not visible in this view;
// presumably `helpString` is returned.
32 string CatchAllCommand::getHelpString(){
34 string helpString = "";
35 helpString += "The catchall command interfaces mothur with the catchall program written by Linda Woodard, Sean Connolly and John Bunge.\n";
36 helpString += "For more information about catchall refer to http://www.northeastern.edu/catchall/index.html \n";
37 helpString += "The catchall executable must be in the same folder as your mothur executable. \n";
38 helpString += "If you are a MAC or Linux user you must also have installed mono, a link to mono is on the webpage. \n";
39 helpString += "The catchall command parameters are shared, sabund and label. shared or sabund is required. \n";
40 helpString += "The label parameter is used to analyze specific labels in your input.\n";
41 helpString += "The catchall command should be in the following format: \n";
42 helpString += "catchall(sabund=yourSabundFile) \n";
43 helpString += "Example: catchall(sabund=abrecovery.fn.sabund) \n";
// Error report tagged with class and method name; `e` is presumably the exception caught
// by a handler whose header is not visible in this view.
47 m->errorOut(e, "CatchAllCommand", "getHelpString");
51 //**********************************************************************************************************************
// Default constructor: marks the command as aborted/help-only and registers the two output
// type keys ("csv" and "summary") with empty file lists.
// NOTE(review): at least one line between the flag assignment and the outputTypes setup is
// not visible in this view, nor is the try/catch scaffolding.
52 CatchAllCommand::CatchAllCommand(){
// Both flags are set to true here, whereas the option-taking constructor starts with both false.
54 abort = true; calledHelp = true;
56 //initialize outputTypes
57 vector<string> tempOutNames;
58 outputTypes["csv"] = tempOutNames;
59 outputTypes["summary"] = tempOutNames;
// Error report tagged with class and method name; `e` is presumably the caught exception.
62 m->errorOut(e, "CatchAllCommand", "CatchAllCommand");
66 /**************************************************************************************/
// Constructor taking the raw option string typed by the user.
// Handles the "help" and "citation" pseudo-options, parses and validates the remaining
// options, resolves the sabund/shared input files (falling back to mothur's current files),
// parses the label list and chooses the output directory.
// Sets `abort` to true when an option is invalid or an input file cannot be opened.
// NOTE(review): several structural lines (else branches, closing braces, the declaration of
// `path`) are not visible in this view, so the exact nesting of the statements below cannot
// be confirmed from here.
67 CatchAllCommand::CatchAllCommand(string option) {
70 abort = false; calledHelp = false;
73 //allow user to run help
74 if(option == "help") { help(); abort = true; calledHelp = true; }
75 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Names of the parameters this command accepts, used below to validate user input.
78 vector<string> myArray = setParameters();
// NOTE(review): this local map is also called `parameters`, the same name setParameters()
// uses for the CommandParameter list; inside this scope the name refers to the map.
80 OptionParser parser(option);
81 map<string,string> parameters = parser.getParameters();
83 ValidParameters validParameter;
84 map<string, string>::iterator it;
86 //check to make sure all parameters are valid for command
87 for (it = parameters.begin(); it != parameters.end(); it++) {
88 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
91 //initialize outputTypes
92 vector<string> tempOutNames;
93 outputTypes["csv"] = tempOutNames;
94 outputTypes["summary"] = tempOutNames;
96 //if the user changes the input directory command factory will send this info to us in the output parameter
// validFile() signals an absent parameter with the sentinel string "not found".
97 string inputDir = validParameter.validFile(parameters, "inputdir", false);
98 if (inputDir == "not found"){ inputDir = ""; }
// For each input-file parameter supplied without a directory component, prefix it with inputDir.
// NOTE(review): `path` is assigned here but its declaration is not visible in this view.
101 it = parameters.find("sabund");
102 //user has given a template file
103 if(it != parameters.end()){
104 path = m->hasPath(it->second);
105 //if the user has not given a path then, add inputdir. else leave path alone.
106 if (path == "") { parameters["sabund"] = inputDir + it->second; }
109 it = parameters.find("shared");
110 //user has given a template file
111 if(it != parameters.end()){
112 path = m->hasPath(it->second);
113 //if the user has not given a path then, add inputdir. else leave path alone.
114 if (path == "") { parameters["shared"] = inputDir + it->second; }
118 //check for required parameters
// "not open" means the file was named but could not be opened (abort);
// "not found" means the parameter was not given (leave empty);
// otherwise the file becomes mothur's current file of that type.
119 sabundfile = validParameter.validFile(parameters, "sabund", true);
120 if (sabundfile == "not open") { sabundfile = ""; abort = true; }
121 else if (sabundfile == "not found") { sabundfile = ""; }
122 else { m->setSabundFile(sabundfile); }
124 sharedfile = validParameter.validFile(parameters, "shared", true);
125 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
126 else if (sharedfile == "not found") { sharedfile = ""; }
127 else { m->setSharedFile(sharedfile); }
// Label selection: "all" turns on allLines; any other value is split at dashes into `labels`.
// NOTE(review): a line between the two statements below is not visible here, so whether the
// split also runs for an empty (not supplied) label cannot be confirmed from this view.
129 string label = validParameter.validFile(parameters, "label", false);
130 if (label == "not found") { label = ""; }
132 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
133 else { allLines = 1; }
// Neither input was supplied: try mothur's current shared file, then its current sabund file.
// NOTE(review): the else branches connecting these statements are not visible here; the
// "No valid current files" message is presumably only reached when both lookups come back empty.
136 if ((sharedfile == "") && (sabundfile == "")) {
137 //is there are current file available for either of these?
138 //give priority to shared, then sabund
139 //if there is a current shared file, use it
140 sharedfile = m->getSharedFile();
141 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
143 sabundfile = m->getSabundFile();
144 if (sabundfile != "") { m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter."); m->mothurOutEndLine(); }
146 m->mothurOut("No valid current files. You must provide a sabund or shared file before you can use the catchall command."); m->mothurOutEndLine();
152 //if the user changes the output directory command factory will send this info to us in the output parameter
// When no outputdir is given, default to the directory of the input file (sabund preferred).
153 outputDir = validParameter.validFile(parameters, "outputdir", false);
154 if (outputDir == "not found"){
155 if (sabundfile != "") { outputDir = m->hasPath(sabundfile); }
156 else { outputDir = m->hasPath(sharedfile); }
161 catch(exception& e) {
162 m->errorOut(e, "CatchAllCommand", "CatchAllCommand");
166 /**************************************************************************************/
// Runs the command. For every input file (one per group when a shared file was given,
// otherwise the single sabund file) and every selected label it:
//   1. writes a catchall input CSV via process(),
//   2. runs the external catchall executable through system(),
//   3. removes the temporary CSV and records the four CSV files catchall is expected to write,
//   4. appends one line to a per-input summary file via createSummaryFile().
// Afterwards it registers the summary file (or the combined summary for shared input) and
// prints the list of output file names.
// Returns 0 after help/citation or when interrupted, and 2 when aborted for another reason;
// the normal-completion return is not visible in this view.
// NOTE(review): many structural lines (try, #else/#endif, closing braces, declarations of
// `out` and the initial value of `path`) are not visible here.
// NOTE(review): the "build command / system() / record outputs / summarise" sequence appears
// three times below with the same statements; it is a candidate for a private helper.
167 int CatchAllCommand::execute() {
170 if (abort == true) { if (calledHelp) { return 0; } return 2; }
172 //get location of catchall
// NOTE(review): find_last_of() treats "othur" as a set of characters, not as a substring, so
// it yields the index of the last 'o', 't', 'h', 'u' or 'r' anywhere in `path`. Subtracting 5
// assumes that character is the final 'r' of a six-letter name such as "mothur" - TODO confirm
// this holds for every way the program can be launched.
174 path = path.substr(0, (path.find_last_of("othur")-5));
175 path = m->getFullPathName(path);
// Remember the user-visible output directory; outputDir itself is rewritten below.
177 savedOutputDir = outputDir;
178 string catchAllCommandExe = "";
// Unix-like platforms: run the "L" executable through mono.
179 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
180 catchAllCommandExe += "mono " + path + "CatchAllcmdL.exe ";
181 if (outputDir == "") { outputDir = "./"; } //force full pathname to be created for catchall, this is necessary because if catchall is in the path it will look for input file whereever the exe is and not the cwd.
// Other platforms (presumably the #else branch, not visible here): run the quoted "W" executable directly.
183 catchAllCommandExe += "\"" + path + "CatchAllcmdW.exe\"" + " ";
184 if (outputDir == "") { outputDir = ".\\"; } //force full pathname to be created for catchall, this is necessary because if catchall is in the path it will look for input file whereever the exe is and not the cwd.
187 //prepare full output directory
188 outputDir = m->getFullPathName(outputDir);
// A shared file is first split into one sabund file per group by parseSharedFile().
190 vector<string> inputFileNames;
191 if (sharedfile != "") { inputFileNames = parseSharedFile(sharedfile); }
192 else { inputFileNames.push_back(sabundfile); }
194 for (int p = 0; p < inputFileNames.size(); p++) {
// groups[p] is used as the name for inputFileNames[p]; parseSharedFile() appends to both in the same loop.
195 if (inputFileNames.size() > 1) {
196 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
199 InputData* input = new InputData(inputFileNames[p], "sabund");
200 SAbundVector* sabund = input->getSAbundVector();
201 string lastLabel = sabund->getLabel();
// processedLabels: labels already run; userLabels: requested labels still outstanding.
203 set<string> processedLabels;
204 set<string> userLabels = labels;
// One summary file per input file, named <root>catchall.summary in outputDir.
206 string summaryfilename = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "catchall.summary";
207 summaryfilename = m->getFullPathName(summaryfilename);
// NOTE(review): the declaration of `out` is not visible in this view.
210 m->openOutputFile(summaryfilename, out);
212 out << "label\tmodel\testimate\tlci\tuci" << endl;
214 //for each label the user selected
215 while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
// Case 1: the current label is one the user asked for (or all labels were requested).
218 if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
219 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
221 //create catchall input file from mothur's inputfile
222 string filename = process(sabund, inputFileNames[p]);
223 string outputPath = m->getPathName(filename);
225 //create system command
226 string catchAllCommand = "";
// NOTE(review): on this branch filename and outputPath are concatenated into the shell
// command without quoting, unlike the other branch; paths containing spaces or shell
// metacharacters would be split or interpreted by the shell.
227 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
228 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
// Other branch: drop the last character of outputPath (presumably a trailing path
// separator - TODO confirm), quote both arguments, then quote the whole command.
230 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
231 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
232 //wrap entire string in ""
233 catchAllCommand = "\"" + catchAllCommand + "\"";
// The return value of system() is ignored; a failed run is only noticed later when
// createSummaryFile() cannot open the expected output file.
237 system(catchAllCommand.c_str());
// The CSV written by process() is only an intermediate input for catchall.
239 m->mothurRemove(filename);
241 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
// If the user gave no output directory, report the output names without a path.
242 if (savedOutputDir == "") { filename = m->getSimpleName(filename); }
// Register the four CSV files catchall is expected to have produced for this label.
244 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
245 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
246 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
247 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
249 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
// createSummaryFile() sets control_pressed when the output is missing, so this check also
// covers a failed catchall run: close the summary, delete recorded outputs, free, return 0.
251 if (m->control_pressed) { out.close(); for (int i = 0; i < outputNames.size(); i++) {m->mothurRemove(outputNames[i]); } delete input; delete sabund; return 0; }
253 processedLabels.insert(sabund->getLabel());
254 userLabels.erase(sabund->getLabel());
// Case 2: anyLabelsToProcess() reports outstanding labels relative to the current one and
// the previous label has not been run yet, so fall back to the previous label (lastLabel).
257 if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
258 string saveLabel = sabund->getLabel();
// NOTE(review): `sabund` is reassigned here; whether the previous vector is freed first is
// on lines not visible in this view.
261 sabund = (input->getSAbundVector(lastLabel));
263 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
266 //create catchall input file from mothur's inputfile
267 string filename = process(sabund, inputFileNames[p]);
268 string outputPath = m->getPathName(filename);
270 //create system command
271 string catchAllCommand = "";
272 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
273 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
275 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
276 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
277 catchAllCommand = "\"" + catchAllCommand + "\"";
281 system(catchAllCommand.c_str());
283 m->mothurRemove(filename);
285 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
286 if (savedOutputDir == "") { filename = m->getSimpleName(filename); }
288 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
289 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
290 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
291 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
293 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
295 if (m->control_pressed) { out.close(); for (int i = 0; i < outputNames.size(); i++) {m->mothurRemove(outputNames[i]); } delete input; delete sabund; return 0; }
297 processedLabels.insert(sabund->getLabel());
298 userLabels.erase(sabund->getLabel());
300 //restore real lastlabel to save below
301 sabund->setLabel(saveLabel);
// Advance: remember the label just seen and read the next vector from the input.
305 lastLabel = sabund->getLabel();
308 sabund = (input->getSAbundVector());
311 //output error messages about any remaining user labels
312 set<string>::iterator it;
// NOTE(review): needToRun is initialised to false; the assignment that sets it to true is not
// visible in this view (presumably it sits next to the "I will use" message below).
313 bool needToRun = false;
314 for (it = userLabels.begin(); it != userLabels.end(); it++) {
315 m->mothurOut("Your file does not include the label " + *it);
// If the last label read was never run it will be used as a substitute; otherwise the user
// is pointed at the results already produced for it.
316 if (processedLabels.count(lastLabel) != 1) {
317 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
320 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
324 //run last label if you need to
325 if (needToRun == true) {
326 if (sabund != NULL) { delete sabund; }
327 sabund = (input->getSAbundVector(lastLabel));
329 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
331 //create catchall input file from mothur's inputfile
332 string filename = process(sabund, inputFileNames[p]);
333 string outputPath = m->getPathName(filename);
335 //create system command
336 string catchAllCommand = "";
337 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
338 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
340 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
341 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
342 catchAllCommand = "\"" + catchAllCommand + "\"";
346 system(catchAllCommand.c_str());
348 m->mothurRemove(filename);
350 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
351 if (savedOutputDir == "") { filename = m->getSimpleName(filename); }
353 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
354 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
355 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
356 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
358 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
// Interrupted: delete every recorded output and stop.
366 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {m->mothurRemove(outputNames[i]); } return 0; }
// sabund input: the single summary file written above is itself the summary output.
// Its name is rebuilt here from savedOutputDir, whereas the file was opened under outputDir.
370 if (sharedfile == "") {
371 string summaryfilename = savedOutputDir + m->getRootName(m->getSimpleName(inputFileNames[0])) + "catchall.summary";
372 summaryfilename = m->getFullPathName(summaryfilename);
373 outputNames.push_back(summaryfilename); outputTypes["summary"].push_back(summaryfilename);
374 }else { //combine summaries
// shared input: merge the per-group summaries into one file via combineSummmary().
375 vector<string> sumNames;
376 for (int i = 0; i < inputFileNames.size(); i++) {
377 sumNames.push_back(m->getFullPathName(outputDir + m->getRootName(m->getSimpleName(inputFileNames[i])) + "catchall.summary"));
379 string summaryfilename = combineSummmary(sumNames);
380 outputNames.push_back(summaryfilename); outputTypes["summary"].push_back(summaryfilename);
// Report every file produced.
383 m->mothurOutEndLine();
384 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
385 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
386 m->mothurOutEndLine();
391 catch(exception& e) {
392 m->errorOut(e, "CatchAllCommand", "execute");
396 //**********************************************************************************************************************
// Writes the catchall input file for one label of one input file.
//   sabund - abundance vector for the label being processed
//   file1  - name of the mothur input file the vector came from
// The file is named <outputDir><root of file1><label>.csv (expanded to a full path) and
// contains "rank,count" lines for ranks 1..getMaxRank().
// execute() uses the returned string as the path of this file.
// NOTE(review): the declaration of `out`, the return statement and at least one line inside
// the loop are not visible in this view.
397 string CatchAllCommand::process(SAbundVector* sabund, string file1) {
399 string filename = outputDir + m->getRootName(m->getSimpleName(file1)) + sabund->getLabel() + ".csv";
400 filename = m->getFullPathName(filename);
403 m->openOutputFile(filename, out);
// Ranks are 1-based and the upper bound is inclusive.
405 for (int i = 1; i <= sabund->getMaxRank(); i++) {
406 int temp = sabund->get(i);
// NOTE(review): lines between reading `temp` and writing it are not visible here, so this
// write may be guarded by a condition (e.g. skipping zero counts) - TODO confirm.
409 out << i << "," << temp << endl;
417 catch(exception& e) {
418 m->errorOut(e, "CatchAllCommand", "process");
422 //*********************************************************************************************************************
// Merges the per-group summary files into a single summary for a shared-file run.
//   outputNames - paths of the per-group summary files, in the same order as `groups`
// Returns the name of the combined file: <savedOutputDir><root of sharedfile>catchall.summary.
// Each input line has the group name inserted after its first field, giving the column layout
// label, group, model, estimate, lci, uci. Every per-group file is deleted after being read.
// NOTE(review): this parameter has the same name as the `outputNames` that execute() appends
// to, so inside this function the name refers to the argument.
// NOTE(review): declarations of `out`, `temp`, `tempLabel` and `numLines`, and the lines that
// read each field, are not visible in this view.
423 string CatchAllCommand::combineSummmary(vector<string>& outputNames) {
427 string combineFileName = savedOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "catchall.summary";
430 m->openOutputFile(combineFileName, out);
432 out << "label\tgroup\tmodel\testimate\tlci\tuci" << endl;
434 //open each groups summary file
435 string newLabel = "";
// files: summary file name -> its data lines, already rewritten with the group column.
437 map<string, vector<string> > files;
438 for (int i=0; i<outputNames.size(); i++) {
439 vector<string> thisFilesLines;
442 m->openInputFile(outputNames[i], temp);
444 //read through first line - labels
449 while (!temp.eof()) {
451 string thisLine = "";
// Five fields per line, matching the five-column header written by execute().
454 for (int j = 0; j < 5; j++) {
// The group name is emitted immediately before the second field (j == 1).
458 if (j == 1) { thisLine += groups[i] + "\t" + tempLabel + "\t"; }
459 else{ thisLine += tempLabel + "\t"; }
464 thisFilesLines.push_back(thisLine);
469 files[outputNames[i]] = thisFilesLines;
// NOTE(review): numLines is overwritten for every file, so after the loop it holds the line
// count of the LAST file only.
471 numLines = thisFilesLines.size();
474 m->mothurRemove(outputNames[i]);
// Interleave: for each line index k, write line k of every group's summary in turn.
// NOTE(review): this indexes every file's vector up to numLines; if an earlier file has fewer
// lines than the last one, the index is out of range - TODO confirm all files always have
// the same number of lines.
478 for (int k = 0; k < numLines; k++) {
480 //grab summary data for each group
481 for (int i=0; i<outputNames.size(); i++) {
482 out << files[outputNames[i]][k];
489 //return combine file name
490 return combineFileName;
493 catch(exception& e) {
494 m->errorOut(e, "CatchAllCommand", "combineSummmary");
498 //**********************************************************************************************************************
// Extracts one summary line from a catchall "_BestModelsAnalysis.csv" file and appends it to `out`.
//   file1 - path of the catchall output file to read
//   label - label written as the first column of the summary line
//   out   - already-open summary stream
// Writes: label, then fields 1, 4, 6 and 7 of the parsed comma-separated line, tab-separated.
// Returns 0 on the visible error paths; the normal return is not visible in this view.
// Side effect: sets m->control_pressed when file1 cannot be opened, which makes execute()
// clean up and stop.
// NOTE(review): the declaration of `in` and the lines that extract numString from the header
// are not visible in this view.
499 int CatchAllCommand::createSummaryFile(string file1, string label, ofstream& out) {
// "noerror" is passed so a missing file is reported by the message below rather than by the
// open call itself (presumably - TODO confirm against openInputFile).
503 int able = m->openInputFile(file1, in, "noerror");
505 if (able == 1) { m->mothurOut("[ERROR]: the catchall program did not run properly. Please check to make sure it is located in the same folder as your mothur executable.");m->mothurOutEndLine(); m->control_pressed = true; return 0; }
509 string header = m->getline(in); m->gobble(in);
// NOTE(review): `pos` is an int holding the result of string::find(); the comparisons below
// against string::npos and header.length() mix signed and unsigned values.
511 int pos = header.find("Total Number of Observed Species =");
512 string numString = "";
// NOTE(review): no return is visible on this line, so after reporting "cannot parse"
// execution appears to continue - TODO confirm against the omitted lines.
515 if (pos == string::npos) { m->mothurOut("[ERROR]: cannot parse " + file1); m->mothurOutEndLine(); }
517 //pos will be the position of the T in total, so we want to count to the position of =
528 if (pos > header.length()) { m->mothurOut("Cannot find number of OTUs in " + file1); m->mothurOutEndLine(); in.close(); return 0; }
// First data line: split on commas and drop the trailing empty field.
// NOTE(review): pop_back() assumes splitAtComma() produced at least one element.
532 string firstline = m->getline(in); m->gobble(in);
533 vector<string> values;
534 m->splitAtComma(firstline, values);
536 values.pop_back(); //last value is always a blank string since the last character in the line is always a ','
538 if (values.size() == 1) { //grab next line if firstline didn't have what you wanted
539 string secondline = m->getline(in); m->gobble(in);
541 m->splitAtComma(secondline, values);
543 values.pop_back(); //last value is always a blank string since the last character in the line is always a ','
// Fallback: no usable model line, so the estimate and both bounds are set to numString.
// NOTE(review): a line between the resize and these assignments is not visible here; it
// presumably fills values[1] - TODO confirm.
546 if (values.size() == 1) { //still not what we wanted fill values with numOTUs
547 values.resize(8, "");
549 values[4] = numString;
550 values[6] = numString;
551 values[7] = numString;
// Guarantee indices 1, 4, 6 and 7 exist before they are read.
554 if (values.size() < 8) { values.resize(8, ""); }
556 out << label << '\t' << values[1] << '\t' << values[4] << '\t' << values[6] << '\t' << values[7] << endl;
564 catch(exception& e) {
565 m->errorOut(e, "CatchAllCommand", "createSummaryFile");
569 //**********************************************************************************************************************
// Splits a shared file into one sabund file per group.
//   filename - path of the shared file
// Collects the per-group file names <outputDir><root of filename><group>.sabund in `filenames`
// (presumably the return value; the return statement is not visible in this view).
// Side effect: appends each group name to the member `groups`, in the same order as `filenames`.
// NOTE(review): the declaration of `out`, the statement that writes each vector and any
// `delete input` are not visible in this view.
570 vector<string> CatchAllCommand::parseSharedFile(string filename) {
572 vector<string> filenames;
575 InputData* input = new InputData(filename, "sharedfile");
576 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
578 string sharedFileRoot = outputDir + m->getRootName(m->getSimpleName(filename));
580 //clears file before we start to write to it below
// Files are opened in append mode below, so stale copies from a previous run are removed first.
581 for (int i=0; i<lookup.size(); i++) {
582 m->mothurRemove((sharedFileRoot + lookup[i]->getGroup() + ".sabund"));
583 filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".sabund"));
584 groups.push_back(lookup[i]->getGroup());
// One pass per label in the shared file; the loop ends when the first vector comes back NULL.
// NOTE(review): lookup[0] is read without checking that lookup is non-empty.
587 while(lookup[0] != NULL) {
589 for (int i = 0; i < lookup.size(); i++) {
590 SAbundVector sav = lookup[i]->getSAbundVector();
592 m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".sabund", out);
// Free this label's vectors before fetching the next set.
597 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
598 lookup = input->getSharedRAbundVectors();
605 catch(exception& e) {
606 m->errorOut(e, "CatchAllCommand", "parseSharedFile");
610 /**************************************************************************************/