5 * Created by westcott on 5/11/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "catchallcommand.h"
11 #include "globaldata.hpp"
13 //**********************************************************************************************************************
// Builds the list of parameter names the catchall command accepts:
// sabund, shared, label, inputdir and outputdir.
// Exceptions are reported through m->errorOut tagged with this method name.
14 vector<string> CatchAllCommand::getValidParameters(){
16 string AlignArray[] = {"sabund","shared","label","inputdir","outputdir"};
// element count = total bytes of the array / size of one string
17 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
21 m->errorOut(e, "CatchAllCommand", "getValidParameters");
25 //**********************************************************************************************************************
// Default constructor: marks the command as aborted with help already called,
// and registers empty output-file lists under the "csv" and "summary" keys.
26 CatchAllCommand::CatchAllCommand(){
// both flags true: execute() returns 0 immediately in this state
28 abort = true; calledHelp = true;
29 //initialize outputTypes
30 vector<string> tempOutNames;
31 outputTypes["csv"] = tempOutNames;
32 outputTypes["summary"] = tempOutNames;
35 m->errorOut(e, "CatchAllCommand", "CatchAllCommand");
39 //**********************************************************************************************************************
// Builds the list describing the required parameters of this command.
// NOTE(review): the trailing "or" entry presumably tells the caller that either
// sabund or shared satisfies the requirement - confirm against the consumer of this list.
40 vector<string> CatchAllCommand::getRequiredParameters(){
42 string AlignArray[] = {"sabund","shared","or"};
// element count = total bytes of the array / size of one string
43 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
47 m->errorOut(e, "CatchAllCommand", "getRequiredParameters");
51 //**********************************************************************************************************************
// Declares the list of required files for this command; no entries are added to it here.
// NOTE(review): presumably an empty list means the command needs no previously loaded files - confirm.
52 vector<string> CatchAllCommand::getRequiredFiles(){
54 vector<string> myArray;
58 m->errorOut(e, "CatchAllCommand", "getRequiredFiles");
62 /**************************************************************************************/
// Option-string constructor: parses and validates the user supplied options,
// resolves the sabund/shared input files, the label selection and the output
// directory. Sets abort when validation fails or when only help was requested.
//   option - raw option string for the command, or "help"
63 CatchAllCommand::CatchAllCommand(string option) {
65 globaldata = GlobalData::getInstance();
66 abort = false; calledHelp = false;
69 //allow user to run help
70 if(option == "help") { help(); abort = true; calledHelp = true; }
73 //valid parameters for this command
74 string Array[] = {"shared","sabund","label","inputdir","outputdir"};
75 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
77 OptionParser parser(option);
78 map<string,string> parameters = parser.getParameters();
80 ValidParameters validParameter;
81 map<string, string>::iterator it;
83 //check to make sure all parameters are valid for command
84 for (it = parameters.begin(); it != parameters.end(); it++) {
85 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
88 //initialize outputTypes
89 vector<string> tempOutNames;
90 outputTypes["csv"] = tempOutNames;
91 outputTypes["summary"] = tempOutNames;
93 //if the user changes the input directory command factory will send this info to us in the output parameter
94 string inputDir = validParameter.validFile(parameters, "inputdir", false);
// no inputdir given: file names are used exactly as typed
95 if (inputDir == "not found"){ inputDir = ""; }
98 it = parameters.find("sabund");
99 //user has given a sabund file
100 if(it != parameters.end()){
101 path = m->hasPath(it->second);
102 //if the user has not given a path then, add inputdir. else leave path alone.
103 if (path == "") { parameters["sabund"] = inputDir + it->second; }
106 it = parameters.find("shared");
107 //user has given a shared file
108 if(it != parameters.end()){
109 path = m->hasPath(it->second);
110 //if the user has not given a path then, add inputdir. else leave path alone.
111 if (path == "") { parameters["shared"] = inputDir + it->second; }
115 //check for required parameters
116 sabundfile = validParameter.validFile(parameters, "sabund", true);
// "not open" aborts the command; "not found" just leaves sabundfile empty
117 if (sabundfile == "not open") { sabundfile = ""; abort = true; }
118 else if (sabundfile == "not found") { sabundfile = ""; }
// a usable sabund file is also recorded on globaldata together with its format
119 else { globaldata->setSabundFile(sabundfile); globaldata->setFormat("sabund"); }
121 sharedfile = validParameter.validFile(parameters, "shared", true);
122 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
123 else if (sharedfile == "not found") { sharedfile = ""; }
125 //check for shared file loaded during read.otu
126 if (sharedfile == "") {
127 if (globaldata->getSharedFile() != "") { sharedfile = globaldata->getSharedFile(); }
130 string label = validParameter.validFile(parameters, "label", false);
131 if (label == "not found") { label = ""; }
// any value other than "all" is split at dashes into the labels set; "all" selects every label
133 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
134 else { allLines = 1; }
// at least one of the two input files is mandatory
137 if ((sharedfile == "") && (sabundfile == "")) { m->mothurOut("You must provide a sabund or shared file for the catchall command."); m->mothurOutEndLine(); abort=true; }
139 //if the user changes the output directory command factory will send this info to us in the output parameter
140 outputDir = validParameter.validFile(parameters, "outputdir", false);
// no outputdir given: default to the directory of the input file, sabund taking precedence
141 if (outputDir == "not found"){
142 if (sabundfile != "") { outputDir = m->hasPath(sabundfile); }
143 else { outputDir = m->hasPath(sharedfile); }
148 catch(exception& e) {
149 m->errorOut(e, "CatchAllCommand", "CatchAllCommand");
153 //**********************************************************************************************************************
// Prints the usage text for the catchall command through m->mothurOut:
// what the command wraps, where the executable must live, the mono requirement,
// the accepted parameters and an example invocation.
155 void CatchAllCommand::help(){
157 m->mothurOut("The catchall command interfaces mothur with the catchall program written by Linda Woodard, Sean Connolly and John Bunge.\n");
158 m->mothurOut("For more information about catchall refer to http://www.northeastern.edu/catchall/index.html \n");
159 m->mothurOut("The catchall executable must be in the same folder as your mothur executable. \n");
160 m->mothurOut("If you are a MAC or Linux user you must also have installed mono, a link to mono is on the webpage. \n");
161 m->mothurOut("The catchall command parameters are shared, sabund and label. shared or sabund is required. \n");
162 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
163 m->mothurOut("The catchall command should be in the following format: \n");
164 m->mothurOut("catchall(sabund=yourSabundFile) \n");
165 m->mothurOut("Example: catchall(sabund=abrecovery.fn.sabund) \n");
167 catch(exception& e) {
168 m->errorOut(e, "CatchAllCommand", "help");
173 /**************************************************************************************/
// Runs the external catchall program once per selected label of every input file.
// For each run it writes a temporary csv input file, invokes catchall through
// system(), registers the four csv files catchall is expected to produce and
// appends one line to a per-input summary file. With a shared file the input is
// first split into one sabund file per group and the per-group summaries are
// combined at the end.
// Returns 0 when help was called or when control_pressed interrupts the run,
// and 2 when the command was aborted for another reason.
// NOTE(review): the create-file / run / register-outputs / summarize sequence
// appears three times below with the same statements; it is a candidate for a
// private helper.
174 int CatchAllCommand::execute() {
177 if (abort == true) { if (calledHelp) { return 0; } return 2; }
179 //get location of catchall
// NOTE(review): this local declaration shadows the globaldata assigned in the constructor.
180 GlobalData* globaldata = GlobalData::getInstance();
181 path = globaldata->argv;
// NOTE(review): find_last_of("othur") returns the position of the last character
// that is any one of o, t, h, u, r - it is not a substring search. Subtracting 5
// only lands on the start of "mothur" when that last matching character is the
// final r of the executable name; rfind would be the substring search. Confirm.
182 path = path.substr(0, (path.find_last_of("othur")-5));
183 path = m->getFullPathName(path);
// keep the output directory as it was before it is forced to a full path below
185 savedOutputDir = outputDir;
186 string catchAllCommandExe = "";
// Apple/Linux builds run the L executable through mono
187 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
188 catchAllCommandExe += "mono " + path + "CatchAllcmdL.exe ";
189 if (outputDir == "") { outputDir = "./"; } //force full pathname to be created for catchall, this is necessary because if catchall is in the path it will look for input file wherever the exe is and not the cwd.
// presumably the branch for all other platforms: the W executable, wrapped in quotes
191 catchAllCommandExe += "\"" + path + "CatchAllcmdW.exe\"" + " ";
192 if (outputDir == "") { outputDir = ".\\"; } //force full pathname to be created for catchall, this is necessary because if catchall is in the path it will look for input file wherever the exe is and not the cwd.
195 //prepare full output directory
196 outputDir = m->getFullPathName(outputDir);
198 vector<string> inputFileNames;
// a shared file is split into one sabund file per group; otherwise the single sabund file is used
199 if (sharedfile != "") { inputFileNames = parseSharedFile(sharedfile); globaldata->setFormat("sabund"); }
200 else { inputFileNames.push_back(sabundfile); }
202 for (int p = 0; p < inputFileNames.size(); p++) {
// more than one input file means the inputs are per-group files; announce the group
203 if (inputFileNames.size() > 1) {
204 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
// NOTE(review): input is allocated with new; on the paths visible here it is only
// deleted when control_pressed is set. Confirm it is released on the normal path.
207 InputData* input = new InputData(inputFileNames[p], "sabund");
208 SAbundVector* sabund = input->getSAbundVector();
// NOTE(review): sabund is dereferenced here before the NULL test in the loop
// condition below; confirm getSAbundVector() cannot return NULL for the first read.
209 string lastLabel = sabund->getLabel();
211 set<string> processedLabels;
212 set<string> userLabels = labels;
// per-input summary file: <outputDir><root of input name>catchall.summary
214 string summaryfilename = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "catchall.summary";
215 summaryfilename = m->getFullPathName(summaryfilename);
218 m->openOutputFile(summaryfilename, out);
220 out << "label\tmodel\testimate\tlci\tuci" << endl;
222 //for each label the user selected
223 while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
// process this vector when every label is wanted or its label was requested
226 if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
227 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
229 //create catchall input file from mothur's inputfile
230 string filename = process(sabund, inputFileNames[p]);
231 string outputPath = m->getPathName(filename);
233 //create system command
234 string catchAllCommand = "";
235 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
// NOTE(review): filename and outputPath are not quoted on this branch, so a path
// containing spaces or shell metacharacters is split or interpreted by the shell.
236 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
// drops the last character of outputPath - presumably the trailing path separator - before quoting
238 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
239 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
240 //wrap entire string in ""
241 catchAllCommand = "\"" + catchAllCommand + "\"";
// the return value of system() is ignored; failure is detected later in createSummaryFile
245 system(catchAllCommand.c_str());
// the temporary csv input file is no longer needed
247 remove(filename.c_str());
249 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
// when no output directory was requested, report names without the directory part
250 if (savedOutputDir == "") { filename = m->getSimpleName(filename); }
// register the four csv files catchall is expected to have written
252 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
253 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
254 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
255 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
257 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
// interrupted (createSummaryFile also sets control_pressed on failure): remove outputs and stop
259 if (m->control_pressed) { out.close(); for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } delete input; delete sabund; return 0; }
261 processedLabels.insert(sabund->getLabel());
262 userLabels.erase(sabund->getLabel());
// when anyLabelsToProcess reports true for the current label and lastLabel has
// not been processed yet, the vector for lastLabel is re-read and processed
// NOTE(review): exact semantics of anyLabelsToProcess are defined elsewhere - confirm.
265 if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
266 string saveLabel = sabund->getLabel();
269 sabund = (input->getSAbundVector(lastLabel));
271 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
274 //create catchall input file from mothur's inputfile
275 string filename = process(sabund, inputFileNames[p]);
276 string outputPath = m->getPathName(filename);
278 //create system command
279 string catchAllCommand = "";
280 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
281 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
283 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
284 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
285 catchAllCommand = "\"" + catchAllCommand + "\"";
289 system(catchAllCommand.c_str());
291 remove(filename.c_str());
293 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
294 if (savedOutputDir == "") { filename = m->getSimpleName(filename); }
296 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
297 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
298 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
299 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
301 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
303 if (m->control_pressed) { out.close(); for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } delete input; delete sabund; return 0; }
305 processedLabels.insert(sabund->getLabel());
306 userLabels.erase(sabund->getLabel());
308 //restore real lastlabel to save below
309 sabund->setLabel(saveLabel);
// remember the label just seen, then advance to the next vector in the file
313 lastLabel = sabund->getLabel();
316 sabund = (input->getSAbundVector());
319 //output error messages about any remaining user labels
320 set<string>::iterator it;
321 bool needToRun = false;
322 for (it = userLabels.begin(); it != userLabels.end(); it++) {
323 m->mothurOut("Your file does not include the label " + *it);
// lastLabel not processed yet: it will stand in for the missing label
324 if (processedLabels.count(lastLabel) != 1) {
325 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
328 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
332 //run last label if you need to
333 if (needToRun == true) {
334 if (sabund != NULL) { delete sabund; }
335 sabund = (input->getSAbundVector(lastLabel));
337 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
339 //create catchall input file from mothur's inputfile
340 string filename = process(sabund, inputFileNames[p]);
341 string outputPath = m->getPathName(filename);
343 //create system command
344 string catchAllCommand = "";
345 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
346 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
348 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
349 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
350 catchAllCommand = "\"" + catchAllCommand + "\"";
354 system(catchAllCommand.c_str());
356 remove(filename.c_str());
358 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
359 if (savedOutputDir == "") { filename = m->getSimpleName(filename); }
361 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
362 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
363 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
364 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
366 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
// interrupted: remove every registered output file and stop
374 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } return 0; }
// sabund input: register its single summary file
378 if (sharedfile == "") {
379 string summaryfilename = savedOutputDir + m->getRootName(m->getSimpleName(inputFileNames[0])) + "catchall.summary";
380 summaryfilename = m->getFullPathName(summaryfilename);
381 outputNames.push_back(summaryfilename); outputTypes["summary"].push_back(summaryfilename);
382 }else { //combine summaries
383 vector<string> sumNames;
384 for (int i = 0; i < inputFileNames.size(); i++) {
385 sumNames.push_back(m->getFullPathName(outputDir + m->getRootName(m->getSimpleName(inputFileNames[i])) + "catchall.summary"));
387 string summaryfilename = combineSummmary(sumNames);
388 outputNames.push_back(summaryfilename); outputTypes["summary"].push_back(summaryfilename);
// put the shared file and its format back on globaldata (format was switched to sabund above)
390 globaldata->setSharedFile(sharedfile); globaldata->setFormat("sharedfile");
// list every output file name for the user
393 m->mothurOutEndLine();
394 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
395 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
396 m->mothurOutEndLine();
401 catch(exception& e) {
402 m->errorOut(e, "CatchAllCommand", "execute");
406 //**********************************************************************************************************************
// Writes the csv input file for catchall from one SAbundVector.
// The file is named <outputDir><root of file1><label>.csv (made a full path) and
// receives "rank,count" lines for ranks 1 through sabund->getMaxRank().
//   sabund - vector whose label and per-rank values are written
//   file1  - input file name the csv name is derived from
// Callers in execute() treat the returned string as the path of the file written.
407 string CatchAllCommand::process(SAbundVector* sabund, string file1) {
409 string filename = outputDir + m->getRootName(m->getSimpleName(file1)) + sabund->getLabel() + ".csv";
410 filename = m->getFullPathName(filename);
413 m->openOutputFile(filename, out);
// ranks start at 1
415 for (int i = 1; i <= sabund->getMaxRank(); i++) {
416 int temp = sabund->get(i);
419 out << i << "," << temp << endl;
427 catch(exception& e) {
428 m->errorOut(e, "CatchAllCommand", "process");
432 //*********************************************************************************************************************
// Merges the per-group summary files into one file named
// <savedOutputDir><root of sharedfile>catchall.summary and returns that name.
// Each input line gets the group name inserted as a second column; the per-group
// files are removed after being read. Output is interleaved: line k of every
// group file is written before line k+1 of any of them.
//   outputNames - per-group summary file names, index-aligned with groups
// NOTE(review): this parameter has the same name as the outputNames used in execute().
433 string CatchAllCommand::combineSummmary(vector<string>& outputNames) {
437 string combineFileName = savedOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "catchall.summary";
440 m->openOutputFile(combineFileName, out);
442 out << "label\tgroup\tmodel\testimate\tlci\tuci" << endl;
444 //open each groups summary file
445 string newLabel = "";
// file name -> its reformatted lines
447 map<string, vector<string> > files;
448 for (int i=0; i<outputNames.size(); i++) {
449 vector<string> thisFilesLines;
452 m->openInputFile(outputNames[i], temp);
454 //read through first line - labels
459 while (!temp.eof()) {
461 string thisLine = "";
// five columns per line in the per-group file: label, model, estimate, lci, uci
464 for (int j = 0; j < 5; j++) {
// the group name is inserted in front of the second column
468 if (j == 1) { thisLine += groups[i] + "\t" + tempLabel + "\t"; }
469 else{ thisLine += tempLabel + "\t"; }
474 thisFilesLines.push_back(thisLine);
479 files[outputNames[i]] = thisFilesLines;
// NOTE(review): numLines ends up as the line count of the last file read, yet it
// bounds the index used on every file below; files of different length would be
// indexed out of range. Confirm all group summaries have the same number of lines.
481 numLines = thisFilesLines.size();
// the per-group summary is no longer needed once its lines are in memory
484 remove(outputNames[i].c_str());
488 for (int k = 0; k < numLines; k++) {
490 //grab summary data for each group
491 for (int i=0; i<outputNames.size(); i++) {
492 out << files[outputNames[i]][k];
499 //return combine file name
500 return combineFileName;
503 catch(exception& e) {
504 m->errorOut(e, "CatchAllCommand", "combineSummmary");
508 //**********************************************************************************************************************
// Extracts one summary line from a catchall BestModelsAnalysis csv file and
// writes it to out as: label, values[1], values[4], values[6], values[7].
//   file1 - csv file produced by catchall
//   label - label written in the first column
//   out   - open summary stream to append to
// If file1 cannot be opened an error is printed, m->control_pressed is set and 0
// is returned.
509 int CatchAllCommand::createSummaryFile(string file1, string label, ofstream& out) {
// "noerror" is passed so the error message below is the one the user sees
// NOTE(review): presumably it suppresses the default open-failure message - confirm.
513 int able = m->openInputFile(file1, in, "noerror");
515 if (able == 1) { m->mothurOut("[ERROR]: the catchall program did not run properly. Please check to make sure it is located in the same folder as your mothur executable.");m->mothurOutEndLine(); m->control_pressed = true; return 0; }
519 string header = m->getline(in); m->gobble(in);
// NOTE(review): pos is an int but is compared with string::npos and
// header.length(), both unsigned; the tests rely on implicit conversion.
521 int pos = header.find("Total Number of Observed Species =");
522 string numString = "";
525 if (pos == string::npos) { m->mothurOut("[ERROR]: cannot parse " + file1); m->mothurOutEndLine(); }
527 //pos will be the position of the T in total, so we want to count to the position of =
538 if (pos > header.length()) { m->mothurOut("Cannot find number of OTUs in " + file1); m->mothurOutEndLine(); in.close(); return 0; }
542 string firstline = m->getline(in); m->gobble(in);
543 vector<string> values;
544 m->splitAtComma(firstline, values);
// NOTE(review): pop_back on an empty vector is undefined behaviour; confirm
// splitAtComma always leaves at least one element.
546 values.pop_back(); //last value is always a blank string since the last character in the line is always a ','
548 if (values.size() == 1) { //grab next line if firstline didn't have what you wanted
549 string secondline = m->getline(in); m->gobble(in);
// NOTE(review): whether splitAtComma clears values before filling it is defined elsewhere - confirm.
551 m->splitAtComma(secondline, values);
553 values.pop_back(); //last value is always a blank string since the last character in the line is always a ','
556 if (values.size() == 1) { //still not what we wanted fill values with numOTUs
557 values.resize(8, "");
// columns 4, 6 and 7 are the ones written below as estimate, lci and uci
559 values[4] = numString;
560 values[6] = numString;
561 values[7] = numString;
// pad with empty strings so the indexed reads below stay in range
564 if (values.size() < 8) { values.resize(8, ""); }
566 out << label << '\t' << values[1] << '\t' << values[4] << '\t' << values[6] << '\t' << values[7] << endl;
574 catch(exception& e) {
575 m->errorOut(e, "CatchAllCommand", "createSummaryFile");
579 //**********************************************************************************************************************
// Splits a shared file into one sabund file per group, named
// <outputDir><root of filename><group>.sabund. The group names are appended to
// groups in the same order as the file names are collected.
//   filename - shared file to split
580 vector<string> CatchAllCommand::parseSharedFile(string filename) {
582 vector<string> filenames;
// NOTE(review): input is allocated with new; confirm it is deleted before returning.
585 InputData* input = new InputData(filename, "sharedfile");
586 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
588 string sharedFileRoot = outputDir + m->getRootName(m->getSimpleName(filename));
590 //clears file before we start to write to it below
591 for (int i=0; i<lookup.size(); i++) {
592 remove((sharedFileRoot + lookup[i]->getGroup() + ".sabund").c_str());
593 filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".sabund"));
594 groups.push_back(lookup[i]->getGroup());
// one pass per set of vectors read from the shared file
// NOTE(review): lookup[0] is read without checking that lookup is non-empty.
597 while(lookup[0] != NULL) {
599 for (int i = 0; i < lookup.size(); i++) {
600 SAbundVector sav = lookup[i]->getSAbundVector();
// append mode: each pass adds to the same per-group file
602 m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".sabund", out);
// release this set of vectors before reading the next one
607 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
608 lookup = input->getSharedRAbundVectors();
615 catch(exception& e) {
616 m->errorOut(e, "CatchAllCommand", "parseSharedFile");
620 /**************************************************************************************/