5 * Created by westcott on 5/11/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "catchallcommand.h"
11 #include "globaldata.hpp"
13 //**********************************************************************************************************************
// Assembles the parameter names the catchall command accepts:
// sabund, shared, label, inputdir and outputdir.
14 vector<string> CatchAllCommand::getValidParameters(){
16 string AlignArray[] = {"sabund","shared","label","inputdir","outputdir"};
// Iterator-range construction; sizeof(array)/sizeof(string) is the element count.
17 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
// Failures are reported through m->errorOut, tagged with the class and method name.
21 m->errorOut(e, "CatchAllCommand", "getValidParameters");
25 //**********************************************************************************************************************
// Default constructor: marks the command as aborted with help already called,
// and registers empty "csv" and "summary" entries in outputTypes.
26 CatchAllCommand::CatchAllCommand(){
// With both flags set, execute() returns 0 right away (see its abort check).
28 abort = true; calledHelp = true;
29 //initialize outputTypes
30 vector<string> tempOutNames;
31 outputTypes["csv"] = tempOutNames;
32 outputTypes["summary"] = tempOutNames;
// Failures are reported through m->errorOut, tagged with the class and method name.
35 m->errorOut(e, "CatchAllCommand", "CatchAllCommand");
39 //**********************************************************************************************************************
// Assembles the required-parameter list: "sabund", "shared" and the marker "or".
// NOTE(review): "or" presumably tells the caller that either sabund or shared
// satisfies the requirement (the constructor accepts either one) - confirm
// against the code that consumes this list.
40 vector<string> CatchAllCommand::getRequiredParameters(){
42 string AlignArray[] = {"sabund","shared","or"};
// Iterator-range construction; sizeof(array)/sizeof(string) is the element count.
43 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
47 m->errorOut(e, "CatchAllCommand", "getRequiredParameters");
51 //**********************************************************************************************************************
// Required-files query: the only visible work is declaring an empty vector,
// so no entries are added to it in the code shown here.
52 vector<string> CatchAllCommand::getRequiredFiles(){
54 vector<string> myArray;
58 m->errorOut(e, "CatchAllCommand", "getRequiredFiles");
62 /**************************************************************************************/
// Option-string constructor. Parses `option`, validates the parameter names,
// resolves the sabund/shared input files (prefixing inputdir when the user gave
// no path), reads the label selection and picks the output directory.
// Sets `abort` when a parameter is invalid, a file cannot be opened, or neither
// a sabund nor a shared file is available.
63 CatchAllCommand::CatchAllCommand(string option) {
65 globaldata = GlobalData::getInstance();
66 abort = false; calledHelp = false;
69 //allow user to run help
70 if(option == "help") { help(); abort = true; calledHelp = true; }
73 //valid parameters for this command
74 string Array[] = {"shared","sabund","label","inputdir","outputdir"};
75 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
77 OptionParser parser(option);
78 map<string,string> parameters = parser.getParameters();
80 ValidParameters validParameter;
81 map<string, string>::iterator it;
83 //check to make sure all parameters are valid for command
84 for (it = parameters.begin(); it != parameters.end(); it++) {
85 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
88 //initialize outputTypes
89 vector<string> tempOutNames;
90 outputTypes["csv"] = tempOutNames;
91 outputTypes["summary"] = tempOutNames;
93 //if the user changes the input directory command factory will send this info to us in the output parameter
94 string inputDir = validParameter.validFile(parameters, "inputdir", false);
// "not found" is the sentinel validFile hands back when the parameter was not supplied.
95 if (inputDir == "not found"){ inputDir = ""; }
98 it = parameters.find("sabund");
99 //user has given a sabund file
100 if(it != parameters.end()){
101 path = m->hasPath(it->second);
102 //if the user has not given a path then, add inputdir. else leave path alone.
103 if (path == "") { parameters["sabund"] = inputDir + it->second; }
106 it = parameters.find("shared");
107 //user has given a shared file
108 if(it != parameters.end()){
109 path = m->hasPath(it->second);
110 //if the user has not given a path then, add inputdir. else leave path alone.
111 if (path == "") { parameters["shared"] = inputDir + it->second; }
115 //check for required parameters
116 sabundfile = validParameter.validFile(parameters, "sabund", true);
// "not open" aborts the command; "not found" just means the parameter was omitted.
117 if (sabundfile == "not open") { sabundfile = ""; abort = true; }
118 else if (sabundfile == "not found") { sabundfile = ""; }
// A usable sabund file is also recorded in GlobalData together with its format.
119 else { globaldata->setSabundFile(sabundfile); globaldata->setFormat("sabund"); }
121 sharedfile = validParameter.validFile(parameters, "shared", true);
122 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
123 else if (sharedfile == "not found") { sharedfile = ""; }
125 //check for shared file loaded during read.otu
126 if (sharedfile == "") {
127 if (globaldata->getSharedFile() != "") { sharedfile = globaldata->getSharedFile(); }
130 string label = validParameter.validFile(parameters, "label", false);
131 if (label == "not found") { label = ""; }
// Anything other than the literal "all" is split on dashes into `labels`;
// "all" switches on allLines so every label in the input is processed.
133 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
134 else { allLines = 1; }
137 if ((sharedfile == "") && (sabundfile == "")) { m->mothurOut("You must provide a sabund or shared file for the catchall command."); m->mothurOutEndLine(); abort=true; }
139 //if the user changes the output directory command factory will send this info to us in the output parameter
140 outputDir = validParameter.validFile(parameters, "outputdir", false);
// Without an explicit outputdir, fall back to the path of the input file
// (the sabund file takes precedence over the shared file).
141 if (outputDir == "not found"){
142 if (sabundfile != "") { outputDir = m->hasPath(sabundfile); }
143 else { outputDir = m->hasPath(sharedfile); }
148 catch(exception& e) {
149 m->errorOut(e, "CatchAllCommand", "CatchAllCommand");
153 //**********************************************************************************************************************
// Prints the usage text for the catchall command through m->mothurOut:
// what the external program is, where its executable must live, the mono
// requirement on Mac/Linux, the accepted parameters and an example invocation.
155 void CatchAllCommand::help(){
157 m->mothurOut("The catchall command interfaces mothur with the catchall program written by Linda Woodard, Sean Connolly and John Bunge.\n");
158 m->mothurOut("For more information about catchall refer to http://www.northeastern.edu/catchall/index.html \n");
159 m->mothurOut("The catchall executable must be in the same folder as your mothur executable. \n");
160 m->mothurOut("If you are a MAC or Linux user you must also have installed mono, a link to mono is on the webpage. \n");
161 m->mothurOut("The catchall command parameters are shared, sabund and label. shared or sabund is required. \n");
162 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
163 m->mothurOut("The catchall command should be in the following format: \n");
164 m->mothurOut("catchall(sabund=yourSabundFile) \n");
165 m->mothurOut("Example: catchall(sabund=abrecovery.fn.sabund) \n");
167 catch(exception& e) {
168 m->errorOut(e, "CatchAllCommand", "help");
173 /**************************************************************************************/
// Runs the external catchall program once per selected label of each input file.
//
// For every input file (the sabund file itself, or one per-group sabund file
// produced from the shared file) this:
//   1. writes the label's abundance data to a CSV via process(),
//   2. builds a shell command line and launches it with system(),
//   3. removes the temporary CSV and records the four catchall result files,
//   4. appends one line to the per-input "catchall.summary" file.
// Afterwards the summary file(s) are registered (combined into one when the
// input was a shared file) and all output file names are printed.
//
// Visible return values: 0 when help was called or when control_pressed is
// detected (already-recorded output files are removed first), 2 when the
// command was aborted for another reason.
//
// NOTE(review): the run-catchall sequence appears three times below (selected
// label, fallback to lastLabel, final needToRun pass); a shared helper would
// remove the duplication.
174 int CatchAllCommand::execute() {
177 if (abort == true) { if (calledHelp) { return 0; } return 2; }
179 //prepare full output directory
180 outputDir = m->getFullPathName(outputDir);
182 //get location of catchall
// This local `globaldata` hides the `globaldata` assigned in the constructor.
183 GlobalData* globaldata = GlobalData::getInstance();
184 path = globaldata->argv;
// Keeps everything before the LAST 'm' in argv.
// NOTE(review): presumably relies on the executable being named "mothur" so the
// last 'm' starts the file name; a renamed binary containing a later 'm' would
// give a wrong directory - confirm.
185 path = path.substr(0, (path.find_last_of('m')));
186 path = m->getFullPathName(path);
188 string catchAllCommandExe = "";
189 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
// Mac/Linux: catchall is launched through mono; `path` is not quoted here.
// NOTE(review): a path containing spaces would split into several shell words.
190 catchAllCommandExe += "mono " + path + "CatchAllcmdL.exe ";
// Quoted Windows executable (the preprocessor directive separating this line
// from the one above is not visible in this listing).
192 catchAllCommandExe += "\"" + path + "CatchAllcmdW.exe\"" + " ";
195 vector<string> inputFileNames;
// A shared file is first split into one sabund file per group; parseSharedFile
// also fills `groups`, which is indexed in parallel with inputFileNames below.
196 if (sharedfile != "") { inputFileNames = parseSharedFile(sharedfile); globaldata->setFormat("sabund"); }
197 else { inputFileNames.push_back(sabundfile); }
199 for (int p = 0; p < inputFileNames.size(); p++) {
200 if (inputFileNames.size() > 1) {
201 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
204 InputData* input = new InputData(inputFileNames[p], "sabund");
205 SAbundVector* sabund = input->getSAbundVector();
206 string lastLabel = sabund->getLabel();
// processedLabels: labels already run; userLabels: requested labels still pending.
208 set<string> processedLabels;
209 set<string> userLabels = labels;
211 string summaryfilename = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "catchall.summary";
212 summaryfilename = m->getFullPathName(summaryfilename);
215 m->openOutputFile(summaryfilename, out);
217 out << "label\tmodel\testimate\tlci\tuci" << endl;
219 //for each label the user selected
220 while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
// Case 1: the current label was requested (or every label is wanted).
223 if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
224 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
226 //create catchall input file from mothur's inputfile
227 string filename = process(sabund, inputFileNames[p]);
228 string outputPath = m->getPathName(filename);
230 //create system command
231 string catchAllCommand = "";
232 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
// Command layout: <exe> <input csv> <output dir> 1
233 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
// Windows form: drop the last character of outputPath, then quote each argument.
235 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
236 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
237 //wrap entire string in ""
238 catchAllCommand = "\"" + catchAllCommand + "\"";
241 system(catchAllCommand.c_str());
// The temporary CSV written by process() is no longer needed.
243 remove(filename.c_str());
245 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
// Record the four result files named after the input root.
// NOTE(review): presumably these are written by the catchall program itself - confirm.
247 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
248 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
249 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
250 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
252 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
// createSummaryFile sets control_pressed when the result file cannot be opened,
// so this check also covers a failed catchall run.
254 if (m->control_pressed) { out.close(); for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } delete input; delete sabund; return 0; }
256 processedLabels.insert(sabund->getLabel());
257 userLabels.erase(sabund->getLabel());
// Case 2: anyLabelsToProcess reports true and the previous label has not been
// run yet, so the previous label (lastLabel) is re-read and processed instead.
// NOTE(review): exact semantics of anyLabelsToProcess are not visible here.
260 if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
261 string saveLabel = sabund->getLabel();
264 sabund = (input->getSAbundVector(lastLabel));
266 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
269 //create catchall input file from mothur's inputfile
270 string filename = process(sabund, inputFileNames[p]);
271 string outputPath = m->getPathName(filename);
273 //create system command
274 string catchAllCommand = "";
275 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
276 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
278 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
279 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
280 catchAllCommand = "\"" + catchAllCommand + "\"";
284 system(catchAllCommand.c_str());
286 remove(filename.c_str());
288 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
290 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
291 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
292 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
293 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
295 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
297 if (m->control_pressed) { out.close(); for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } delete input; delete sabund; return 0; }
299 processedLabels.insert(sabund->getLabel());
300 userLabels.erase(sabund->getLabel());
302 //restore real lastlabel to save below
303 sabund->setLabel(saveLabel);
// Remember this label, then advance to the next vector in the file.
307 lastLabel = sabund->getLabel();
310 sabund = (input->getSAbundVector());
313 //output error messages about any remaining user labels
314 set<string>::iterator it;
315 bool needToRun = false;
316 for (it = userLabels.begin(); it != userLabels.end(); it++) {
317 m->mothurOut("Your file does not include the label " + *it);
// If lastLabel was never processed it is announced as the substitute;
// otherwise the user is pointed at the results already produced for it.
// NOTE(review): the statement that sets needToRun is not visible in this listing.
318 if (processedLabels.count(lastLabel) != 1) {
319 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
322 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
326 //run last label if you need to
327 if (needToRun == true) {
328 if (sabund != NULL) { delete sabund; }
329 sabund = (input->getSAbundVector(lastLabel));
331 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
333 //create catchall input file from mothur's inputfile
334 string filename = process(sabund, inputFileNames[p]);
335 string outputPath = m->getPathName(filename);
337 //create system command
338 string catchAllCommand = "";
339 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
340 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
342 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
343 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
344 catchAllCommand = "\"" + catchAllCommand + "\"";
348 system(catchAllCommand.c_str());
350 remove(filename.c_str());
352 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
354 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
355 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
356 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
357 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
359 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
367 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } return 0; }
// Single sabund input: register its summary file directly.
371 if (sharedfile == "") {
372 string summaryfilename = outputDir + m->getRootName(m->getSimpleName(inputFileNames[0])) + "catchall.summary";
373 summaryfilename = m->getFullPathName(summaryfilename);
374 outputNames.push_back(summaryfilename); outputTypes["summary"].push_back(summaryfilename);
375 }else { //combine summaries
376 vector<string> sumNames;
377 for (int i = 0; i < inputFileNames.size(); i++) {
378 sumNames.push_back(m->getFullPathName(outputDir + m->getRootName(m->getSimpleName(inputFileNames[i])) + "catchall.summary"));
380 string summaryfilename = combineSummmary(sumNames);
381 outputNames.push_back(summaryfilename); outputTypes["summary"].push_back(summaryfilename);
384 m->mothurOutEndLine();
385 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
386 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
387 m->mothurOutEndLine();
392 catch(exception& e) {
393 m->errorOut(e, "CatchAllCommand", "execute");
397 //**********************************************************************************************************************
// Writes one SAbundVector as the CSV input for catchall.
//   sabund - abundance vector to export; its label becomes part of the file name
//   file1  - input file the vector came from; its root name prefixes the file name
// The file is <outputDir><root of file1><label>.csv, expanded to a full path.
// execute() uses the returned string as the path of the file written here.
398 string CatchAllCommand::process(SAbundVector* sabund, string file1) {
400 string filename = outputDir + m->getRootName(m->getSimpleName(file1)) + sabund->getLabel() + ".csv";
401 filename = m->getFullPathName(filename);
404 m->openOutputFile(filename, out);
// Ranks are 1-based and the loop includes getMaxRank() itself.
406 for (int i = 1; i <= sabund->getMaxRank(); i++) {
407 int temp = sabund->get(i);
// One "rank,value" row per line.
// NOTE(review): lines between the read above and this write are not visible,
// so rows may be written conditionally - confirm.
410 out << i << "," << temp << endl;
418 catch(exception& e) {
419 m->errorOut(e, "CatchAllCommand", "process");
423 //*********************************************************************************************************************
// Merges the per-group summary files into one file named
// <outputDir><root of sharedfile>catchall.summary and returns that name.
//   outputNames - per-group summary file names, indexed in parallel with `groups`.
//                 Note this parameter shares its name with the outputNames
//                 collection that execute() appends to.
// Each input file is read fully, the group name is inserted as a second column,
// and the input file is then deleted. Output is interleaved by line: line k of
// every group is written before line k+1 of any group.
424 string CatchAllCommand::combineSummmary(vector<string>& outputNames) {
428 string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "catchall.summary";
431 m->openOutputFile(combineFileName, out);
433 out << "label\tgroup\tmodel\testimate\tlci\tuci" << endl;
435 //open each groups summary file
436 string newLabel = "";
// Maps each input file name to its reformatted lines.
438 map<string, vector<string> > files;
439 for (int i=0; i<outputNames.size(); i++) {
440 vector<string> thisFilesLines;
443 m->openInputFile(outputNames[i], temp);
445 //read through first line - labels
450 while (!temp.eof()) {
452 string thisLine = "";
// Five columns per input line (label, model, estimate, lci, uci per the
// header written in execute()).
455 for (int j = 0; j < 5; j++) {
// The group name goes in front of the second column (j == 1), matching the
// "label, group, model, ..." header above.
459 if (j == 1) { thisLine += groups[i] + "\t" + tempLabel + "\t"; }
460 else{ thisLine += tempLabel + "\t"; }
465 thisFilesLines.push_back(thisLine);
470 files[outputNames[i]] = thisFilesLines;
// Overwritten on every iteration, so the value used below is the last file's count.
// NOTE(review): if an earlier file has fewer lines than the last one, the
// indexing in the output loop would run past its end - confirm all per-group
// summaries always have the same number of lines.
472 numLines = thisFilesLines.size();
// The per-group summary is no longer needed once its lines are in memory.
475 remove(outputNames[i].c_str());
479 for (int k = 0; k < numLines; k++) {
481 //grab summary data for each group
482 for (int i=0; i<outputNames.size(); i++) {
483 out << files[outputNames[i]][k];
490 //return combine file name
491 return combineFileName;
494 catch(exception& e) {
495 m->errorOut(e, "CatchAllCommand", "combineSummmary");
499 //**********************************************************************************************************************
// Extracts one summary row from a catchall "_BestModelsAnalysis.csv" file and
// appends it to `out`.
//   file1 - path of the catchall result file to read
//   label - label written in the first column of the row
//   out   - already-open summary stream (columns: label, model, estimate, lci, uci)
// If file1 cannot be opened, an error is printed, m->control_pressed is set so
// the caller stops, and 0 is returned. 0 is also returned when the observed
// species count cannot be located in the header line.
500 int CatchAllCommand::createSummaryFile(string file1, string label, ofstream& out) {
// "noerror" is passed so the open itself stays quiet; the result is checked below.
504 int able = m->openInputFile(file1, in, "noerror");
// A result of 1 means the file could not be opened.
506 if (able == 1) { m->mothurOut("[ERROR]: the catchall program did not run properly. Please check to make sure it is located in the same folder as your mothur executable.");m->mothurOutEndLine(); m->control_pressed = true; return 0; }
510 string header = m->getline(in); m->gobble(in);
// NOTE(review): the find() result is stored in an int; the npos comparison
// below relies on -1 converting back to string::npos, and the later comparison
// with header.length() mixes signed and unsigned - size_t would be safer.
512 int pos = header.find("Total Number of Observed Species =");
513 string numString = "";
// Reports the parse failure; no return is visible on this line.
516 if (pos == string::npos) { m->mothurOut("[ERROR]: cannot parse " + file1); m->mothurOutEndLine(); }
518 //pos will be the position of the T in total, so we want to count to the position of =
529 if (pos > header.length()) { m->mothurOut("Cannot find number of OTUs in " + file1); m->mothurOutEndLine(); in.close(); return 0; }
533 string firstline = m->getline(in); m->gobble(in);
534 vector<string> values;
535 m->splitAtComma(firstline, values);
537 values.pop_back(); //last value is always a blank string since the last character in the line is always a ','
539 if (values.size() == 1) { //grab next line if firstline didn't have what you wanted
540 string secondline = m->getline(in); m->gobble(in);
542 m->splitAtComma(secondline, values);
544 values.pop_back(); //last value is always a blank string since the last character in the line is always a ','
547 if (values.size() == 1) { //still not what we wanted fill values with numOTUs
548 values.resize(8, "");
// Fallback: estimate, lci and uci columns all take the observed species count.
550 values[4] = numString;
551 values[6] = numString;
552 values[7] = numString;
// Pad so the fixed indexes used below are always valid.
555 if (values.size() < 8) { values.resize(8, ""); }
// Fields 1, 4, 6 and 7 fill the model, estimate, lci and uci columns.
557 out << label << '\t' << values[1] << '\t' << values[4] << '\t' << values[6] << '\t' << values[7] << endl;
565 catch(exception& e) {
566 m->errorOut(e, "CatchAllCommand", "createSummaryFile");
570 //**********************************************************************************************************************
// Splits a shared file into one sabund file per group.
//   filename - path of the shared file
// Output files are named <outputDir><root of filename><group>.sabund. Their
// names are collected in `filenames` and the group names are appended to
// `groups` in the same order, so the two can be indexed in parallel.
// execute() uses the returned vector as its list of input files.
571 vector<string> CatchAllCommand::parseSharedFile(string filename) {
573 vector<string> filenames;
576 InputData* input = new InputData(filename, "sharedfile");
577 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
579 string sharedFileRoot = outputDir + m->getRootName(m->getSimpleName(filename));
581 //clears file before we start to write to it below
582 for (int i=0; i<lookup.size(); i++) {
583 remove((sharedFileRoot + lookup[i]->getGroup() + ".sabund").c_str());
584 filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".sabund"));
585 groups.push_back(lookup[i]->getGroup());
// One pass per label in the shared file; a NULL first entry marks the end.
// NOTE(review): lookup[0] assumes the vector is non-empty - confirm what
// getSharedRAbundVectors returns for an empty file.
588 while(lookup[0] != NULL) {
590 for (int i = 0; i < lookup.size(); i++) {
591 SAbundVector sav = lookup[i]->getSAbundVector();
// Opened in append mode so each label adds to the group's file.
593 m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".sabund", out);
// Release this label's vectors before reading the next set.
598 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
599 lookup = input->getSharedRAbundVectors();
606 catch(exception& e) {
607 m->errorOut(e, "CatchAllCommand", "parseSharedFile");
611 /**************************************************************************************/