5 * Created by westcott on 5/11/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "catchallcommand.h"
11 #include "globaldata.hpp"
13 //**********************************************************************************************************************
// Builds the list of parameter names the catchall command accepts:
// sabund, shared, label, inputdir and outputdir.
// NOTE(review): this listing omits some original lines of the function (the
// return statement and the try/catch framing are not visible here).
14 vector<string> CatchAllCommand::getValidParameters(){
16 string AlignArray[] = {"sabund","shared","label","inputdir","outputdir"};
// sizeof(AlignArray)/sizeof(string) is the element count, so the vector copies the whole array.
17 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
// Error path: report the exception together with the class and function name.
21 m->errorOut(e, "CatchAllCommand", "getValidParameters");
25 //**********************************************************************************************************************
// Default constructor: marks the command as aborted with help already called,
// and registers empty output-name lists for the "csv" and "summary" output types.
26 CatchAllCommand::CatchAllCommand(){
28 abort = true; calledHelp = true;
29 //initialize outputTypes
30 vector<string> tempOutNames;
31 outputTypes["csv"] = tempOutNames;
32 outputTypes["summary"] = tempOutNames;
// Error path: report the exception together with the class and function name.
35 m->errorOut(e, "CatchAllCommand", "CatchAllCommand");
39 //**********************************************************************************************************************
// Builds the list describing the required parameters: "sabund", "shared" and "or".
// NOTE(review): the trailing "or" entry presumably signals that either one of the
// preceding parameters is sufficient - confirm against the code that consumes this list.
// The return statement and try/catch framing are not visible in this listing.
40 vector<string> CatchAllCommand::getRequiredParameters(){
42 string AlignArray[] = {"sabund","shared","or"};
// sizeof(AlignArray)/sizeof(string) is the element count, so the vector copies the whole array.
43 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
// Error path: report the exception together with the class and function name.
47 m->errorOut(e, "CatchAllCommand", "getRequiredParameters");
51 //**********************************************************************************************************************
// Declares an empty list of required files; nothing is added to it in the
// lines visible here (the return statement is not shown in this listing).
52 vector<string> CatchAllCommand::getRequiredFiles(){
54 vector<string> myArray;
// Error path: report the exception together with the class and function name.
58 m->errorOut(e, "CatchAllCommand", "getRequiredFiles");
62 /**************************************************************************************/
// Constructor used when the command is invoked with an option string.
// Parses and validates the parameters, resolves the sabund/shared input files,
// the label selection and the output directory, and sets `abort` when the
// command cannot run (help requested, invalid parameter, unopenable or missing input).
// NOTE(review): several original lines (try/else/closing braces) are not visible
// in this listing, so the exact nesting of the statements below cannot be confirmed.
63 CatchAllCommand::CatchAllCommand(string option) {
65 globaldata = GlobalData::getInstance();
66 abort = false; calledHelp = false;
69 //allow user to run help
70 if(option == "help") { help(); abort = true; calledHelp = true; }
73 //valid parameters for this command
74 string Array[] = {"shared","sabund","label","inputdir","outputdir"};
75 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
77 OptionParser parser(option);
78 map<string,string> parameters = parser.getParameters();
80 ValidParameters validParameter;
81 map<string, string>::iterator it;
83 //check to make sure all parameters are valid for command
84 for (it = parameters.begin(); it != parameters.end(); it++) {
85 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
88 //initialize outputTypes
89 vector<string> tempOutNames;
90 outputTypes["csv"] = tempOutNames;
91 outputTypes["summary"] = tempOutNames;
93 //if the user changes the input directory command factory will send this info to us in the output parameter
94 string inputDir = validParameter.validFile(parameters, "inputdir", false);
// A "not found" result is treated as "no input directory given".
95 if (inputDir == "not found"){ inputDir = ""; }
98 it = parameters.find("sabund");
99 //user has given a sabund file
100 if(it != parameters.end()){
101 path = m->hasPath(it->second);
102 //if the user has not given a path then, add inputdir. else leave path alone.
103 if (path == "") { parameters["sabund"] = inputDir + it->second; }
106 it = parameters.find("shared");
107 //user has given a shared file
108 if(it != parameters.end()){
109 path = m->hasPath(it->second);
110 //if the user has not given a path then, add inputdir. else leave path alone.
111 if (path == "") { parameters["shared"] = inputDir + it->second; }
115 //check for required parameters
116 sabundfile = validParameter.validFile(parameters, "sabund", true);
// "not open" aborts the command; "not found" just means no sabund file was given;
// any other value is recorded in globaldata as the current sabund file and format.
117 if (sabundfile == "not open") { sabundfile = ""; abort = true; }
118 else if (sabundfile == "not found") { sabundfile = ""; }
119 else { globaldata->setSabundFile(sabundfile); globaldata->setFormat("sabund"); }
121 sharedfile = validParameter.validFile(parameters, "shared", true);
// Same handling for the shared file, except nothing is stored in globaldata here.
122 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
123 else if (sharedfile == "not found") { sharedfile = ""; }
125 //check for shared file loaded during read.otu
126 if (sharedfile == "") {
127 if (globaldata->getSharedFile() != "") { sharedfile = globaldata->getSharedFile(); }
130 string label = validParameter.validFile(parameters, "label", false);
131 if (label == "not found") { label = ""; }
// Any label other than "all" is split at dashes into `labels`; "all" selects every label.
133 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
134 else { allLines = 1; }
// At least one of the two input files must be available.
137 if ((sharedfile == "") && (sabundfile == "")) { m->mothurOut("You must provide a sabund or shared file for the catchall command."); m->mothurOutEndLine(); abort=true; }
139 //if the user changes the output directory command factory will send this info to us in the output parameter
140 outputDir = validParameter.validFile(parameters, "outputdir", false);
// Default the output directory to the path of the sabund file when one was given,
// otherwise to the path of the shared file.
141 if (outputDir == "not found"){
142 if (sabundfile != "") { outputDir = m->hasPath(sabundfile); }
143 else { outputDir = m->hasPath(sharedfile); }
148 catch(exception& e) {
149 m->errorOut(e, "CatchAllCommand", "CatchAllCommand");
153 //**********************************************************************************************************************
// Prints the usage text for the catchall command through m->mothurOut:
// what the external program is, where its executable must live, the accepted
// parameters (shared, sabund, label) and an example invocation.
155 void CatchAllCommand::help(){
157 m->mothurOut("The catchall command interfaces mothur with the catchall program written by Linda Woodard, Sean Connolly and John Bunge.\n");
158 m->mothurOut("For more information about catchall refer to http://www.northeastern.edu/catchall/index.html \n");
159 m->mothurOut("The catchall executable must be in the same folder as your mothur executable. \n");
160 m->mothurOut("If you are a MAC or Linux user you must also have installed mono, a link to mono is on the webpage. \n");
161 m->mothurOut("The catchall command parameters are shared, sabund and label. shared or sabund is required. \n");
162 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
163 m->mothurOut("The catchall command should be in the following format: \n");
164 m->mothurOut("catchall(sabund=yourSabundFile) \n");
165 m->mothurOut("Example: catchall(sabund=abrecovery.fn.sabund) \n");
167 catch(exception& e) {
168 m->errorOut(e, "CatchAllCommand", "help");
173 /**************************************************************************************/
// Runs the external catchall program once per selected label of each input file.
// For a shared file the input is first split into one sabund file per group
// (parseSharedFile); otherwise the single sabund file is used. For every processed
// label the function writes a catchall input CSV (process), invokes the executable
// through system(), records the four CSV files catchall is expected to produce, and
// appends one row to a per-input "catchall.summary" file (createSummaryFile).
// Per-group summaries are merged at the end via combineSummmary.
// Returns 0 when help was requested or the run was interrupted, 2 when the command
// was aborted for another reason.
// NOTE(review): many original lines (try, else, closing braces, #else/#endif,
// stream declarations, deletes) are not visible in this listing, so the nesting and
// the final return value cannot be confirmed from here.
174 int CatchAllCommand::execute() {
177 if (abort == true) { if (calledHelp) { return 0; } return 2; }
179 //get location of catchall
// NOTE(review): the constructor assigns `globaldata` without declaring it
// (presumably a member); this local declaration would shadow it - confirm.
180 GlobalData* globaldata = GlobalData::getInstance();
181 path = globaldata->argv;
// NOTE(review): find_last_of("othur") returns the position of the last character that
// is ANY of 'o','t','h','u','r', not the last occurrence of the substring. Subtracting 5
// only yields the directory prefix when the path ends in a 6-letter name finishing with
// one of those characters (e.g. "mothur"); if no character matches, npos-5 keeps the
// whole string. Confirm this is the intended behavior.
182 path = path.substr(0, (path.find_last_of("othur")-5));
183 path = m->getFullPathName(path);
185 string catchAllCommandExe = "";
// Platform selection: the first pair of lines builds "mono <path>CatchAllcmdL.exe " and
// defaults outputDir to "./"; the second pair builds a quoted "<path>CatchAllcmdW.exe"
// and defaults outputDir to ".\\". The #else/#endif lines are not visible here -
// presumably the second pair is the non-Apple/non-Linux branch.
// NOTE(review): `defined` applies only to __APPLE__; the other names are evaluated as
// macro values in the #if expression.
186 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
187 catchAllCommandExe += "mono " + path + "CatchAllcmdL.exe ";
188 if (outputDir == "") { outputDir = "./"; } //force full pathname to be created for catchall, this is necessary because if catchall is in the path it will look for input file wherever the exe is and not the cwd.
190 catchAllCommandExe += "\"" + path + "CatchAllcmdW.exe\"" + " ";
191 if (outputDir == "") { outputDir = ".\\"; } //force full pathname to be created for catchall, this is necessary because if catchall is in the path it will look for input file wherever the exe is and not the cwd.
194 //prepare full output directory
195 outputDir = m->getFullPathName(outputDir);
197 vector<string> inputFileNames;
// A shared file is split into one sabund file per group; otherwise use the sabund file as is.
198 if (sharedfile != "") { inputFileNames = parseSharedFile(sharedfile); globaldata->setFormat("sabund"); }
199 else { inputFileNames.push_back(sabundfile); }
201 for (int p = 0; p < inputFileNames.size(); p++) {
// Announce the group only when there are several inputs (i.e. the shared-file case).
202 if (inputFileNames.size() > 1) {
203 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
206 InputData* input = new InputData(inputFileNames[p], "sabund");
207 SAbundVector* sabund = input->getSAbundVector();
// NOTE(review): sabund is dereferenced here without a NULL check, although the loop
// below tests sabund != NULL - confirm getSAbundVector() cannot return NULL on first read.
208 string lastLabel = sabund->getLabel();
// processedLabels tracks what has been run; userLabels is a working copy of the
// requested labels from which handled entries are erased.
210 set<string> processedLabels;
211 set<string> userLabels = labels;
// Per-input summary file: <outputDir><root of input name>catchall.summary
213 string summaryfilename = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "catchall.summary";
214 summaryfilename = m->getFullPathName(summaryfilename);
217 m->openOutputFile(summaryfilename, out);
219 out << "label\tmodel\testimate\tlci\tuci" << endl;
221 //for each label the user selected
222 while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
// Case 1: the current label is one the user asked for (or all labels are wanted).
225 if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
226 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
228 //create catchall input file from mothur's inputfile
229 string filename = process(sabund, inputFileNames[p]);
230 string outputPath = m->getPathName(filename);
232 //create system command
// NOTE(review): in the first form below filename and outputPath are passed unquoted,
// so paths containing spaces or shell metacharacters would be misinterpreted by the
// command processor - confirm whether quoting is needed. The second form quotes both
// arguments, strips the last character of outputPath, and wraps the whole command in quotes.
233 string catchAllCommand = "";
234 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
235 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
237 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
238 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
239 //wrap entire string in ""
240 catchAllCommand = "\"" + catchAllCommand + "\"";
// The return value of system() is ignored; failure is detected later when
// createSummaryFile cannot open the expected result file.
244 system(catchAllCommand.c_str());
// The generated catchall input file is no longer needed.
246 remove(filename.c_str());
248 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
// Record the four result files under outputNames and the "csv" output type.
250 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
251 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
252 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
253 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
255 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
// On interrupt (also set by createSummaryFile on failure): close the summary,
// delete every recorded output file, free the reader and vector, and stop.
257 if (m->control_pressed) { out.close(); for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } delete input; delete sabund; return 0; }
259 processedLabels.insert(sabund->getLabel());
260 userLabels.erase(sabund->getLabel());
// Case 2: anyLabelsToProcess reports outstanding user labels relative to the current
// label and the previous label has not been run yet, so run the previous label instead.
263 if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
264 string saveLabel = sabund->getLabel();
267 sabund = (input->getSAbundVector(lastLabel));
269 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
272 //create catchall input file from mothur's inputfile
273 string filename = process(sabund, inputFileNames[p]);
274 string outputPath = m->getPathName(filename);
276 //create system command
277 string catchAllCommand = "";
278 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
279 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
281 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
282 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
283 catchAllCommand = "\"" + catchAllCommand + "\"";
287 system(catchAllCommand.c_str());
289 remove(filename.c_str());
291 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
293 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
294 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
295 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
296 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
298 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
300 if (m->control_pressed) { out.close(); for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } delete input; delete sabund; return 0; }
302 processedLabels.insert(sabund->getLabel());
303 userLabels.erase(sabund->getLabel());
305 //restore real lastlabel to save below
306 sabund->setLabel(saveLabel);
// Remember the label just seen, then advance to the next vector in the file.
310 lastLabel = sabund->getLabel();
313 sabund = (input->getSAbundVector());
316 //output error messages about any remaining user labels
317 set<string>::iterator it;
318 bool needToRun = false;
319 for (it = userLabels.begin(); it != userLabels.end(); it++) {
320 m->mothurOut("Your file does not include the label " + *it);
// If the last label was never processed it will stand in for the missing one;
// otherwise the user is pointed at the already-produced results for it.
// (The line that sets needToRun is not visible in this listing.)
321 if (processedLabels.count(lastLabel) != 1) {
322 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
325 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
329 //run last label if you need to
330 if (needToRun == true) {
331 if (sabund != NULL) { delete sabund; }
332 sabund = (input->getSAbundVector(lastLabel));
334 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
336 //create catchall input file from mothur's inputfile
337 string filename = process(sabund, inputFileNames[p]);
338 string outputPath = m->getPathName(filename);
340 //create system command
341 string catchAllCommand = "";
342 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
343 catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
345 if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
346 catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + "\" 1";
347 catchAllCommand = "\"" + catchAllCommand + "\"";
351 system(catchAllCommand.c_str());
353 remove(filename.c_str());
355 filename = m->getRootName(filename); filename = filename.substr(0, filename.length()-1); //rip off extra .
357 outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
358 outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
359 outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
360 outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
362 createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel(), out);
// On interrupt, delete every recorded output file and stop.
370 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } return 0; }
// Register the summary output: with a sabund input there is a single summary file;
// with a shared input the per-group summaries are merged into one combined file.
374 if (sharedfile == "") {
375 string summaryfilename = outputDir + m->getRootName(m->getSimpleName(inputFileNames[0])) + "catchall.summary";
376 summaryfilename = m->getFullPathName(summaryfilename);
377 outputNames.push_back(summaryfilename); outputTypes["summary"].push_back(summaryfilename);
378 }else { //combine summaries
379 vector<string> sumNames;
380 for (int i = 0; i < inputFileNames.size(); i++) {
381 sumNames.push_back(m->getFullPathName(outputDir + m->getRootName(m->getSimpleName(inputFileNames[i])) + "catchall.summary"));
383 string summaryfilename = combineSummmary(sumNames);
384 outputNames.push_back(summaryfilename); outputTypes["summary"].push_back(summaryfilename);
// Finally list every output file name for the user.
387 m->mothurOutEndLine();
388 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
389 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
390 m->mothurOutEndLine();
395 catch(exception& e) {
396 m->errorOut(e, "CatchAllCommand", "execute");
400 //**********************************************************************************************************************
// Writes the catchall input file for one label of `sabund`.
// The file is named <outputDir><root of file1's simple name><label>.csv and receives
// "i,count" rows, where i runs over 1..sabund->getMaxRank() and count is sabund->get(i).
// execute() uses the return value as the path of the file it passes to catchall.
// NOTE(review): lines between reading `temp` and writing it are not visible in this
// listing, so any filtering of rows (and the return statement) cannot be confirmed here.
401 string CatchAllCommand::process(SAbundVector* sabund, string file1) {
403 string filename = outputDir + m->getRootName(m->getSimpleName(file1)) + sabund->getLabel() + ".csv";
404 filename = m->getFullPathName(filename);
407 m->openOutputFile(filename, out);
409 for (int i = 1; i <= sabund->getMaxRank(); i++) {
410 int temp = sabund->get(i);
413 out << i << "," << temp << endl;
421 catch(exception& e) {
422 m->errorOut(e, "CatchAllCommand", "process");
426 //*********************************************************************************************************************
// Merges the per-group summary files named in `outputNames` into a single file
// <outputDir><root of sharedfile's simple name>catchall.summary and returns its name.
// Each input line is rebuilt from 5 fields with groups[i] inserted before the second
// field, matching the "label group model estimate lci uci" header. The per-group
// files are removed after being read. The combined file is written row by row:
// line k of every group, for each k.
// NOTE(review): the parameter is named `outputNames`, the same name execute() uses for
// its list of output files (presumably a member) - inside this function the parameter
// is the one referenced.
// NOTE(review): numLines is overwritten for every file, so it ends up as the line count
// of the last file read; if the per-group summaries differ in length,
// files[outputNames[i]][k] could index past the end - confirm all summaries have the
// same number of rows.
427 string CatchAllCommand::combineSummmary(vector<string>& outputNames) {
431 string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "catchall.summary";
434 m->openOutputFile(combineFileName, out);
436 out << "label\tgroup\tmodel\testimate\tlci\tuci" << endl;
438 //open each groups summary file
439 string newLabel = "";
// Maps each per-group summary file name to its rewritten lines.
441 map<string, vector<string> > files;
442 for (int i=0; i<outputNames.size(); i++) {
443 vector<string> thisFilesLines;
446 m->openInputFile(outputNames[i], temp);
448 //read through first line - labels
453 while (!temp.eof()) {
455 string thisLine = "";
// Rebuild the line field by field, inserting the group name before field index 1.
458 for (int j = 0; j < 5; j++) {
462 if (j == 1) { thisLine += groups[i] + "\t" + tempLabel + "\t"; }
463 else{ thisLine += tempLabel + "\t"; }
468 thisFilesLines.push_back(thisLine);
473 files[outputNames[i]] = thisFilesLines;
475 numLines = thisFilesLines.size();
// The per-group summary has been absorbed; delete it.
478 remove(outputNames[i].c_str());
482 for (int k = 0; k < numLines; k++) {
484 //grab summary data for each group
485 for (int i=0; i<outputNames.size(); i++) {
486 out << files[outputNames[i]][k];
493 //return combine file name
494 return combineFileName;
497 catch(exception& e) {
498 m->errorOut(e, "CatchAllCommand", "combineSummmary");
502 //**********************************************************************************************************************
// Reads a catchall result file (`file1`) and appends one tab-separated row to `out`:
// label, then the values at indices 1, 4, 6 and 7 of the parsed result line
// (written under the header columns model, estimate, lci, uci).
// If the file cannot be opened, an error is printed, m->control_pressed is set so the
// caller stops, and 0 is returned.
// NOTE(review): several original lines (the parsing of numString from the header, the
// stream declaration and the final return) are not visible in this listing.
503 int CatchAllCommand::createSummaryFile(string file1, string label, ofstream& out) {
// "noerror" is passed so that the open failure is reported by this function's own message below.
507 int able = m->openInputFile(file1, in, "noerror");
// A result of 1 is treated as failure to open.
509 if (able == 1) { m->mothurOut("[ERROR]: the catchall program did not run properly. Please check to make sure it is located in the same folder as your mothur executable.");m->mothurOutEndLine(); m->control_pressed = true; return 0; }
513 string header = m->getline(in); m->gobble(in);
// NOTE(review): string::find returns size_t; storing it in an int and comparing with
// string::npos relies on implicit conversions - confirm this behaves as intended on
// the target platforms.
515 int pos = header.find("Total Number of Observed Species =");
516 string numString = "";
// Marker missing: an error is printed; no return is visible on this line.
519 if (pos == string::npos) { m->mothurOut("[ERROR]: cannot parse " + file1); m->mothurOutEndLine(); }
521 //pos will be the position of the T in total, so we want to count to the position of =
532 if (pos > header.length()) { m->mothurOut("Cannot find number of OTUs in " + file1); m->mothurOutEndLine(); in.close(); return 0; }
536 string firstline = m->getline(in); m->gobble(in);
537 vector<string> values;
538 m->splitAtComma(firstline, values);
// NOTE(review): pop_back on an empty vector is undefined behavior - confirm
// splitAtComma always yields at least one element.
540 values.pop_back(); //last value is always a blank string since the last character in the line is always a ','
542 if (values.size() == 1) { //grab next line if firstline didn't have what you wanted
543 string secondline = m->getline(in); m->gobble(in);
545 m->splitAtComma(secondline, values);
547 values.pop_back(); //last value is always a blank string since the last character in the line is always a ','
550 if (values.size() == 1) { //still not what we wanted fill values with numOTUs
551 values.resize(8, "");
// Use the observed-species count for the estimate and both confidence bounds.
553 values[4] = numString;
554 values[6] = numString;
555 values[7] = numString;
// Pad short rows so that indices 1, 4, 6 and 7 are always valid below.
558 if (values.size() < 8) { values.resize(8, ""); }
560 out << label << '\t' << values[1] << '\t' << values[4] << '\t' << values[6] << '\t' << values[7] << endl;
568 catch(exception& e) {
569 m->errorOut(e, "CatchAllCommand", "createSummaryFile");
573 //**********************************************************************************************************************
// Splits a shared file into one sabund file per group.
// For every group in the first set of vectors read from `filename`, any existing
// <outputDir><root of filename><group>.sabund file is removed, its name is added to the
// list of file names, and the group name is appended to `groups`. Then, for each set of
// vectors read, every group's SAbundVector is appended to that group's file.
// NOTE(review): the lines that write `sav`, release `input` and return are not visible
// in this listing; execute() assigns the result to its list of input file names.
574 vector<string> CatchAllCommand::parseSharedFile(string filename) {
576 vector<string> filenames;
579 InputData* input = new InputData(filename, "sharedfile");
580 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
582 string sharedFileRoot = outputDir + m->getRootName(m->getSimpleName(filename));
584 //clears file before we start to write to it below
585 for (int i=0; i<lookup.size(); i++) {
586 remove((sharedFileRoot + lookup[i]->getGroup() + ".sabund").c_str());
587 filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".sabund"));
588 groups.push_back(lookup[i]->getGroup());
// NOTE(review): lookup[0] is read without checking that lookup is non-empty - confirm
// getSharedRAbundVectors() always returns at least one element (possibly NULL at end of file).
591 while(lookup[0] != NULL) {
593 for (int i = 0; i < lookup.size(); i++) {
594 SAbundVector sav = lookup[i]->getSAbundVector();
// Append mode: each group's file accumulates one entry per set of vectors read.
596 m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".sabund", out);
// Free the current set of vectors before reading the next one.
601 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
602 lookup = input->getSharedRAbundVectors();
609 catch(exception& e) {
610 m->errorOut(e, "CatchAllCommand", "parseSharedFile");
614 /**************************************************************************************/