5 * Created by westcott on 9/16/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
14 //**********************************************************************************************************************
15 vector<string> MetaStatsCommand::setParameters(){
17 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
18 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
19 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
20 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
21 CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold);
22 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
23 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
24 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28 vector<string> myArray;
29 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
33 m->errorOut(e, "MetaStatsCommand", "setParameters");
37 //**********************************************************************************************************************
38 string MetaStatsCommand::getHelpString(){
40 string helpString = "";
41 helpString += "This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n";
42 helpString += "The metastats command outputs a .metastats file. \n";
43 helpString += "The metastats command parameters are shared, iters, threshold, groups, label, design, sets and processors. The shared and design parameters are required, unless you have valid current files.\n";
44 helpString += "The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n";
45 helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
46 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
47 helpString += "The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic. The default is 1000. \n";
48 helpString += "The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n";
49 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
50 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
51 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
52 helpString += "The metastats command should be in the following format: metastats(design=yourDesignFile).\n";
53 helpString += "Example metastats(design=temp.design, groups=A-B-C).\n";
54 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
55 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
59 m->errorOut(e, "MetaStatsCommand", "getHelpString");
63 //**********************************************************************************************************************
64 MetaStatsCommand::MetaStatsCommand(){
66 abort = true; calledHelp = true;
68 vector<string> tempOutNames;
69 outputTypes["metastats"] = tempOutNames;
72 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
76 //**********************************************************************************************************************
78 MetaStatsCommand::MetaStatsCommand(string option) {
80 abort = false; calledHelp = false;
84 //allow user to run help
85 if(option == "help") { help(); abort = true; calledHelp = true; }
86 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
89 vector<string> myArray = setParameters();
91 OptionParser parser(option);
92 map<string,string> parameters = parser.getParameters();
94 ValidParameters validParameter;
96 //check to make sure all parameters are valid for command
97 map<string,string>::iterator it;
98 for (it = parameters.begin(); it != parameters.end(); it++) {
99 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
102 //initialize outputTypes
103 vector<string> tempOutNames;
104 outputTypes["metastats"] = tempOutNames;
107 //if the user changes the input directory command factory will send this info to us in the output parameter
108 string inputDir = validParameter.validFile(parameters, "inputdir", false);
109 if (inputDir == "not found"){ inputDir = ""; }
112 it = parameters.find("design");
113 //user has given a template file
114 if(it != parameters.end()){
115 path = m->hasPath(it->second);
116 //if the user has not given a path then, add inputdir. else leave path alone.
117 if (path == "") { parameters["design"] = inputDir + it->second; }
120 it = parameters.find("shared");
121 //user has given a template file
122 if(it != parameters.end()){
123 path = m->hasPath(it->second);
124 //if the user has not given a path then, add inputdir. else leave path alone.
125 if (path == "") { parameters["shared"] = inputDir + it->second; }
130 //check for required parameters
131 sharedfile = validParameter.validFile(parameters, "shared", true);
132 if (sharedfile == "not open") { abort = true; }
133 else if (sharedfile == "not found") { //if there is a current shared file, use it
134 sharedfile = m->getSharedFile();
135 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
136 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
137 }else { m->setSharedFile(sharedfile); }
139 //check for required parameters
140 designfile = validParameter.validFile(parameters, "design", true);
141 if (designfile == "not open") { abort = true; }
142 else if (designfile == "not found") {
143 //if there is a current design file, use it
144 designfile = m->getDesignFile();
145 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
146 else { m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
147 }else { m->setDesignFile(designfile); }
149 //if the user changes the output directory command factory will send this info to us in the output parameter
150 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
152 outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it
155 //check for optional parameter and set defaults
156 // ...at some point should added some additional type checking...
157 label = validParameter.validFile(parameters, "label", false);
158 if (label == "not found") { label = ""; }
160 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
161 else { allLines = 1; }
164 groups = validParameter.validFile(parameters, "groups", false);
165 if (groups == "not found") { groups = ""; pickedGroups = false; }
168 m->splitAtDash(groups, Groups);
172 sets = validParameter.validFile(parameters, "sets", false);
173 if (sets == "not found") { sets = ""; }
175 m->splitAtDash(sets, Sets);
179 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
180 convert(temp, iters);
182 temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found") { temp = "0.05"; }
183 convert(temp, threshold);
185 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
186 m->setProcessors(temp);
187 convert(temp, processors);
191 catch(exception& e) {
192 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
196 //**********************************************************************************************************************
198 int MetaStatsCommand::execute(){
201 if (abort == true) { if (calledHelp) { return 0; } return 2; }
203 designMap = new GroupMap(designfile);
204 designMap->readDesignMap();
206 input = new InputData(sharedfile, "sharedfile");
207 lookup = input->getSharedRAbundVectors();
208 string lastLabel = lookup[0]->getLabel();
210 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
211 set<string> processedLabels;
212 set<string> userLabels = labels;
214 //setup the pairwise comparions of sets for metastats
215 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
216 //make sure sets are all in designMap
217 SharedUtil* util = new SharedUtil();
218 util->setGroups(Sets, designMap->namesOfGroups);
221 int numGroups = Sets.size();
222 for (int a=0; a<numGroups; a++) {
223 for (int l = 0; l < a; l++) {
224 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
225 namesOfGroupCombos.push_back(groups);
231 if (numGroups == 2) { processors = 1; }
232 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
234 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
236 int numPairs = namesOfGroupCombos.size();
237 int numPairsPerProcessor = numPairs / processors;
239 for (int i = 0; i < processors; i++) {
240 int startPos = i * numPairsPerProcessor;
241 if(i == processors - 1){
242 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
244 lines.push_back(linePair(startPos, numPairsPerProcessor));
249 //as long as you are not at the end of the file or done wih the lines you want
250 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
252 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->Groups.clear(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
254 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
256 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
259 processedLabels.insert(lookup[0]->getLabel());
260 userLabels.erase(lookup[0]->getLabel());
263 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
264 string saveLabel = lookup[0]->getLabel();
266 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
267 lookup = input->getSharedRAbundVectors(lastLabel);
268 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
272 processedLabels.insert(lookup[0]->getLabel());
273 userLabels.erase(lookup[0]->getLabel());
275 //restore real lastlabel to save below
276 lookup[0]->setLabel(saveLabel);
279 lastLabel = lookup[0]->getLabel();
280 //prevent memory leak
281 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
283 if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
285 //get next line to process
286 lookup = input->getSharedRAbundVectors();
289 if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
291 //output error messages about any remaining user labels
292 set<string>::iterator it;
293 bool needToRun = false;
294 for (it = userLabels.begin(); it != userLabels.end(); it++) {
295 m->mothurOut("Your file does not include the label " + *it);
296 if (processedLabels.count(lastLabel) != 1) {
297 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
300 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
304 //run last label if you need to
305 if (needToRun == true) {
306 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
307 lookup = input->getSharedRAbundVectors(lastLabel);
309 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
313 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
316 //reset groups parameter
321 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0;}
323 m->mothurOutEndLine();
324 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
325 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
326 m->mothurOutEndLine();
330 catch(exception& e) {
331 m->errorOut(e, "MetaStatsCommand", "execute");
335 //**********************************************************************************************************************
337 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
340 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
342 driver(0, namesOfGroupCombos.size(), thisLookUp);
345 vector<int> processIDS;
347 //loop through and create all the processes you want
348 while (process != processors) {
352 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
355 driver(lines[process].start, lines[process].num, thisLookUp);
358 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
359 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
365 driver(lines[0].start, lines[0].num, thisLookUp);
367 //force parent to wait until all the processes are done
368 for (int i=0;i<(processors-1);i++) {
369 int temp = processIDS[i];
374 driver(0, namesOfGroupCombos.size(), thisLookUp);
380 catch(exception& e) {
381 m->errorOut(e, "MetaStatsCommand", "process");
385 //**********************************************************************************************************************
386 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) {
390 for (int c = start; c < (start+num); c++) {
393 string setA = namesOfGroupCombos[c][0];
394 string setB = namesOfGroupCombos[c][1];
397 string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
398 outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
399 int nameLength = outputFileName.length();
400 char * output = new char[nameLength];
401 strcpy(output, outputFileName.c_str());
403 //build matrix from shared rabunds
405 data = new double*[thisLookUp[0]->getNumBins()];
407 vector<SharedRAbundVector*> subset;
410 for (int i = 0; i < thisLookUp.size(); i++) {
411 string thisGroup = thisLookUp[i]->getGroup();
413 //is this group for a set we want to compare??
414 //sorting the sets by putting setB at the back and setA in the front
415 if ((designMap->getGroup(thisGroup) == setB)) {
416 subset.push_back(thisLookUp[i]);
418 }else if ((designMap->getGroup(thisGroup) == setA)) {
419 subset.insert(subset.begin()+setACount, thisLookUp[i]);
424 if ((setACount == 0) || (setBCount == 0)) {
425 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine();
426 outputNames.pop_back();
429 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
430 data[j] = new double[subset.size()];
431 for (int i = 0; i < subset.size(); i++) {
432 data[j][i] = (subset[i]->getAbundance(j));
436 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine();
437 metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
439 m->mothurOutEndLine();
444 for(int i = 0; i < thisLookUp[0]->getNumBins(); i++) { delete[] data[i]; }
451 catch(exception& e) {
452 m->errorOut(e, "MetaStatsCommand", "driver");
456 //**********************************************************************************************************************