5 * Created by westcott on 9/16/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
14 //**********************************************************************************************************************
15 vector<string> MetaStatsCommand::setParameters(){
17 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
18 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
19 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
20 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
21 CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold);
22 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
23 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
24 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28 vector<string> myArray;
29 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
33 m->errorOut(e, "MetaStatsCommand", "setParameters");
37 //**********************************************************************************************************************
38 string MetaStatsCommand::getHelpString(){
40 string helpString = "";
41 helpString += "This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n";
42 helpString += "The metastats command outputs a .metastats file. \n";
43 helpString += "The metastats command parameters are shared, iters, threshold, groups, label, design, sets and processors. The shared and design parameters are required, unless you have valid current files.\n";
44 helpString += "The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n";
45 helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
46 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
47 helpString += "The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic. The default is 1000. \n";
48 helpString += "The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n";
49 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
50 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
51 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
52 helpString += "The metastats command should be in the following format: metastats(design=yourDesignFile).\n";
53 helpString += "Example metastats(design=temp.design, groups=A-B-C).\n";
54 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
55 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
59 m->errorOut(e, "MetaStatsCommand", "getHelpString");
63 //**********************************************************************************************************************
64 MetaStatsCommand::MetaStatsCommand(){
66 abort = true; calledHelp = true;
68 vector<string> tempOutNames;
69 outputTypes["metastats"] = tempOutNames;
72 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
76 //**********************************************************************************************************************
78 MetaStatsCommand::MetaStatsCommand(string option) {
80 abort = false; calledHelp = false;
84 //allow user to run help
85 if(option == "help") { help(); abort = true; calledHelp = true; }
88 vector<string> myArray = setParameters();
90 OptionParser parser(option);
91 map<string,string> parameters = parser.getParameters();
93 ValidParameters validParameter;
95 //check to make sure all parameters are valid for command
96 map<string,string>::iterator it;
97 for (it = parameters.begin(); it != parameters.end(); it++) {
98 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
101 //initialize outputTypes
102 vector<string> tempOutNames;
103 outputTypes["metastats"] = tempOutNames;
106 //if the user changes the input directory command factory will send this info to us in the output parameter
107 string inputDir = validParameter.validFile(parameters, "inputdir", false);
108 if (inputDir == "not found"){ inputDir = ""; }
111 it = parameters.find("design");
112 //user has given a template file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["design"] = inputDir + it->second; }
119 it = parameters.find("shared");
120 //user has given a template file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["shared"] = inputDir + it->second; }
129 //check for required parameters
130 sharedfile = validParameter.validFile(parameters, "shared", true);
131 if (sharedfile == "not open") { abort = true; }
132 else if (sharedfile == "not found") { //if there is a current shared file, use it
133 sharedfile = m->getSharedFile();
134 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
135 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
138 //check for required parameters
139 designfile = validParameter.validFile(parameters, "design", true);
140 if (designfile == "not open") { abort = true; }
141 else if (designfile == "not found") {
142 //if there is a current design file, use it
143 designfile = m->getDesignFile();
144 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
145 else { m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
148 //if the user changes the output directory command factory will send this info to us in the output parameter
149 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
151 outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it
154 //check for optional parameter and set defaults
155 // ...at some point should added some additional type checking...
156 label = validParameter.validFile(parameters, "label", false);
157 if (label == "not found") { label = ""; }
159 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
160 else { allLines = 1; }
163 groups = validParameter.validFile(parameters, "groups", false);
164 if (groups == "not found") { groups = ""; pickedGroups = false; }
167 m->splitAtDash(groups, Groups);
171 sets = validParameter.validFile(parameters, "sets", false);
172 if (sets == "not found") { sets = ""; }
174 m->splitAtDash(sets, Sets);
178 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
179 convert(temp, iters);
181 temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found") { temp = "0.05"; }
182 convert(temp, threshold);
184 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
185 m->setProcessors(temp);
186 convert(temp, processors);
190 catch(exception& e) {
191 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
195 //**********************************************************************************************************************
197 int MetaStatsCommand::execute(){
200 if (abort == true) { if (calledHelp) { return 0; } return 2; }
202 designMap = new GroupMap(designfile);
203 designMap->readDesignMap();
205 input = new InputData(sharedfile, "sharedfile");
206 lookup = input->getSharedRAbundVectors();
207 string lastLabel = lookup[0]->getLabel();
209 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
210 set<string> processedLabels;
211 set<string> userLabels = labels;
213 //setup the pairwise comparions of sets for metastats
214 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
215 //make sure sets are all in designMap
216 SharedUtil* util = new SharedUtil();
217 util->setGroups(Sets, designMap->namesOfGroups);
220 int numGroups = Sets.size();
221 for (int a=0; a<numGroups; a++) {
222 for (int l = 0; l < a; l++) {
223 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
224 namesOfGroupCombos.push_back(groups);
230 if (numGroups == 2) { processors = 1; }
231 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
233 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
235 int numPairs = namesOfGroupCombos.size();
236 int numPairsPerProcessor = numPairs / processors;
238 for (int i = 0; i < processors; i++) {
239 int startPos = i * numPairsPerProcessor;
240 if(i == processors - 1){
241 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
243 lines.push_back(linePair(startPos, numPairsPerProcessor));
248 //as long as you are not at the end of the file or done wih the lines you want
249 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
251 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->Groups.clear(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
253 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
255 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
258 processedLabels.insert(lookup[0]->getLabel());
259 userLabels.erase(lookup[0]->getLabel());
262 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
263 string saveLabel = lookup[0]->getLabel();
265 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
266 lookup = input->getSharedRAbundVectors(lastLabel);
267 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
271 processedLabels.insert(lookup[0]->getLabel());
272 userLabels.erase(lookup[0]->getLabel());
274 //restore real lastlabel to save below
275 lookup[0]->setLabel(saveLabel);
278 lastLabel = lookup[0]->getLabel();
279 //prevent memory leak
280 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
282 if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
284 //get next line to process
285 lookup = input->getSharedRAbundVectors();
288 if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
290 //output error messages about any remaining user labels
291 set<string>::iterator it;
292 bool needToRun = false;
293 for (it = userLabels.begin(); it != userLabels.end(); it++) {
294 m->mothurOut("Your file does not include the label " + *it);
295 if (processedLabels.count(lastLabel) != 1) {
296 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
299 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
303 //run last label if you need to
304 if (needToRun == true) {
305 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
306 lookup = input->getSharedRAbundVectors(lastLabel);
308 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
312 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
315 //reset groups parameter
320 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0;}
322 m->mothurOutEndLine();
323 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
324 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
325 m->mothurOutEndLine();
329 catch(exception& e) {
330 m->errorOut(e, "MetaStatsCommand", "execute");
334 //**********************************************************************************************************************
336 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
339 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
341 driver(0, namesOfGroupCombos.size(), thisLookUp);
344 vector<int> processIDS;
346 //loop through and create all the processes you want
347 while (process != processors) {
351 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
354 driver(lines[process].start, lines[process].num, thisLookUp);
357 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
358 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
364 driver(lines[0].start, lines[0].num, thisLookUp);
366 //force parent to wait until all the processes are done
367 for (int i=0;i<(processors-1);i++) {
368 int temp = processIDS[i];
373 driver(0, namesOfGroupCombos.size(), thisLookUp);
379 catch(exception& e) {
380 m->errorOut(e, "MetaStatsCommand", "process");
384 //**********************************************************************************************************************
385 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) {
389 for (int c = start; c < (start+num); c++) {
392 string setA = namesOfGroupCombos[c][0];
393 string setB = namesOfGroupCombos[c][1];
396 string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
397 outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
398 int nameLength = outputFileName.length();
399 char * output = new char[nameLength];
400 strcpy(output, outputFileName.c_str());
402 //build matrix from shared rabunds
404 data = new double*[thisLookUp[0]->getNumBins()];
406 vector<SharedRAbundVector*> subset;
409 for (int i = 0; i < thisLookUp.size(); i++) {
410 string thisGroup = thisLookUp[i]->getGroup();
412 //is this group for a set we want to compare??
413 //sorting the sets by putting setB at the back and setA in the front
414 if ((designMap->getGroup(thisGroup) == setB)) {
415 subset.push_back(thisLookUp[i]);
417 }else if ((designMap->getGroup(thisGroup) == setA)) {
418 subset.insert(subset.begin()+setACount, thisLookUp[i]);
423 if ((setACount == 0) || (setBCount == 0)) {
424 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine();
425 outputNames.pop_back();
428 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
429 data[j] = new double[subset.size()];
430 for (int i = 0; i < subset.size(); i++) {
431 data[j][i] = (subset[i]->getAbundance(j));
435 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine();
436 metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
438 m->mothurOutEndLine();
443 for(int i = 0; i < thisLookUp[0]->getNumBins(); i++) { delete[] data[i]; }
450 catch(exception& e) {
451 m->errorOut(e, "MetaStatsCommand", "driver");
455 //**********************************************************************************************************************