5 * Created by westcott on 9/16/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
13 #include "mothurmetastats.h"
15 //**********************************************************************************************************************
16 vector<string> MetaStatsCommand::setParameters(){
18 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
19 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
20 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
22 CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold);
23 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
24 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
25 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
34 m->errorOut(e, "MetaStatsCommand", "setParameters");
38 //**********************************************************************************************************************
39 string MetaStatsCommand::getHelpString(){
41 string helpString = "";
42 helpString += "This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n";
43 helpString += "The metastats command outputs a .metastats file. \n";
44 helpString += "The metastats command parameters are shared, iters, threshold, groups, label, design, sets and processors. The shared and design parameters are required, unless you have valid current files.\n";
45 helpString += "The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n";
46 helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
47 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
48 helpString += "The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic. The default is 1000. \n";
49 helpString += "The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n";
50 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
51 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
52 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
53 helpString += "The metastats command should be in the following format: metastats(design=yourDesignFile).\n";
54 helpString += "Example metastats(design=temp.design, groups=A-B-C).\n";
55 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
56 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
60 m->errorOut(e, "MetaStatsCommand", "getHelpString");
64 //**********************************************************************************************************************
65 MetaStatsCommand::MetaStatsCommand(){
67 abort = true; calledHelp = true;
69 vector<string> tempOutNames;
70 outputTypes["metastats"] = tempOutNames;
73 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
77 //**********************************************************************************************************************
79 MetaStatsCommand::MetaStatsCommand(string option) {
81 abort = false; calledHelp = false;
85 //allow user to run help
86 if(option == "help") { help(); abort = true; calledHelp = true; }
87 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
90 vector<string> myArray = setParameters();
92 OptionParser parser(option);
93 map<string,string> parameters = parser.getParameters();
95 ValidParameters validParameter;
97 //check to make sure all parameters are valid for command
98 map<string,string>::iterator it;
99 for (it = parameters.begin(); it != parameters.end(); it++) {
100 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
103 //initialize outputTypes
104 vector<string> tempOutNames;
105 outputTypes["metastats"] = tempOutNames;
108 //if the user changes the input directory command factory will send this info to us in the output parameter
109 string inputDir = validParameter.validFile(parameters, "inputdir", false);
110 if (inputDir == "not found"){ inputDir = ""; }
113 it = parameters.find("design");
114 //user has given a template file
115 if(it != parameters.end()){
116 path = m->hasPath(it->second);
117 //if the user has not given a path then, add inputdir. else leave path alone.
118 if (path == "") { parameters["design"] = inputDir + it->second; }
121 it = parameters.find("shared");
122 //user has given a template file
123 if(it != parameters.end()){
124 path = m->hasPath(it->second);
125 //if the user has not given a path then, add inputdir. else leave path alone.
126 if (path == "") { parameters["shared"] = inputDir + it->second; }
131 //check for required parameters
132 sharedfile = validParameter.validFile(parameters, "shared", true);
133 if (sharedfile == "not open") { abort = true; }
134 else if (sharedfile == "not found") { //if there is a current shared file, use it
135 sharedfile = m->getSharedFile();
136 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
137 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
138 }else { m->setSharedFile(sharedfile); }
140 //check for required parameters
141 designfile = validParameter.validFile(parameters, "design", true);
142 if (designfile == "not open") { abort = true; }
143 else if (designfile == "not found") {
144 //if there is a current design file, use it
145 designfile = m->getDesignFile();
146 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
147 else { m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
148 }else { m->setDesignFile(designfile); }
150 //if the user changes the output directory command factory will send this info to us in the output parameter
151 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
153 outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it
156 //check for optional parameter and set defaults
157 // ...at some point should added some additional type checking...
158 label = validParameter.validFile(parameters, "label", false);
159 if (label == "not found") { label = ""; }
161 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
162 else { allLines = 1; }
165 groups = validParameter.validFile(parameters, "groups", false);
166 if (groups == "not found") { groups = ""; pickedGroups = false; }
169 m->splitAtDash(groups, Groups);
173 sets = validParameter.validFile(parameters, "sets", false);
174 if (sets == "not found") { sets = ""; }
176 m->splitAtDash(sets, Sets);
180 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
181 convert(temp, iters);
183 temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found") { temp = "0.05"; }
184 convert(temp, threshold);
186 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
187 m->setProcessors(temp);
188 convert(temp, processors);
192 catch(exception& e) {
193 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
197 //**********************************************************************************************************************
199 int MetaStatsCommand::execute(){
202 if (abort == true) { if (calledHelp) { return 0; } return 2; }
204 designMap = new GroupMap(designfile);
205 designMap->readDesignMap();
207 input = new InputData(sharedfile, "sharedfile");
208 lookup = input->getSharedRAbundVectors();
209 string lastLabel = lookup[0]->getLabel();
211 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
212 set<string> processedLabels;
213 set<string> userLabels = labels;
215 //setup the pairwise comparions of sets for metastats
216 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
217 //make sure sets are all in designMap
218 SharedUtil* util = new SharedUtil();
219 util->setGroups(Sets, designMap->namesOfGroups);
222 int numGroups = Sets.size();
223 for (int a=0; a<numGroups; a++) {
224 for (int l = 0; l < a; l++) {
225 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
226 namesOfGroupCombos.push_back(groups);
232 if (numGroups == 2) { processors = 1; }
233 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
235 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
237 int numPairs = namesOfGroupCombos.size();
238 int numPairsPerProcessor = numPairs / processors;
240 for (int i = 0; i < processors; i++) {
241 int startPos = i * numPairsPerProcessor;
242 if(i == processors - 1){
243 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
245 lines.push_back(linePair(startPos, numPairsPerProcessor));
250 //as long as you are not at the end of the file or done wih the lines you want
251 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
253 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->Groups.clear(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
255 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
257 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
260 processedLabels.insert(lookup[0]->getLabel());
261 userLabels.erase(lookup[0]->getLabel());
264 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
265 string saveLabel = lookup[0]->getLabel();
267 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
268 lookup = input->getSharedRAbundVectors(lastLabel);
269 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
273 processedLabels.insert(lookup[0]->getLabel());
274 userLabels.erase(lookup[0]->getLabel());
276 //restore real lastlabel to save below
277 lookup[0]->setLabel(saveLabel);
280 lastLabel = lookup[0]->getLabel();
281 //prevent memory leak
282 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
284 if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
286 //get next line to process
287 lookup = input->getSharedRAbundVectors();
290 if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
292 //output error messages about any remaining user labels
293 set<string>::iterator it;
294 bool needToRun = false;
295 for (it = userLabels.begin(); it != userLabels.end(); it++) {
296 m->mothurOut("Your file does not include the label " + *it);
297 if (processedLabels.count(lastLabel) != 1) {
298 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
301 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
305 //run last label if you need to
306 if (needToRun == true) {
307 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
308 lookup = input->getSharedRAbundVectors(lastLabel);
310 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
314 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
317 //reset groups parameter
322 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;}
324 m->mothurOutEndLine();
325 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
326 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
327 m->mothurOutEndLine();
331 catch(exception& e) {
332 m->errorOut(e, "MetaStatsCommand", "execute");
336 //**********************************************************************************************************************
338 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
341 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
343 driver(0, namesOfGroupCombos.size(), thisLookUp);
346 vector<int> processIDS;
348 //loop through and create all the processes you want
349 while (process != processors) {
353 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
356 driver(lines[process].start, lines[process].num, thisLookUp);
359 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
360 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
366 driver(lines[0].start, lines[0].num, thisLookUp);
368 //force parent to wait until all the processes are done
369 for (int i=0;i<(processors-1);i++) {
370 int temp = processIDS[i];
375 driver(0, namesOfGroupCombos.size(), thisLookUp);
381 catch(exception& e) {
382 m->errorOut(e, "MetaStatsCommand", "process");
386 //**********************************************************************************************************************
387 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) {
391 for (int c = start; c < (start+num); c++) {
394 string setA = namesOfGroupCombos[c][0];
395 string setB = namesOfGroupCombos[c][1];
398 string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
399 outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
400 int nameLength = outputFileName.length();
401 char * output = new char[nameLength];
402 strcpy(output, outputFileName.c_str());
404 //build matrix from shared rabunds
406 data = new double*[thisLookUp[0]->getNumBins()];
408 vector< vector<double> > data2; data2.resize(thisLookUp[0]->getNumBins());
410 vector<SharedRAbundVector*> subset;
413 for (int i = 0; i < thisLookUp.size(); i++) {
414 string thisGroup = thisLookUp[i]->getGroup();
416 //is this group for a set we want to compare??
417 //sorting the sets by putting setB at the back and setA in the front
418 if ((designMap->getGroup(thisGroup) == setB)) {
419 subset.push_back(thisLookUp[i]);
421 }else if ((designMap->getGroup(thisGroup) == setA)) {
422 subset.insert(subset.begin()+setACount, thisLookUp[i]);
427 if ((setACount == 0) || (setBCount == 0)) {
428 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine();
429 outputNames.pop_back();
432 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
433 data[j] = new double[subset.size()];
434 data2[j].resize(subset.size(), 0.0);
435 for (int i = 0; i < subset.size(); i++) {
436 data[j][i] = (subset[i]->getAbundance(j));
437 data2[j][i] = (subset[i]->getAbundance(j));
441 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine();
442 metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
444 m->mothurOutEndLine();
445 MothurMetastats mothurMeta(threshold, iters);
446 mothurMeta.runMetastats(outputFileName+".myVersion" , data2, setACount);
447 m->mothurOutEndLine();
449 m->mothurOutEndLine();
454 for(int i = 0; i < thisLookUp[0]->getNumBins(); i++) { delete[] data[i]; }
461 catch(exception& e) {
462 m->errorOut(e, "MetaStatsCommand", "driver");
466 //**********************************************************************************************************************