5 * Created by westcott on 9/16/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
13 #include "mothurmetastats.h"
15 //**********************************************************************************************************************
16 vector<string> MetaStatsCommand::setParameters(){
18 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
19 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
20 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
22 CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold);
23 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
24 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
25 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
34 m->errorOut(e, "MetaStatsCommand", "setParameters");
38 //**********************************************************************************************************************
39 string MetaStatsCommand::getHelpString(){
41 string helpString = "";
42 helpString += "This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n";
43 helpString += "The metastats command outputs a .metastats file. \n";
44 helpString += "The metastats command parameters are shared, iters, threshold, groups, label, design, sets and processors. The shared and design parameters are required, unless you have valid current files.\n";
45 helpString += "The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n";
46 helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
47 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
48 helpString += "The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic. The default is 1000. \n";
49 helpString += "The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n";
50 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
51 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
52 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
53 helpString += "The metastats command should be in the following format: metastats(design=yourDesignFile).\n";
54 helpString += "Example metastats(design=temp.design, groups=A-B-C).\n";
55 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
56 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
60 m->errorOut(e, "MetaStatsCommand", "getHelpString");
64 //**********************************************************************************************************************
65 MetaStatsCommand::MetaStatsCommand(){
67 abort = true; calledHelp = true;
69 vector<string> tempOutNames;
70 outputTypes["metastats"] = tempOutNames;
73 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
77 //**********************************************************************************************************************
79 MetaStatsCommand::MetaStatsCommand(string option) {
81 abort = false; calledHelp = false;
85 //allow user to run help
86 if(option == "help") { help(); abort = true; calledHelp = true; }
87 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
90 vector<string> myArray = setParameters();
92 OptionParser parser(option);
93 map<string,string> parameters = parser.getParameters();
95 ValidParameters validParameter;
97 //check to make sure all parameters are valid for command
98 map<string,string>::iterator it;
99 for (it = parameters.begin(); it != parameters.end(); it++) {
100 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
103 //initialize outputTypes
104 vector<string> tempOutNames;
105 outputTypes["metastats"] = tempOutNames;
108 //if the user changes the input directory command factory will send this info to us in the output parameter
109 string inputDir = validParameter.validFile(parameters, "inputdir", false);
110 if (inputDir == "not found"){ inputDir = ""; }
113 it = parameters.find("design");
114 //user has given a template file
115 if(it != parameters.end()){
116 path = m->hasPath(it->second);
117 //if the user has not given a path then, add inputdir. else leave path alone.
118 if (path == "") { parameters["design"] = inputDir + it->second; }
121 it = parameters.find("shared");
122 //user has given a template file
123 if(it != parameters.end()){
124 path = m->hasPath(it->second);
125 //if the user has not given a path then, add inputdir. else leave path alone.
126 if (path == "") { parameters["shared"] = inputDir + it->second; }
131 //check for required parameters
132 sharedfile = validParameter.validFile(parameters, "shared", true);
133 if (sharedfile == "not open") { abort = true; }
134 else if (sharedfile == "not found") { //if there is a current shared file, use it
135 sharedfile = m->getSharedFile();
136 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
137 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
138 }else { m->setSharedFile(sharedfile); }
140 //check for required parameters
141 designfile = validParameter.validFile(parameters, "design", true);
142 if (designfile == "not open") { abort = true; }
143 else if (designfile == "not found") {
144 //if there is a current design file, use it
145 designfile = m->getDesignFile();
146 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
147 else { m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
148 }else { m->setDesignFile(designfile); }
150 //if the user changes the output directory command factory will send this info to us in the output parameter
151 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
153 outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it
156 //check for optional parameter and set defaults
157 // ...at some point should added some additional type checking...
158 label = validParameter.validFile(parameters, "label", false);
159 if (label == "not found") { label = ""; }
161 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
162 else { allLines = 1; }
165 groups = validParameter.validFile(parameters, "groups", false);
166 if (groups == "not found") { groups = ""; pickedGroups = false; }
169 m->splitAtDash(groups, Groups);
170 m->setGroups(Groups);
173 sets = validParameter.validFile(parameters, "sets", false);
174 if (sets == "not found") { sets = ""; }
176 m->splitAtDash(sets, Sets);
180 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
181 m->mothurConvert(temp, iters);
183 temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found") { temp = "0.05"; }
184 m->mothurConvert(temp, threshold);
186 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
187 m->setProcessors(temp);
188 m->mothurConvert(temp, processors);
192 catch(exception& e) {
193 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
197 //**********************************************************************************************************************
199 int MetaStatsCommand::execute(){
202 if (abort == true) { if (calledHelp) { return 0; } return 2; }
204 designMap = new GroupMap(designfile);
205 designMap->readDesignMap();
207 input = new InputData(sharedfile, "sharedfile");
208 lookup = input->getSharedRAbundVectors();
209 string lastLabel = lookup[0]->getLabel();
211 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
212 set<string> processedLabels;
213 set<string> userLabels = labels;
215 //setup the pairwise comparions of sets for metastats
216 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
217 //make sure sets are all in designMap
218 SharedUtil* util = new SharedUtil();
219 vector<string> dGroups = designMap->getNamesOfGroups();
220 util->setGroups(Sets, dGroups);
223 int numGroups = Sets.size();
224 for (int a=0; a<numGroups; a++) {
225 for (int l = 0; l < a; l++) {
226 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
227 namesOfGroupCombos.push_back(groups);
233 if (numGroups == 2) { processors = 1; }
234 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
236 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
238 int numPairs = namesOfGroupCombos.size();
239 int numPairsPerProcessor = numPairs / processors;
241 for (int i = 0; i < processors; i++) {
242 int startPos = i * numPairsPerProcessor;
243 if(i == processors - 1){
244 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
246 lines.push_back(linePair(startPos, numPairsPerProcessor));
251 //as long as you are not at the end of the file or done wih the lines you want
252 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
254 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
256 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
258 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
261 processedLabels.insert(lookup[0]->getLabel());
262 userLabels.erase(lookup[0]->getLabel());
265 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
266 string saveLabel = lookup[0]->getLabel();
268 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
269 lookup = input->getSharedRAbundVectors(lastLabel);
270 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
274 processedLabels.insert(lookup[0]->getLabel());
275 userLabels.erase(lookup[0]->getLabel());
277 //restore real lastlabel to save below
278 lookup[0]->setLabel(saveLabel);
281 lastLabel = lookup[0]->getLabel();
282 //prevent memory leak
283 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
285 if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
287 //get next line to process
288 lookup = input->getSharedRAbundVectors();
291 if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
293 //output error messages about any remaining user labels
294 set<string>::iterator it;
295 bool needToRun = false;
296 for (it = userLabels.begin(); it != userLabels.end(); it++) {
297 m->mothurOut("Your file does not include the label " + *it);
298 if (processedLabels.count(lastLabel) != 1) {
299 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
302 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
306 //run last label if you need to
307 if (needToRun == true) {
308 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
309 lookup = input->getSharedRAbundVectors(lastLabel);
311 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
315 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
318 //reset groups parameter
323 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;}
325 m->mothurOutEndLine();
326 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
327 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
328 m->mothurOutEndLine();
332 catch(exception& e) {
333 m->errorOut(e, "MetaStatsCommand", "execute");
337 //**********************************************************************************************************************
339 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
342 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
344 driver(0, namesOfGroupCombos.size(), thisLookUp);
347 vector<int> processIDS;
349 //loop through and create all the processes you want
350 while (process != processors) {
354 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
357 driver(lines[process].start, lines[process].num, thisLookUp);
360 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
361 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
367 driver(lines[0].start, lines[0].num, thisLookUp);
369 //force parent to wait until all the processes are done
370 for (int i=0;i<(processors-1);i++) {
371 int temp = processIDS[i];
376 driver(0, namesOfGroupCombos.size(), thisLookUp);
382 catch(exception& e) {
383 m->errorOut(e, "MetaStatsCommand", "process");
387 //**********************************************************************************************************************
388 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) {
392 for (int c = start; c < (start+num); c++) {
395 string setA = namesOfGroupCombos[c][0];
396 string setB = namesOfGroupCombos[c][1];
397 //cout << setA << '\t' << setB << endl;
399 string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
400 outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
401 //int nameLength = outputFileName.length();
402 //char * output = new char[nameLength];
403 //strcpy(output, outputFileName.c_str());
405 //build matrix from shared rabunds
407 //data = new double*[thisLookUp[0]->getNumBins()];
409 vector< vector<double> > data2; data2.resize(thisLookUp[0]->getNumBins());
411 vector<SharedRAbundVector*> subset;
414 for (int i = 0; i < thisLookUp.size(); i++) {
415 string thisGroup = thisLookUp[i]->getGroup();
417 //is this group for a set we want to compare??
418 //sorting the sets by putting setB at the back and setA in the front
419 if ((designMap->getGroup(thisGroup) == setB)) {
420 subset.push_back(thisLookUp[i]);
422 }else if ((designMap->getGroup(thisGroup) == setA)) {
423 subset.insert(subset.begin()+setACount, thisLookUp[i]);
428 //for (int i = 0; i < subset.size(); i++) { cout << designMap->getGroup(subset[i]->getGroup()) << endl; }
429 //cout << setACount << endl;
431 if ((setACount == 0) || (setBCount == 0)) {
432 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine();
433 outputNames.pop_back();
436 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
437 //data[j] = new double[subset.size()];
438 data2[j].resize(subset.size(), 0.0);
439 for (int i = 0; i < subset.size(); i++) {
440 //data[j][i] = (subset[i]->getAbundance(j));
441 data2[j][i] = (subset[i]->getAbundance(j));
445 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine();
446 //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
448 m->mothurOutEndLine();
449 MothurMetastats mothurMeta(threshold, iters);
450 mothurMeta.runMetastats(outputFileName , data2, setACount);
451 m->mothurOutEndLine();
453 m->mothurOutEndLine();
458 //for(int i = 0; i < thisLookUp[0]->getNumBins(); i++) { delete[] data[i]; }
465 catch(exception& e) {
466 m->errorOut(e, "MetaStatsCommand", "driver");
470 //**********************************************************************************************************************