5 * Created by westcott on 9/16/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
14 //**********************************************************************************************************************
15 vector<string> MetaStatsCommand::getValidParameters(){
17 string Array[] = {"groups","label","outputdir","iters","threshold","g","design","sets","processors","inputdir"};
18 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
22 m->errorOut(e, "MetaStatsCommand", "getValidParameters");
26 //**********************************************************************************************************************
27 MetaStatsCommand::MetaStatsCommand(){
29 abort = true; calledHelp = true;
30 vector<string> tempOutNames;
31 outputTypes["metastats"] = tempOutNames;
34 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
38 //**********************************************************************************************************************
39 vector<string> MetaStatsCommand::getRequiredParameters(){
41 string Array[] = {"design"};
42 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
46 m->errorOut(e, "MetaStatsCommand", "getRequiredParameters");
50 //**********************************************************************************************************************
51 vector<string> MetaStatsCommand::getRequiredFiles(){
53 string Array[] = {"shared"};
54 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
58 m->errorOut(e, "MetaStatsCommand", "getRequiredFiles");
62 //**********************************************************************************************************************
64 MetaStatsCommand::MetaStatsCommand(string option) {
66 globaldata = GlobalData::getInstance();
67 abort = false; calledHelp = false;
71 //allow user to run help
72 if(option == "help") { help(); abort = true; calledHelp = true; }
75 //valid paramters for this command
76 string AlignArray[] = {"groups","label","outputdir","iters","threshold","g","design","sets","processors","inputdir"};
77 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
79 OptionParser parser(option);
80 map<string,string> parameters = parser.getParameters();
82 ValidParameters validParameter;
84 //check to make sure all parameters are valid for command
85 map<string,string>::iterator it;
86 for (it = parameters.begin(); it != parameters.end(); it++) {
87 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
90 //initialize outputTypes
91 vector<string> tempOutNames;
92 outputTypes["metastats"] = tempOutNames;
94 //if the user changes the output directory command factory will send this info to us in the output parameter
95 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
97 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it
100 //if the user changes the input directory command factory will send this info to us in the output parameter
101 string inputDir = validParameter.validFile(parameters, "inputdir", false);
102 if (inputDir == "not found"){ inputDir = ""; }
105 it = parameters.find("design");
106 //user has given a template file
107 if(it != parameters.end()){
108 path = m->hasPath(it->second);
109 //if the user has not given a path then, add inputdir. else leave path alone.
110 if (path == "") { parameters["design"] = inputDir + it->second; }
114 //check for required parameters
115 designfile = validParameter.validFile(parameters, "design", true);
116 if (designfile == "not open") { abort = true; }
117 else if (designfile == "not found") { designfile = ""; m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }
119 //make sure the user has already run the read.otu command
120 if ((globaldata->getSharedFile() == "")) {
121 m->mothurOut("You must read a list and a group, or a shared file before you can use the metastats command."); m->mothurOutEndLine(); abort = true;
124 //check for optional parameter and set defaults
125 // ...at some point should added some additional type checking...
126 label = validParameter.validFile(parameters, "label", false);
127 if (label == "not found") { label = ""; }
129 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
130 else { allLines = 1; }
133 //if the user has not specified any labels use the ones from read.otu
135 allLines = globaldata->allLines;
136 labels = globaldata->labels;
139 groups = validParameter.validFile(parameters, "groups", false);
140 if (groups == "not found") { groups = ""; pickedGroups = false; }
143 m->splitAtDash(groups, Groups);
144 globaldata->Groups = Groups;
147 sets = validParameter.validFile(parameters, "sets", false);
148 if (sets == "not found") { sets = ""; }
150 m->splitAtDash(sets, Sets);
154 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
155 convert(temp, iters);
157 temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found") { temp = "0.05"; }
158 convert(temp, threshold);
160 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
161 convert(temp, processors);
165 catch(exception& e) {
166 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
171 //**********************************************************************************************************************
173 void MetaStatsCommand::help(){
175 m->mothurOut("This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n");
176 m->mothurOut("The metastats command can only be executed after a successful read.otu command of a list and group or shared file.\n");
177 m->mothurOut("The metastats command outputs a .metastats file. \n");
178 m->mothurOut("The metastats command parameters are iters, threshold, groups, label, design, sets and processors. The design parameter is required.\n");
179 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n");
180 m->mothurOut("The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
181 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n");
182 m->mothurOut("The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic. The default is 1000. \n");
183 m->mothurOut("The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n");
184 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
185 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
186 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
187 m->mothurOut("The metastats command should be in the following format: metastats(design=yourDesignFile).\n");
188 m->mothurOut("Example metastats(design=temp.design, groups=A-B-C).\n");
189 m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
190 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
193 catch(exception& e) {
194 m->errorOut(e, "MetaStatsCommand", "help");
199 //**********************************************************************************************************************
201 MetaStatsCommand::~MetaStatsCommand(){}
203 //**********************************************************************************************************************
205 int MetaStatsCommand::execute(){
208 if (abort == true) { if (calledHelp) { return 0; } return 2; }
210 designMap = new GroupMap(designfile);
211 designMap->readDesignMap();
213 read = new ReadOTUFile(globaldata->inputFileName);
214 read->read(&*globaldata);
215 input = globaldata->ginput;
216 lookup = input->getSharedRAbundVectors();
217 string lastLabel = lookup[0]->getLabel();
219 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
220 set<string> processedLabels;
221 set<string> userLabels = labels;
223 //setup the pairwise comparions of sets for metastats
224 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
225 //make sure sets are all in designMap
226 SharedUtil* util = new SharedUtil();
227 util->setGroups(Sets, designMap->namesOfGroups);
230 int numGroups = Sets.size();
231 for (int a=0; a<numGroups; a++) {
232 for (int l = 0; l < a; l++) {
233 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
234 namesOfGroupCombos.push_back(groups);
240 if (numGroups == 2) { processors = 1; }
241 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
243 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
245 int numPairs = namesOfGroupCombos.size();
246 int numPairsPerProcessor = numPairs / processors;
248 for (int i = 0; i < processors; i++) {
249 int startPos = i * numPairsPerProcessor;
250 if(i == processors - 1){
251 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
253 lines.push_back(linePair(startPos, numPairsPerProcessor));
258 //as long as you are not at the end of the file or done wih the lines you want
259 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
261 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } globaldata->Groups.clear(); delete read; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
263 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
265 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
268 processedLabels.insert(lookup[0]->getLabel());
269 userLabels.erase(lookup[0]->getLabel());
272 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
273 string saveLabel = lookup[0]->getLabel();
275 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
276 lookup = input->getSharedRAbundVectors(lastLabel);
277 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
281 processedLabels.insert(lookup[0]->getLabel());
282 userLabels.erase(lookup[0]->getLabel());
284 //restore real lastlabel to save below
285 lookup[0]->setLabel(saveLabel);
288 lastLabel = lookup[0]->getLabel();
289 //prevent memory leak
290 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
292 if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); delete read; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
294 //get next line to process
295 lookup = input->getSharedRAbundVectors();
298 if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); delete read; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
300 //output error messages about any remaining user labels
301 set<string>::iterator it;
302 bool needToRun = false;
303 for (it = userLabels.begin(); it != userLabels.end(); it++) {
304 m->mothurOut("Your file does not include the label " + *it);
305 if (processedLabels.count(lastLabel) != 1) {
306 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
309 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
313 //run last label if you need to
314 if (needToRun == true) {
315 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
316 lookup = input->getSharedRAbundVectors(lastLabel);
318 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
322 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
325 //reset groups parameter
326 globaldata->Groups.clear();
327 delete input; globaldata->ginput = NULL;
331 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0;}
333 m->mothurOutEndLine();
334 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
335 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
336 m->mothurOutEndLine();
340 catch(exception& e) {
341 m->errorOut(e, "MetaStatsCommand", "execute");
345 //**********************************************************************************************************************
347 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
350 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
352 driver(0, namesOfGroupCombos.size(), thisLookUp);
355 vector<int> processIDS;
357 //loop through and create all the processes you want
358 while (process != processors) {
362 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
365 driver(lines[process].start, lines[process].num, thisLookUp);
368 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
369 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
375 driver(lines[0].start, lines[0].num, thisLookUp);
377 //force parent to wait until all the processes are done
378 for (int i=0;i<(processors-1);i++) {
379 int temp = processIDS[i];
384 driver(0, namesOfGroupCombos.size(), thisLookUp);
390 catch(exception& e) {
391 m->errorOut(e, "MetaStatsCommand", "process");
395 //**********************************************************************************************************************
396 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) {
400 for (int c = start; c < (start+num); c++) {
403 string setA = namesOfGroupCombos[c][0];
404 string setB = namesOfGroupCombos[c][1];
407 string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
408 outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
409 int nameLength = outputFileName.length();
410 char * output = new char[nameLength];
411 strcpy(output, outputFileName.c_str());
413 //build matrix from shared rabunds
415 data = new double*[thisLookUp[0]->getNumBins()];
417 vector<SharedRAbundVector*> subset;
420 for (int i = 0; i < thisLookUp.size(); i++) {
421 string thisGroup = thisLookUp[i]->getGroup();
423 //is this group for a set we want to compare??
424 //sorting the sets by putting setB at the back and setA in the front
425 if ((designMap->getGroup(thisGroup) == setB)) {
426 subset.push_back(thisLookUp[i]);
428 }else if ((designMap->getGroup(thisGroup) == setA)) {
429 subset.insert(subset.begin()+setACount, thisLookUp[i]);
434 if ((setACount == 0) || (setBCount == 0)) {
435 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine();
436 outputNames.pop_back();
439 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
440 data[j] = new double[subset.size()];
441 for (int i = 0; i < subset.size(); i++) {
442 data[j][i] = (subset[i]->getAbundance(j));
446 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine();
447 metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
449 m->mothurOutEndLine();
454 for(int i = 0; i < thisLookUp[0]->getNumBins(); i++) { delete[] data[i]; }
461 catch(exception& e) {
462 m->errorOut(e, "MetaStatsCommand", "driver");
466 //**********************************************************************************************************************