5 * Created by westcott on 9/16/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
14 //**********************************************************************************************************************
16 MetaStatsCommand::MetaStatsCommand(string option) {
18 globaldata = GlobalData::getInstance();
23 //allow user to run help
24 if(option == "help") { help(); abort = true; }
27 //valid paramters for this command
28 string AlignArray[] = {"groups","label","outputdir","iters","threshold","g","design","sets","processors","inputdir"};
29 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
31 OptionParser parser(option);
32 map<string,string> parameters = parser.getParameters();
34 ValidParameters validParameter;
36 //check to make sure all parameters are valid for command
37 map<string,string>::iterator it;
38 for (it = parameters.begin(); it != parameters.end(); it++) {
39 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
42 //if the user changes the output directory command factory will send this info to us in the output parameter
43 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
45 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it
48 //if the user changes the input directory command factory will send this info to us in the output parameter
49 string inputDir = validParameter.validFile(parameters, "inputdir", false);
50 if (inputDir == "not found"){ inputDir = ""; }
53 it = parameters.find("design");
54 //user has given a template file
55 if(it != parameters.end()){
56 path = m->hasPath(it->second);
57 //if the user has not given a path then, add inputdir. else leave path alone.
58 if (path == "") { parameters["design"] = inputDir + it->second; }
62 //check for required parameters
63 designfile = validParameter.validFile(parameters, "design", true);
64 if (designfile == "not open") { abort = true; }
65 else if (designfile == "not found") { designfile = ""; m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }
67 //make sure the user has already run the read.otu command
68 if ((globaldata->getSharedFile() == "")) {
69 m->mothurOut("You must read a list and a group, or a shared file before you can use the normalize.shared command."); m->mothurOutEndLine(); abort = true;
72 //check for optional parameter and set defaults
73 // ...at some point should added some additional type checking...
74 label = validParameter.validFile(parameters, "label", false);
75 if (label == "not found") { label = ""; }
77 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
78 else { allLines = 1; }
81 //if the user has not specified any labels use the ones from read.otu
83 allLines = globaldata->allLines;
84 labels = globaldata->labels;
87 groups = validParameter.validFile(parameters, "groups", false);
88 if (groups == "not found") { groups = ""; pickedGroups = false; }
91 m->splitAtDash(groups, Groups);
92 globaldata->Groups = Groups;
95 sets = validParameter.validFile(parameters, "sets", false);
96 if (sets == "not found") { sets = ""; }
98 m->splitAtDash(sets, Sets);
102 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
103 convert(temp, iters);
105 temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found") { temp = "0.05"; }
106 convert(temp, threshold);
108 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
109 convert(temp, processors);
113 catch(exception& e) {
114 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
119 //**********************************************************************************************************************
121 void MetaStatsCommand::help(){
123 m->mothurOut("This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n");
124 m->mothurOut("The metastats command can only be executed after a successful read.otu command of a list and group or shared file.\n");
125 m->mothurOut("The metastats command outputs a .metastats file. \n");
126 m->mothurOut("The metastats command parameters are iters, threshold, groups, label, design, sets and processors. The design parameter is required.\n");
127 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n");
128 m->mothurOut("The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
129 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n");
130 m->mothurOut("The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic. The default is 1000. \n");
131 m->mothurOut("The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n");
132 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
133 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
134 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
135 m->mothurOut("The metastats command should be in the following format: metastats(design=yourDesignFile).\n");
136 m->mothurOut("Example metastats(design=temp.design, groups=A-B-C).\n");
137 m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
138 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
141 catch(exception& e) {
142 m->errorOut(e, "MetaStatsCommand", "help");
147 //**********************************************************************************************************************
149 MetaStatsCommand::~MetaStatsCommand(){}
151 //**********************************************************************************************************************
153 int MetaStatsCommand::execute(){
156 if (abort == true) { return 0; }
158 designMap = new GroupMap(designfile);
159 designMap->readDesignMap();
161 read = new ReadOTUFile(globaldata->inputFileName);
162 read->read(&*globaldata);
163 input = globaldata->ginput;
164 lookup = input->getSharedRAbundVectors();
165 string lastLabel = lookup[0]->getLabel();
167 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
168 set<string> processedLabels;
169 set<string> userLabels = labels;
171 //setup the pairwise comparions of sets for metastats
172 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
173 //make sure sets are all in designMap
174 SharedUtil* util = new SharedUtil();
175 util->setGroups(Sets, designMap->namesOfGroups);
178 int numGroups = Sets.size();
179 for (int a=0; a<numGroups; a++) {
180 for (int l = a+1; l < numGroups; l++) {
181 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
182 namesOfGroupCombos.push_back(groups);
188 if (numGroups == 2) { processors = 1; }
189 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
191 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
193 int numPairs = namesOfGroupCombos.size();
194 int numPairsPerProcessor = numPairs / processors;
196 for (int i = 0; i < processors; i++) {
197 int startPos = i * numPairsPerProcessor;
198 if(i == processors - 1){
199 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
201 lines.push_back(linePair(startPos, numPairsPerProcessor));
206 //as long as you are not at the end of the file or done wih the lines you want
207 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
209 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } globaldata->Groups.clear(); delete read; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
211 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
213 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
216 processedLabels.insert(lookup[0]->getLabel());
217 userLabels.erase(lookup[0]->getLabel());
220 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
221 string saveLabel = lookup[0]->getLabel();
223 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
224 lookup = input->getSharedRAbundVectors(lastLabel);
225 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
229 processedLabels.insert(lookup[0]->getLabel());
230 userLabels.erase(lookup[0]->getLabel());
232 //restore real lastlabel to save below
233 lookup[0]->setLabel(saveLabel);
236 lastLabel = lookup[0]->getLabel();
237 //prevent memory leak
238 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
240 if (m->control_pressed) { globaldata->Groups.clear(); delete read; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
242 //get next line to process
243 lookup = input->getSharedRAbundVectors();
246 if (m->control_pressed) { globaldata->Groups.clear(); delete read; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
248 //output error messages about any remaining user labels
249 set<string>::iterator it;
250 bool needToRun = false;
251 for (it = userLabels.begin(); it != userLabels.end(); it++) {
252 m->mothurOut("Your file does not include the label " + *it);
253 if (processedLabels.count(lastLabel) != 1) {
254 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
257 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
261 //run last label if you need to
262 if (needToRun == true) {
263 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
264 lookup = input->getSharedRAbundVectors(lastLabel);
266 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
270 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
273 //reset groups parameter
274 globaldata->Groups.clear();
275 delete input; globaldata->ginput = NULL;
279 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0;}
281 m->mothurOutEndLine();
282 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
283 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
284 m->mothurOutEndLine();
288 catch(exception& e) {
289 m->errorOut(e, "MetaStatsCommand", "execute");
293 //**********************************************************************************************************************
295 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
297 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
299 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
301 driver(0, namesOfGroupCombos.size(), thisLookUp);
304 vector<int> processIDS;
306 //loop through and create all the processes you want
307 while (process != processors) {
311 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
314 driver(lines[process].start, lines[process].num, thisLookUp);
316 }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
320 driver(lines[0].start, lines[0].num, thisLookUp);
322 //force parent to wait until all the processes are done
323 for (int i=0;i<(processors-1);i++) {
324 int temp = processIDS[i];
329 driver(0, namesOfGroupCombos.size(), thisLookUp);
335 catch(exception& e) {
336 m->errorOut(e, "MetaStatsCommand", "process");
340 //**********************************************************************************************************************
341 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) {
345 for (int c = start; c < (start+num); c++) {
348 string setA = namesOfGroupCombos[c][0];
349 string setB = namesOfGroupCombos[c][1];
352 string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
353 outputNames.push_back(outputFileName);
354 int nameLength = outputFileName.length();
355 char * output = new char[nameLength];
356 strcpy(output, outputFileName.c_str());
358 //build matrix from shared rabunds
360 data = new double*[thisLookUp[0]->getNumBins()];
362 vector<SharedRAbundVector*> subset;
365 for (int i = 0; i < thisLookUp.size(); i++) {
366 string thisGroup = thisLookUp[i]->getGroup();
368 //is this group for a set we want to compare??
369 //sorting the sets by putting setB at the back and setA in the front
370 if ((designMap->getGroup(thisGroup) == setB)) {
371 subset.push_back(thisLookUp[i]);
373 }else if ((designMap->getGroup(thisGroup) == setA)) {
374 subset.insert(subset.begin()+setACount, thisLookUp[i]);
379 if ((setACount == 0) || (setBCount == 0)) {
380 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine();
381 outputNames.pop_back();
384 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
385 data[j] = new double[subset.size()];
386 for (int i = 0; i < subset.size(); i++) {
387 data[j][i] = (subset[i]->getAbundance(j));
391 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine();
393 metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
395 m->mothurOutEndLine();
400 for(int i = 0; i < thisLookUp[0]->getNumBins(); i++) { delete[] data[i]; }
407 catch(exception& e) {
408 m->errorOut(e, "MetaStatsCommand", "driver");
412 //**********************************************************************************************************************
413 int MetaStatsCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
416 vector<SharedRAbundVector*> newLookup;
417 for (int i = 0; i < thislookup.size(); i++) {
418 SharedRAbundVector* temp = new SharedRAbundVector();
419 temp->setLabel(thislookup[i]->getLabel());
420 temp->setGroup(thislookup[i]->getGroup());
421 newLookup.push_back(temp);
425 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
426 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
428 //look at each sharedRabund and make sure they are not all zero
430 for (int j = 0; j < thislookup.size(); j++) {
431 if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
434 //if they are not all zero add this bin
436 for (int j = 0; j < thislookup.size(); j++) {
437 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
442 for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
444 thislookup = newLookup;
449 catch(exception& e) {
450 m->errorOut(e, "MetaStatsCommand", "eliminateZeroOTUS");
454 //**********************************************************************************************************************