5 * Created by westcott on 9/16/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
14 //**********************************************************************************************************************
15 vector<string> MetaStatsCommand::getValidParameters(){
17 string Array[] = {"groups","label","outputdir","iters","threshold","g","design","sets","processors","inputdir"};
18 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
22 m->errorOut(e, "MetaStatsCommand", "getValidParameters");
26 //**********************************************************************************************************************
27 MetaStatsCommand::MetaStatsCommand(){
30 //initialize outputTypes
31 vector<string> tempOutNames;
32 outputTypes["metastats"] = tempOutNames;
35 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
39 //**********************************************************************************************************************
40 vector<string> MetaStatsCommand::getRequiredParameters(){
42 string Array[] = {"design"};
43 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
47 m->errorOut(e, "MetaStatsCommand", "getRequiredParameters");
51 //**********************************************************************************************************************
52 vector<string> MetaStatsCommand::getRequiredFiles(){
54 string Array[] = {"shared"};
55 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
59 m->errorOut(e, "MetaStatsCommand", "getRequiredFiles");
63 //**********************************************************************************************************************
65 MetaStatsCommand::MetaStatsCommand(string option) {
67 globaldata = GlobalData::getInstance();
72 //allow user to run help
73 if(option == "help") { help(); abort = true; }
76 //valid paramters for this command
77 string AlignArray[] = {"groups","label","outputdir","iters","threshold","g","design","sets","processors","inputdir"};
78 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
80 OptionParser parser(option);
81 map<string,string> parameters = parser.getParameters();
83 ValidParameters validParameter;
85 //check to make sure all parameters are valid for command
86 map<string,string>::iterator it;
87 for (it = parameters.begin(); it != parameters.end(); it++) {
88 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
91 //initialize outputTypes
92 vector<string> tempOutNames;
93 outputTypes["metastats"] = tempOutNames;
95 //if the user changes the output directory command factory will send this info to us in the output parameter
96 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
98 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it
101 //if the user changes the input directory command factory will send this info to us in the output parameter
102 string inputDir = validParameter.validFile(parameters, "inputdir", false);
103 if (inputDir == "not found"){ inputDir = ""; }
106 it = parameters.find("design");
107 //user has given a template file
108 if(it != parameters.end()){
109 path = m->hasPath(it->second);
110 //if the user has not given a path then, add inputdir. else leave path alone.
111 if (path == "") { parameters["design"] = inputDir + it->second; }
115 //check for required parameters
116 designfile = validParameter.validFile(parameters, "design", true);
117 if (designfile == "not open") { abort = true; }
118 else if (designfile == "not found") { designfile = ""; m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }
120 //make sure the user has already run the read.otu command
121 if ((globaldata->getSharedFile() == "")) {
122 m->mothurOut("You must read a list and a group, or a shared file before you can use the metastats command."); m->mothurOutEndLine(); abort = true;
125 //check for optional parameter and set defaults
126 // ...at some point should added some additional type checking...
127 label = validParameter.validFile(parameters, "label", false);
128 if (label == "not found") { label = ""; }
130 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
131 else { allLines = 1; }
134 //if the user has not specified any labels use the ones from read.otu
136 allLines = globaldata->allLines;
137 labels = globaldata->labels;
140 groups = validParameter.validFile(parameters, "groups", false);
141 if (groups == "not found") { groups = ""; pickedGroups = false; }
144 m->splitAtDash(groups, Groups);
145 globaldata->Groups = Groups;
148 sets = validParameter.validFile(parameters, "sets", false);
149 if (sets == "not found") { sets = ""; }
151 m->splitAtDash(sets, Sets);
155 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
156 convert(temp, iters);
158 temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found") { temp = "0.05"; }
159 convert(temp, threshold);
161 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
162 convert(temp, processors);
166 catch(exception& e) {
167 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
172 //**********************************************************************************************************************
174 void MetaStatsCommand::help(){
176 m->mothurOut("This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n");
177 m->mothurOut("The metastats command can only be executed after a successful read.otu command of a list and group or shared file.\n");
178 m->mothurOut("The metastats command outputs a .metastats file. \n");
179 m->mothurOut("The metastats command parameters are iters, threshold, groups, label, design, sets and processors. The design parameter is required.\n");
180 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n");
181 m->mothurOut("The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
182 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n");
183 m->mothurOut("The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic. The default is 1000. \n");
184 m->mothurOut("The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n");
185 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
186 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
187 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
188 m->mothurOut("The metastats command should be in the following format: metastats(design=yourDesignFile).\n");
189 m->mothurOut("Example metastats(design=temp.design, groups=A-B-C).\n");
190 m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
191 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
194 catch(exception& e) {
195 m->errorOut(e, "MetaStatsCommand", "help");
200 //**********************************************************************************************************************
202 MetaStatsCommand::~MetaStatsCommand(){}
204 //**********************************************************************************************************************
206 int MetaStatsCommand::execute(){
209 if (abort == true) { return 0; }
211 designMap = new GroupMap(designfile);
212 designMap->readDesignMap();
214 read = new ReadOTUFile(globaldata->inputFileName);
215 read->read(&*globaldata);
216 input = globaldata->ginput;
217 lookup = input->getSharedRAbundVectors();
218 string lastLabel = lookup[0]->getLabel();
220 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
221 set<string> processedLabels;
222 set<string> userLabels = labels;
224 //setup the pairwise comparions of sets for metastats
225 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
226 //make sure sets are all in designMap
227 SharedUtil* util = new SharedUtil();
228 util->setGroups(Sets, designMap->namesOfGroups);
231 int numGroups = Sets.size();
232 for (int a=0; a<numGroups; a++) {
233 for (int l = 0; l < a; l++) {
234 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
235 namesOfGroupCombos.push_back(groups);
241 if (numGroups == 2) { processors = 1; }
242 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
244 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
246 int numPairs = namesOfGroupCombos.size();
247 int numPairsPerProcessor = numPairs / processors;
249 for (int i = 0; i < processors; i++) {
250 int startPos = i * numPairsPerProcessor;
251 if(i == processors - 1){
252 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
254 lines.push_back(linePair(startPos, numPairsPerProcessor));
259 //as long as you are not at the end of the file or done wih the lines you want
260 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
262 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } globaldata->Groups.clear(); delete read; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
264 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
266 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
269 processedLabels.insert(lookup[0]->getLabel());
270 userLabels.erase(lookup[0]->getLabel());
273 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
274 string saveLabel = lookup[0]->getLabel();
276 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
277 lookup = input->getSharedRAbundVectors(lastLabel);
278 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
282 processedLabels.insert(lookup[0]->getLabel());
283 userLabels.erase(lookup[0]->getLabel());
285 //restore real lastlabel to save below
286 lookup[0]->setLabel(saveLabel);
289 lastLabel = lookup[0]->getLabel();
290 //prevent memory leak
291 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
293 if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); delete read; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
295 //get next line to process
296 lookup = input->getSharedRAbundVectors();
299 if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); delete read; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
301 //output error messages about any remaining user labels
302 set<string>::iterator it;
303 bool needToRun = false;
304 for (it = userLabels.begin(); it != userLabels.end(); it++) {
305 m->mothurOut("Your file does not include the label " + *it);
306 if (processedLabels.count(lastLabel) != 1) {
307 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
310 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
314 //run last label if you need to
315 if (needToRun == true) {
316 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
317 lookup = input->getSharedRAbundVectors(lastLabel);
319 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
323 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
326 //reset groups parameter
327 globaldata->Groups.clear();
328 delete input; globaldata->ginput = NULL;
332 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0;}
334 m->mothurOutEndLine();
335 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
336 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
337 m->mothurOutEndLine();
341 catch(exception& e) {
342 m->errorOut(e, "MetaStatsCommand", "execute");
346 //**********************************************************************************************************************
348 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
350 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
352 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
354 driver(0, namesOfGroupCombos.size(), thisLookUp);
357 vector<int> processIDS;
359 //loop through and create all the processes you want
360 while (process != processors) {
364 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
367 driver(lines[process].start, lines[process].num, thisLookUp);
370 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
371 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
377 driver(lines[0].start, lines[0].num, thisLookUp);
379 //force parent to wait until all the processes are done
380 for (int i=0;i<(processors-1);i++) {
381 int temp = processIDS[i];
386 driver(0, namesOfGroupCombos.size(), thisLookUp);
392 catch(exception& e) {
393 m->errorOut(e, "MetaStatsCommand", "process");
397 //**********************************************************************************************************************
398 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) {
402 for (int c = start; c < (start+num); c++) {
405 string setA = namesOfGroupCombos[c][0];
406 string setB = namesOfGroupCombos[c][1];
409 string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
410 outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
411 int nameLength = outputFileName.length();
412 char * output = new char[nameLength];
413 strcpy(output, outputFileName.c_str());
415 //build matrix from shared rabunds
417 data = new double*[thisLookUp[0]->getNumBins()];
419 vector<SharedRAbundVector*> subset;
422 for (int i = 0; i < thisLookUp.size(); i++) {
423 string thisGroup = thisLookUp[i]->getGroup();
425 //is this group for a set we want to compare??
426 //sorting the sets by putting setB at the back and setA in the front
427 if ((designMap->getGroup(thisGroup) == setB)) {
428 subset.push_back(thisLookUp[i]);
430 }else if ((designMap->getGroup(thisGroup) == setA)) {
431 subset.insert(subset.begin()+setACount, thisLookUp[i]);
436 if ((setACount == 0) || (setBCount == 0)) {
437 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine();
438 outputNames.pop_back();
441 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
442 data[j] = new double[subset.size()];
443 for (int i = 0; i < subset.size(); i++) {
444 data[j][i] = (subset[i]->getAbundance(j));
448 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine();
449 metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
451 m->mothurOutEndLine();
456 for(int i = 0; i < thisLookUp[0]->getNumBins(); i++) { delete[] data[i]; }
463 catch(exception& e) {
464 m->errorOut(e, "MetaStatsCommand", "driver");
468 //**********************************************************************************************************************
469 int MetaStatsCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
472 vector<SharedRAbundVector*> newLookup;
473 for (int i = 0; i < thislookup.size(); i++) {
474 SharedRAbundVector* temp = new SharedRAbundVector();
475 temp->setLabel(thislookup[i]->getLabel());
476 temp->setGroup(thislookup[i]->getGroup());
477 newLookup.push_back(temp);
481 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
482 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
484 //look at each sharedRabund and make sure they are not all zero
486 for (int j = 0; j < thislookup.size(); j++) {
487 if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
490 //if they are not all zero add this bin
492 for (int j = 0; j < thislookup.size(); j++) {
493 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
498 for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
500 thislookup = newLookup;
505 catch(exception& e) {
506 m->errorOut(e, "MetaStatsCommand", "eliminateZeroOTUS");
510 //**********************************************************************************************************************