5 * Created by westcott on 9/16/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
15 //**********************************************************************************************************************
16 vector<string> MetaStatsCommand::setParameters(){
18 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
19 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
20 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
22 CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold);
23 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
24 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
25 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
34 m->errorOut(e, "MetaStatsCommand", "setParameters");
38 //**********************************************************************************************************************
39 string MetaStatsCommand::getHelpString(){
41 string helpString = "";
42 helpString += "This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n";
43 helpString += "The metastats command outputs a .metastats file. \n";
44 helpString += "The metastats command parameters are shared, iters, threshold, groups, label, design, sets and processors. The shared and design parameters are required, unless you have valid current files.\n";
45 helpString += "The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n";
46 helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
47 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
48 helpString += "The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic. The default is 1000. \n";
49 helpString += "The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n";
50 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
51 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
52 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
53 helpString += "The metastats command should be in the following format: metastats(design=yourDesignFile).\n";
54 helpString += "Example metastats(design=temp.design, groups=A-B-C).\n";
55 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
56 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
60 m->errorOut(e, "MetaStatsCommand", "getHelpString");
64 //**********************************************************************************************************************
65 MetaStatsCommand::MetaStatsCommand(){
67 abort = true; calledHelp = true;
69 vector<string> tempOutNames;
70 outputTypes["metastats"] = tempOutNames;
73 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
77 //**********************************************************************************************************************
79 MetaStatsCommand::MetaStatsCommand(string option) {
81 abort = false; calledHelp = false;
85 //allow user to run help
86 if(option == "help") { help(); abort = true; calledHelp = true; }
87 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
90 vector<string> myArray = setParameters();
92 OptionParser parser(option);
93 map<string,string> parameters = parser.getParameters();
95 ValidParameters validParameter;
97 //check to make sure all parameters are valid for command
98 map<string,string>::iterator it;
99 for (it = parameters.begin(); it != parameters.end(); it++) {
100 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
103 //initialize outputTypes
104 vector<string> tempOutNames;
105 outputTypes["metastats"] = tempOutNames;
108 //if the user changes the input directory command factory will send this info to us in the output parameter
109 string inputDir = validParameter.validFile(parameters, "inputdir", false);
110 if (inputDir == "not found"){ inputDir = ""; }
113 it = parameters.find("design");
114 //user has given a template file
115 if(it != parameters.end()){
116 path = m->hasPath(it->second);
117 //if the user has not given a path then, add inputdir. else leave path alone.
118 if (path == "") { parameters["design"] = inputDir + it->second; }
121 it = parameters.find("shared");
122 //user has given a template file
123 if(it != parameters.end()){
124 path = m->hasPath(it->second);
125 //if the user has not given a path then, add inputdir. else leave path alone.
126 if (path == "") { parameters["shared"] = inputDir + it->second; }
131 //check for required parameters
132 sharedfile = validParameter.validFile(parameters, "shared", true);
133 if (sharedfile == "not open") { abort = true; }
134 else if (sharedfile == "not found") { //if there is a current shared file, use it
135 sharedfile = m->getSharedFile();
136 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
137 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
138 }else { m->setSharedFile(sharedfile); }
140 //check for required parameters
141 designfile = validParameter.validFile(parameters, "design", true);
142 if (designfile == "not open") { abort = true; }
143 else if (designfile == "not found") {
144 //if there is a current design file, use it
145 designfile = m->getDesignFile();
146 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
147 else { m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
148 }else { m->setDesignFile(designfile); }
150 //if the user changes the output directory command factory will send this info to us in the output parameter
151 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
153 outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it
156 //check for optional parameter and set defaults
157 // ...at some point should added some additional type checking...
158 label = validParameter.validFile(parameters, "label", false);
159 if (label == "not found") { label = ""; }
161 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
162 else { allLines = 1; }
165 groups = validParameter.validFile(parameters, "groups", false);
166 if (groups == "not found") { groups = ""; pickedGroups = false; }
169 m->splitAtDash(groups, Groups);
170 m->setGroups(Groups);
173 sets = validParameter.validFile(parameters, "sets", false);
174 if (sets == "not found") { sets = ""; }
176 m->splitAtDash(sets, Sets);
180 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
181 m->mothurConvert(temp, iters);
183 temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found") { temp = "0.05"; }
184 m->mothurConvert(temp, threshold);
186 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
187 m->setProcessors(temp);
188 m->mothurConvert(temp, processors);
192 catch(exception& e) {
193 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
197 //**********************************************************************************************************************
199 int MetaStatsCommand::execute(){
202 if (abort == true) { if (calledHelp) { return 0; } return 2; }
204 designMap = new GroupMap(designfile);
205 designMap->readDesignMap();
207 input = new InputData(sharedfile, "sharedfile");
208 lookup = input->getSharedRAbundVectors();
209 string lastLabel = lookup[0]->getLabel();
211 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
212 set<string> processedLabels;
213 set<string> userLabels = labels;
215 //setup the pairwise comparions of sets for metastats
216 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
217 //make sure sets are all in designMap
218 SharedUtil* util = new SharedUtil();
219 vector<string> dGroups = designMap->getNamesOfGroups();
220 util->setGroups(Sets, dGroups);
223 int numGroups = Sets.size();
224 for (int a=0; a<numGroups; a++) {
225 for (int l = 0; l < a; l++) {
226 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
227 namesOfGroupCombos.push_back(groups);
233 if (numGroups == 2) { processors = 1; }
234 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
237 int numPairs = namesOfGroupCombos.size();
238 int numPairsPerProcessor = numPairs / processors;
240 for (int i = 0; i < processors; i++) {
241 int startPos = i * numPairsPerProcessor;
242 if(i == processors - 1){
243 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
245 lines.push_back(linePair(startPos, numPairsPerProcessor));
249 //as long as you are not at the end of the file or done wih the lines you want
250 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
252 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
254 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
256 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
259 processedLabels.insert(lookup[0]->getLabel());
260 userLabels.erase(lookup[0]->getLabel());
263 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
264 string saveLabel = lookup[0]->getLabel();
266 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
267 lookup = input->getSharedRAbundVectors(lastLabel);
268 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
272 processedLabels.insert(lookup[0]->getLabel());
273 userLabels.erase(lookup[0]->getLabel());
275 //restore real lastlabel to save below
276 lookup[0]->setLabel(saveLabel);
279 lastLabel = lookup[0]->getLabel();
280 //prevent memory leak
281 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
283 if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
285 //get next line to process
286 lookup = input->getSharedRAbundVectors();
289 if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
291 //output error messages about any remaining user labels
292 set<string>::iterator it;
293 bool needToRun = false;
294 for (it = userLabels.begin(); it != userLabels.end(); it++) {
295 m->mothurOut("Your file does not include the label " + *it);
296 if (processedLabels.count(lastLabel) != 1) {
297 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
300 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
304 //run last label if you need to
305 if (needToRun == true) {
306 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
307 lookup = input->getSharedRAbundVectors(lastLabel);
309 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
313 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
316 //reset groups parameter
321 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;}
323 m->mothurOutEndLine();
324 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
325 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
326 m->mothurOutEndLine();
330 catch(exception& e) {
331 m->errorOut(e, "MetaStatsCommand", "execute");
335 //**********************************************************************************************************************
337 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
342 driver(0, namesOfGroupCombos.size(), thisLookUp);
345 vector<int> processIDS;
346 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
347 //loop through and create all the processes you want
348 while (process != processors) {
352 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
355 driver(lines[process].start, lines[process].num, thisLookUp);
358 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
359 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
365 driver(lines[0].start, lines[0].num, thisLookUp);
367 //force parent to wait until all the processes are done
368 for (int i=0;i<(processors-1);i++) {
369 int temp = processIDS[i];
374 //////////////////////////////////////////////////////////////////////////////////////////////////////
375 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct.
376 //Above fork() will clone, so memory is separate, but that's not the case with windows,
377 //Taking advantage of shared memory to pass results vectors.
378 //////////////////////////////////////////////////////////////////////////////////////////////////////
380 vector<metastatsData*> pDataArray;
381 DWORD dwThreadIdArray[processors-1];
382 HANDLE hThreadArray[processors-1];
384 //Create processor worker threads.
385 for( int i=1; i<processors; i++ ){
387 //make copy of lookup so we don't get access violations
388 vector<SharedRAbundVector*> newLookup;
389 vector<string> designMapGroups;
390 for (int k = 0; k < thisLookUp.size(); k++) {
391 SharedRAbundVector* temp = new SharedRAbundVector();
392 temp->setLabel(thisLookUp[k]->getLabel());
393 temp->setGroup(thisLookUp[k]->getGroup());
394 newLookup.push_back(temp);
395 designMapGroups.push_back(designMap->getGroup(thisLookUp[k]->getGroup()));
399 for (int k = 0; k < thisLookUp[0]->getNumBins(); k++) {
400 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
401 for (int j = 0; j < thisLookUp.size(); j++) { newLookup[j]->push_back(thisLookUp[j]->getAbundance(k), thisLookUp[j]->getGroup()); }
404 // Allocate memory for thread data.
405 metastatsData* tempSum = new metastatsData(sharedfile, outputDir, m, lines[i].start, lines[i].num, namesOfGroupCombos, newLookup, designMapGroups, iters, threshold);
406 pDataArray.push_back(tempSum);
407 processIDS.push_back(i);
409 hThreadArray[i-1] = CreateThread(NULL, 0, MyMetastatsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
413 driver(lines[0].start, lines[0].num, thisLookUp);
415 //Wait until all threads have terminated.
416 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
418 //Close all thread handles and free memory allocations.
419 for(int i=0; i < pDataArray.size(); i++){
420 for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) { delete pDataArray[i]->thisLookUp[j]; }
421 for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {
422 outputNames.push_back(pDataArray[i]->outputNames[j]);
423 outputTypes["metastats"].push_back(pDataArray[i]->outputNames[j]);
426 CloseHandle(hThreadArray[i]);
427 delete pDataArray[i];
436 catch(exception& e) {
437 m->errorOut(e, "MetaStatsCommand", "process");
441 //**********************************************************************************************************************
442 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) {
446 for (int c = start; c < (start+num); c++) {
449 string setA = namesOfGroupCombos[c][0];
450 string setB = namesOfGroupCombos[c][1];
453 string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
454 outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
455 //int nameLength = outputFileName.length();
456 //char * output = new char[nameLength];
457 //strcpy(output, outputFileName.c_str());
459 //build matrix from shared rabunds
461 //data = new double*[thisLookUp[0]->getNumBins()];
463 vector< vector<double> > data2; data2.resize(thisLookUp[0]->getNumBins());
465 vector<SharedRAbundVector*> subset;
468 for (int i = 0; i < thisLookUp.size(); i++) {
469 string thisGroup = thisLookUp[i]->getGroup();
471 //is this group for a set we want to compare??
472 //sorting the sets by putting setB at the back and setA in the front
473 if ((designMap->getGroup(thisGroup) == setB)) {
474 subset.push_back(thisLookUp[i]);
476 }else if ((designMap->getGroup(thisGroup) == setA)) {
477 subset.insert(subset.begin()+setACount, thisLookUp[i]);
482 if ((setACount == 0) || (setBCount == 0)) {
483 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine();
484 outputNames.pop_back();
487 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
488 //data[j] = new double[subset.size()];
489 data2[j].resize(subset.size(), 0.0);
490 for (int i = 0; i < subset.size(); i++) {
491 //data[j][i] = (subset[i]->getAbundance(j));
492 data2[j][i] = (subset[i]->getAbundance(j));
496 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine();
497 //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
499 m->mothurOutEndLine();
500 MothurMetastats mothurMeta(threshold, iters);
501 mothurMeta.runMetastats(outputFileName , data2, setACount);
502 m->mothurOutEndLine();
504 m->mothurOutEndLine();
509 //for(int i = 0; i < thisLookUp[0]->getNumBins(); i++) { delete[] data[i]; }
516 catch(exception& e) {
517 m->errorOut(e, "MetaStatsCommand", "driver");
521 //**********************************************************************************************************************