5 * Created by westcott on 9/16/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "metastatscommand.h"
11 #include "sharedutilities.h"
14 //**********************************************************************************************************************
15 vector<string> MetaStatsCommand::setParameters(){
17 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","metastats",false,true,true); parameters.push_back(pshared);
18 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
19 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
20 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
21 CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pthreshold);
22 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
23 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
24 CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28 vector<string> myArray;
29 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
33 m->errorOut(e, "MetaStatsCommand", "setParameters");
37 //**********************************************************************************************************************
38 string MetaStatsCommand::getHelpString(){
40 string helpString = "";
41 helpString += "This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n";
42 helpString += "The metastats command outputs a .metastats file. \n";
43 helpString += "The metastats command parameters are shared, iters, threshold, groups, label, design, sets and processors. The shared and design parameters are required, unless you have valid current files.\n";
44 helpString += "The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n";
45 helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
46 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
47 helpString += "The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic. The default is 1000. \n";
48 helpString += "The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n";
49 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
50 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
51 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
52 helpString += "The metastats command should be in the following format: metastats(design=yourDesignFile).\n";
53 helpString += "Example metastats(design=temp.design, groups=A-B-C).\n";
54 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
55 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
59 m->errorOut(e, "MetaStatsCommand", "getHelpString");
63 //**********************************************************************************************************************
64 string MetaStatsCommand::getOutputPattern(string type) {
68 if (type == "metastats") { pattern = "[filename],[distance],[group],metastats"; }
69 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
74 m->errorOut(e, "MetaStatsCommand", "getOutputPattern");
78 //**********************************************************************************************************************
79 MetaStatsCommand::MetaStatsCommand(){
81 abort = true; calledHelp = true;
83 vector<string> tempOutNames;
84 outputTypes["metastats"] = tempOutNames;
87 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
91 //**********************************************************************************************************************
93 MetaStatsCommand::MetaStatsCommand(string option) {
95 abort = false; calledHelp = false;
99 //allow user to run help
100 if(option == "help") { help(); abort = true; calledHelp = true; }
101 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
104 vector<string> myArray = setParameters();
106 OptionParser parser(option);
107 map<string,string> parameters = parser.getParameters();
109 ValidParameters validParameter;
111 //check to make sure all parameters are valid for command
112 map<string,string>::iterator it;
113 for (it = parameters.begin(); it != parameters.end(); it++) {
114 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
117 //initialize outputTypes
118 vector<string> tempOutNames;
119 outputTypes["metastats"] = tempOutNames;
122 //if the user changes the input directory command factory will send this info to us in the output parameter
123 string inputDir = validParameter.validFile(parameters, "inputdir", false);
124 if (inputDir == "not found"){ inputDir = ""; }
127 it = parameters.find("design");
128 //user has given a template file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["design"] = inputDir + it->second; }
135 it = parameters.find("shared");
136 //user has given a template file
137 if(it != parameters.end()){
138 path = m->hasPath(it->second);
139 //if the user has not given a path then, add inputdir. else leave path alone.
140 if (path == "") { parameters["shared"] = inputDir + it->second; }
145 //check for required parameters
146 sharedfile = validParameter.validFile(parameters, "shared", true);
147 if (sharedfile == "not open") { abort = true; }
148 else if (sharedfile == "not found") { //if there is a current shared file, use it
149 sharedfile = m->getSharedFile();
150 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
151 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
152 }else { m->setSharedFile(sharedfile); }
154 //check for required parameters
155 designfile = validParameter.validFile(parameters, "design", true);
156 if (designfile == "not open") { abort = true; }
157 else if (designfile == "not found") {
158 //if there is a current design file, use it
159 designfile = m->getDesignFile();
160 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
161 else { m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
162 }else { m->setDesignFile(designfile); }
164 //if the user changes the output directory command factory will send this info to us in the output parameter
165 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
167 outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it
170 //check for optional parameter and set defaults
171 // ...at some point should added some additional type checking...
172 label = validParameter.validFile(parameters, "label", false);
173 if (label == "not found") { label = ""; }
175 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
176 else { allLines = 1; }
179 groups = validParameter.validFile(parameters, "groups", false);
180 if (groups == "not found") { groups = ""; pickedGroups = false; }
183 m->splitAtDash(groups, Groups);
184 m->setGroups(Groups);
187 sets = validParameter.validFile(parameters, "sets", false);
188 if (sets == "not found") { sets = ""; }
190 m->splitAtDash(sets, Sets);
194 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
195 m->mothurConvert(temp, iters);
197 temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found") { temp = "0.05"; }
198 m->mothurConvert(temp, threshold);
200 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
201 m->setProcessors(temp);
202 m->mothurConvert(temp, processors);
206 catch(exception& e) {
207 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
211 //**********************************************************************************************************************
213 int MetaStatsCommand::execute(){
216 if (abort == true) { if (calledHelp) { return 0; } return 2; }
218 designMap = new GroupMap(designfile);
219 designMap->readDesignMap();
221 input = new InputData(sharedfile, "sharedfile");
222 lookup = input->getSharedRAbundVectors();
223 string lastLabel = lookup[0]->getLabel();
225 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
226 set<string> processedLabels;
227 set<string> userLabels = labels;
229 //setup the pairwise comparions of sets for metastats
230 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
231 //make sure sets are all in designMap
232 SharedUtil* util = new SharedUtil();
233 vector<string> dGroups = designMap->getNamesOfGroups();
234 util->setGroups(Sets, dGroups);
237 int numGroups = Sets.size();
238 for (int a=0; a<numGroups; a++) {
239 for (int l = 0; l < a; l++) {
240 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
241 namesOfGroupCombos.push_back(groups);
247 if (numGroups == 2) { processors = 1; }
248 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
251 int numPairs = namesOfGroupCombos.size();
252 int numPairsPerProcessor = numPairs / processors;
254 for (int i = 0; i < processors; i++) {
255 int startPos = i * numPairsPerProcessor;
256 if(i == processors - 1){
257 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
259 lines.push_back(linePair(startPos, numPairsPerProcessor));
263 //as long as you are not at the end of the file or done wih the lines you want
264 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
266 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
268 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
270 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
273 processedLabels.insert(lookup[0]->getLabel());
274 userLabels.erase(lookup[0]->getLabel());
277 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
278 string saveLabel = lookup[0]->getLabel();
280 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
281 lookup = input->getSharedRAbundVectors(lastLabel);
282 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
286 processedLabels.insert(lookup[0]->getLabel());
287 userLabels.erase(lookup[0]->getLabel());
289 //restore real lastlabel to save below
290 lookup[0]->setLabel(saveLabel);
293 lastLabel = lookup[0]->getLabel();
294 //prevent memory leak
295 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
297 if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
299 //get next line to process
300 lookup = input->getSharedRAbundVectors();
303 if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
305 //output error messages about any remaining user labels
306 set<string>::iterator it;
307 bool needToRun = false;
308 for (it = userLabels.begin(); it != userLabels.end(); it++) {
309 m->mothurOut("Your file does not include the label " + *it);
310 if (processedLabels.count(lastLabel) != 1) {
311 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
314 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
318 //run last label if you need to
319 if (needToRun == true) {
320 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
321 lookup = input->getSharedRAbundVectors(lastLabel);
323 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
327 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
330 //reset groups parameter
335 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;}
337 m->mothurOutEndLine();
338 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
339 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
340 m->mothurOutEndLine();
344 catch(exception& e) {
345 m->errorOut(e, "MetaStatsCommand", "execute");
349 //**********************************************************************************************************************
351 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
356 driver(0, namesOfGroupCombos.size(), thisLookUp);
359 vector<int> processIDS;
360 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
361 //loop through and create all the processes you want
362 while (process != processors) {
366 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
369 driver(lines[process].start, lines[process].num, thisLookUp);
372 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
373 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
379 driver(lines[0].start, lines[0].num, thisLookUp);
381 //force parent to wait until all the processes are done
382 for (int i=0;i<(processors-1);i++) {
383 int temp = processIDS[i];
388 //////////////////////////////////////////////////////////////////////////////////////////////////////
389 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct.
390 //Above fork() will clone, so memory is separate, but that's not the case with windows,
391 //Taking advantage of shared memory to pass results vectors.
392 //////////////////////////////////////////////////////////////////////////////////////////////////////
394 vector<metastatsData*> pDataArray;
395 DWORD dwThreadIdArray[processors-1];
396 HANDLE hThreadArray[processors-1];
398 //Create processor worker threads.
399 for( int i=1; i<processors; i++ ){
401 //make copy of lookup so we don't get access violations
402 vector<SharedRAbundVector*> newLookup;
403 vector<string> designMapGroups;
404 for (int k = 0; k < thisLookUp.size(); k++) {
405 SharedRAbundVector* temp = new SharedRAbundVector();
406 temp->setLabel(thisLookUp[k]->getLabel());
407 temp->setGroup(thisLookUp[k]->getGroup());
408 newLookup.push_back(temp);
409 designMapGroups.push_back(designMap->getGroup(thisLookUp[k]->getGroup()));
413 for (int k = 0; k < thisLookUp[0]->getNumBins(); k++) {
414 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
415 for (int j = 0; j < thisLookUp.size(); j++) { newLookup[j]->push_back(thisLookUp[j]->getAbundance(k), thisLookUp[j]->getGroup()); }
418 // Allocate memory for thread data.
419 metastatsData* tempSum = new metastatsData(sharedfile, outputDir, m, lines[i].start, lines[i].num, namesOfGroupCombos, newLookup, designMapGroups, iters, threshold);
420 pDataArray.push_back(tempSum);
421 processIDS.push_back(i);
423 hThreadArray[i-1] = CreateThread(NULL, 0, MyMetastatsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
427 driver(lines[0].start, lines[0].num, thisLookUp);
429 //Wait until all threads have terminated.
430 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
432 //Close all thread handles and free memory allocations.
433 for(int i=0; i < pDataArray.size(); i++){
434 if (pDataArray[i]->count != (pDataArray[i]->num)) {
435 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->num) + " groups assigned to it, quitting. \n"); m->control_pressed = true;
437 for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) { delete pDataArray[i]->thisLookUp[j]; }
438 for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {
439 outputNames.push_back(pDataArray[i]->outputNames[j]);
440 outputTypes["metastats"].push_back(pDataArray[i]->outputNames[j]);
443 CloseHandle(hThreadArray[i]);
444 delete pDataArray[i];
453 catch(exception& e) {
454 m->errorOut(e, "MetaStatsCommand", "process");
458 //**********************************************************************************************************************
459 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) {
463 for (int c = start; c < (start+num); c++) {
466 string setA = namesOfGroupCombos[c][0];
467 string setB = namesOfGroupCombos[c][1];
470 map<string, string> variables;
471 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
472 variables["[distance]"] = thisLookUp[0]->getLabel();
473 variables["[group]"] = setA + "-" + setB;
474 string outputFileName = getOutputFileName("metastats",variables);
475 outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
476 //int nameLength = outputFileName.length();
477 //char * output = new char[nameLength];
478 //strcpy(output, outputFileName.c_str());
480 //build matrix from shared rabunds
482 //data = new double*[thisLookUp[0]->getNumBins()];
484 vector< vector<double> > data2; data2.resize(thisLookUp[0]->getNumBins());
486 vector<SharedRAbundVector*> subset;
489 for (int i = 0; i < thisLookUp.size(); i++) {
490 string thisGroup = thisLookUp[i]->getGroup();
492 //is this group for a set we want to compare??
493 //sorting the sets by putting setB at the back and setA in the front
494 if ((designMap->getGroup(thisGroup) == setB)) {
495 subset.push_back(thisLookUp[i]);
497 }else if ((designMap->getGroup(thisGroup) == setA)) {
498 subset.insert(subset.begin()+setACount, thisLookUp[i]);
503 if ((setACount == 0) || (setBCount == 0)) {
504 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine();
505 outputNames.pop_back();
509 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
510 //data[j] = new double[subset.size()];
511 data2[j].resize(subset.size(), 0.0);
513 for (int i = 0; i < subset.size(); i++) {
514 data2[j][i] = (subset[i]->getAbundance(j));
518 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine();
519 //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
521 m->mothurOutEndLine();
522 MothurMetastats mothurMeta(threshold, iters);
523 mothurMeta.runMetastats(outputFileName , data2, setACount);
524 m->mothurOutEndLine();
525 m->mothurOutEndLine();
530 //for(int i = 0; i < thisLookUp[0]->getNumBins(); i++) { delete[] data[i]; }
537 catch(exception& e) {
538 m->errorOut(e, "MetaStatsCommand", "driver");
542 //**********************************************************************************************************************