5 * Created by westcott on 9/16/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "metastatscommand.h"
11 #include "sharedutilities.h"
14 //**********************************************************************************************************************
15 vector<string> MetaStatsCommand::setParameters(){
17 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
18 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
19 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
20 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
21 CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold);
22 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
23 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
24 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28 vector<string> myArray;
29 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
33 m->errorOut(e, "MetaStatsCommand", "setParameters");
37 //**********************************************************************************************************************
38 string MetaStatsCommand::getHelpString(){
40 string helpString = "";
41 helpString += "This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n";
42 helpString += "The metastats command outputs a .metastats file. \n";
43 helpString += "The metastats command parameters are shared, iters, threshold, groups, label, design, sets and processors. The shared and design parameters are required, unless you have valid current files.\n";
44 helpString += "The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n";
45 helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
46 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
47 helpString += "The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic. The default is 1000. \n";
48 helpString += "The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n";
49 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
50 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
51 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
52 helpString += "The metastats command should be in the following format: metastats(design=yourDesignFile).\n";
53 helpString += "Example metastats(design=temp.design, groups=A-B-C).\n";
54 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
55 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
59 m->errorOut(e, "MetaStatsCommand", "getHelpString");
63 //**********************************************************************************************************************
64 string MetaStatsCommand::getOutputFileNameTag(string type, string inputName=""){
66 string outputFileName = "";
67 map<string, vector<string> >::iterator it;
69 //is this a type this command creates
70 it = outputTypes.find(type);
71 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
73 if (type == "metastats") { outputFileName = "metastats"; }
74 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
76 return outputFileName;
79 m->errorOut(e, "MetaStatsCommand", "getOutputFileNameTag");
84 //**********************************************************************************************************************
85 MetaStatsCommand::MetaStatsCommand(){
87 abort = true; calledHelp = true;
89 vector<string> tempOutNames;
90 outputTypes["metastats"] = tempOutNames;
93 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
97 //**********************************************************************************************************************
99 MetaStatsCommand::MetaStatsCommand(string option) {
101 abort = false; calledHelp = false;
105 //allow user to run help
106 if(option == "help") { help(); abort = true; calledHelp = true; }
107 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
110 vector<string> myArray = setParameters();
112 OptionParser parser(option);
113 map<string,string> parameters = parser.getParameters();
115 ValidParameters validParameter;
117 //check to make sure all parameters are valid for command
118 map<string,string>::iterator it;
119 for (it = parameters.begin(); it != parameters.end(); it++) {
120 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
123 //initialize outputTypes
124 vector<string> tempOutNames;
125 outputTypes["metastats"] = tempOutNames;
128 //if the user changes the input directory command factory will send this info to us in the output parameter
129 string inputDir = validParameter.validFile(parameters, "inputdir", false);
130 if (inputDir == "not found"){ inputDir = ""; }
133 it = parameters.find("design");
134 //user has given a template file
135 if(it != parameters.end()){
136 path = m->hasPath(it->second);
137 //if the user has not given a path then, add inputdir. else leave path alone.
138 if (path == "") { parameters["design"] = inputDir + it->second; }
141 it = parameters.find("shared");
142 //user has given a template file
143 if(it != parameters.end()){
144 path = m->hasPath(it->second);
145 //if the user has not given a path then, add inputdir. else leave path alone.
146 if (path == "") { parameters["shared"] = inputDir + it->second; }
151 //check for required parameters
152 sharedfile = validParameter.validFile(parameters, "shared", true);
153 if (sharedfile == "not open") { abort = true; }
154 else if (sharedfile == "not found") { //if there is a current shared file, use it
155 sharedfile = m->getSharedFile();
156 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
157 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
158 }else { m->setSharedFile(sharedfile); }
160 //check for required parameters
161 designfile = validParameter.validFile(parameters, "design", true);
162 if (designfile == "not open") { abort = true; }
163 else if (designfile == "not found") {
164 //if there is a current design file, use it
165 designfile = m->getDesignFile();
166 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
167 else { m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
168 }else { m->setDesignFile(designfile); }
170 //if the user changes the output directory command factory will send this info to us in the output parameter
171 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
173 outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it
176 //check for optional parameter and set defaults
177 // ...at some point should added some additional type checking...
178 label = validParameter.validFile(parameters, "label", false);
179 if (label == "not found") { label = ""; }
181 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
182 else { allLines = 1; }
185 groups = validParameter.validFile(parameters, "groups", false);
186 if (groups == "not found") { groups = ""; pickedGroups = false; }
189 m->splitAtDash(groups, Groups);
190 m->setGroups(Groups);
193 sets = validParameter.validFile(parameters, "sets", false);
194 if (sets == "not found") { sets = ""; }
196 m->splitAtDash(sets, Sets);
200 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
201 m->mothurConvert(temp, iters);
203 temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found") { temp = "0.05"; }
204 m->mothurConvert(temp, threshold);
206 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
207 m->setProcessors(temp);
208 m->mothurConvert(temp, processors);
212 catch(exception& e) {
213 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
217 //**********************************************************************************************************************
219 int MetaStatsCommand::execute(){
222 if (abort == true) { if (calledHelp) { return 0; } return 2; }
224 designMap = new GroupMap(designfile);
225 designMap->readDesignMap();
227 input = new InputData(sharedfile, "sharedfile");
228 lookup = input->getSharedRAbundVectors();
229 string lastLabel = lookup[0]->getLabel();
231 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
232 set<string> processedLabels;
233 set<string> userLabels = labels;
235 //setup the pairwise comparions of sets for metastats
236 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
237 //make sure sets are all in designMap
238 SharedUtil* util = new SharedUtil();
239 vector<string> dGroups = designMap->getNamesOfGroups();
240 util->setGroups(Sets, dGroups);
243 int numGroups = Sets.size();
244 for (int a=0; a<numGroups; a++) {
245 for (int l = 0; l < a; l++) {
246 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
247 namesOfGroupCombos.push_back(groups);
253 if (numGroups == 2) { processors = 1; }
254 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
257 int numPairs = namesOfGroupCombos.size();
258 int numPairsPerProcessor = numPairs / processors;
260 for (int i = 0; i < processors; i++) {
261 int startPos = i * numPairsPerProcessor;
262 if(i == processors - 1){
263 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
265 lines.push_back(linePair(startPos, numPairsPerProcessor));
269 //as long as you are not at the end of the file or done wih the lines you want
270 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
272 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
274 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
276 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
279 processedLabels.insert(lookup[0]->getLabel());
280 userLabels.erase(lookup[0]->getLabel());
283 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
284 string saveLabel = lookup[0]->getLabel();
286 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
287 lookup = input->getSharedRAbundVectors(lastLabel);
288 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
292 processedLabels.insert(lookup[0]->getLabel());
293 userLabels.erase(lookup[0]->getLabel());
295 //restore real lastlabel to save below
296 lookup[0]->setLabel(saveLabel);
299 lastLabel = lookup[0]->getLabel();
300 //prevent memory leak
301 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
303 if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
305 //get next line to process
306 lookup = input->getSharedRAbundVectors();
309 if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
311 //output error messages about any remaining user labels
312 set<string>::iterator it;
313 bool needToRun = false;
314 for (it = userLabels.begin(); it != userLabels.end(); it++) {
315 m->mothurOut("Your file does not include the label " + *it);
316 if (processedLabels.count(lastLabel) != 1) {
317 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
320 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
324 //run last label if you need to
325 if (needToRun == true) {
326 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
327 lookup = input->getSharedRAbundVectors(lastLabel);
329 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
333 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
336 //reset groups parameter
341 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;}
343 m->mothurOutEndLine();
344 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
345 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
346 m->mothurOutEndLine();
350 catch(exception& e) {
351 m->errorOut(e, "MetaStatsCommand", "execute");
355 //**********************************************************************************************************************
357 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
362 driver(0, namesOfGroupCombos.size(), thisLookUp);
365 vector<int> processIDS;
366 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
367 //loop through and create all the processes you want
368 while (process != processors) {
372 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
375 driver(lines[process].start, lines[process].num, thisLookUp);
378 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
379 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
385 driver(lines[0].start, lines[0].num, thisLookUp);
387 //force parent to wait until all the processes are done
388 for (int i=0;i<(processors-1);i++) {
389 int temp = processIDS[i];
394 //////////////////////////////////////////////////////////////////////////////////////////////////////
395 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct.
396 //Above fork() will clone, so memory is separate, but that's not the case with windows,
397 //Taking advantage of shared memory to pass results vectors.
398 //////////////////////////////////////////////////////////////////////////////////////////////////////
400 vector<metastatsData*> pDataArray;
401 DWORD dwThreadIdArray[processors-1];
402 HANDLE hThreadArray[processors-1];
404 //Create processor worker threads.
405 for( int i=1; i<processors; i++ ){
407 //make copy of lookup so we don't get access violations
408 vector<SharedRAbundVector*> newLookup;
409 vector<string> designMapGroups;
410 for (int k = 0; k < thisLookUp.size(); k++) {
411 SharedRAbundVector* temp = new SharedRAbundVector();
412 temp->setLabel(thisLookUp[k]->getLabel());
413 temp->setGroup(thisLookUp[k]->getGroup());
414 newLookup.push_back(temp);
415 designMapGroups.push_back(designMap->getGroup(thisLookUp[k]->getGroup()));
419 for (int k = 0; k < thisLookUp[0]->getNumBins(); k++) {
420 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
421 for (int j = 0; j < thisLookUp.size(); j++) { newLookup[j]->push_back(thisLookUp[j]->getAbundance(k), thisLookUp[j]->getGroup()); }
424 // Allocate memory for thread data.
425 metastatsData* tempSum = new metastatsData(sharedfile, outputDir, m, lines[i].start, lines[i].num, namesOfGroupCombos, newLookup, designMapGroups, iters, threshold);
426 pDataArray.push_back(tempSum);
427 processIDS.push_back(i);
429 hThreadArray[i-1] = CreateThread(NULL, 0, MyMetastatsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
433 driver(lines[0].start, lines[0].num, thisLookUp);
435 //Wait until all threads have terminated.
436 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
438 //Close all thread handles and free memory allocations.
439 for(int i=0; i < pDataArray.size(); i++){
440 for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) { delete pDataArray[i]->thisLookUp[j]; }
441 for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {
442 outputNames.push_back(pDataArray[i]->outputNames[j]);
443 outputTypes["metastats"].push_back(pDataArray[i]->outputNames[j]);
446 CloseHandle(hThreadArray[i]);
447 delete pDataArray[i];
456 catch(exception& e) {
457 m->errorOut(e, "MetaStatsCommand", "process");
461 //**********************************************************************************************************************
462 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) {
466 for (int c = start; c < (start+num); c++) {
469 string setA = namesOfGroupCombos[c][0];
470 string setB = namesOfGroupCombos[c][1];
473 string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + "." + getOutputFileNameTag("metastats");
474 outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
475 //int nameLength = outputFileName.length();
476 //char * output = new char[nameLength];
477 //strcpy(output, outputFileName.c_str());
479 //build matrix from shared rabunds
481 //data = new double*[thisLookUp[0]->getNumBins()];
483 vector< vector<double> > data2; data2.resize(thisLookUp[0]->getNumBins());
485 vector<SharedRAbundVector*> subset;
488 for (int i = 0; i < thisLookUp.size(); i++) {
489 string thisGroup = thisLookUp[i]->getGroup();
491 //is this group for a set we want to compare??
492 //sorting the sets by putting setB at the back and setA in the front
493 if ((designMap->getGroup(thisGroup) == setB)) {
494 subset.push_back(thisLookUp[i]);
496 }else if ((designMap->getGroup(thisGroup) == setA)) {
497 subset.insert(subset.begin()+setACount, thisLookUp[i]);
502 if ((setACount == 0) || (setBCount == 0)) {
503 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine();
504 outputNames.pop_back();
508 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
509 //data[j] = new double[subset.size()];
510 data2[j].resize(subset.size(), 0.0);
512 for (int i = 0; i < subset.size(); i++) {
513 data2[j][i] = (subset[i]->getAbundance(j));
517 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine();
518 //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
520 m->mothurOutEndLine();
521 MothurMetastats mothurMeta(threshold, iters);
522 mothurMeta.runMetastats(outputFileName , data2, setACount);
523 m->mothurOutEndLine();
524 m->mothurOutEndLine();
529 //for(int i = 0; i < thisLookUp[0]->getNumBins(); i++) { delete[] data[i]; }
536 catch(exception& e) {
537 m->errorOut(e, "MetaStatsCommand", "driver");
541 //**********************************************************************************************************************