]> git.donarmstrong.com Git - mothur.git/blob - metastatscommand.cpp
added getCommandInfoCommand for gui
[mothur.git] / metastatscommand.cpp
1 /*
2  *  metastatscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/16/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
13
14 //**********************************************************************************************************************
15 vector<string> MetaStatsCommand::setParameters(){       
16         try {
17                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
18                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
19                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
20                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
21                 CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold);
22                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
23                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
24                 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27                 
28                 vector<string> myArray;
29                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "MetaStatsCommand", "setParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 string MetaStatsCommand::getHelpString(){       
39         try {
40                 string helpString = "";
41                 helpString += "This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n";
42                 helpString += "The metastats command outputs a .metastats file. \n";
43                 helpString += "The metastats command parameters are shared, iters, threshold, groups, label, design, sets and processors.  The shared and design parameters are required, unless you have valid current files.\n";
44                 helpString += "The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n";
45                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
46                 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
47                 helpString += "The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic.  The default is 1000. \n";
48                 helpString += "The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n";
49                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
50                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
51                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
52                 helpString += "The metastats command should be in the following format: metastats(design=yourDesignFile).\n";
53                 helpString += "Example metastats(design=temp.design, groups=A-B-C).\n";
54                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
55                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
56                 return helpString;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "MetaStatsCommand", "getHelpString");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64 MetaStatsCommand::MetaStatsCommand(){   
65         try {
66                 abort = true; calledHelp = true;
67                 setParameters();
68                 vector<string> tempOutNames;
69                 outputTypes["metastats"] = tempOutNames;
70         }
71         catch(exception& e) {
72                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
73                 exit(1);
74         }
75 }
76 //**********************************************************************************************************************
77
78 MetaStatsCommand::MetaStatsCommand(string option) {
79         try {
80                 abort = false; calledHelp = false;   
81                 allLines = 1;
82                 
83                 
84                 //allow user to run help
85                 if(option == "help") { help(); abort = true; calledHelp = true; }
86                 
87                 else {
88                         vector<string> myArray = setParameters();
89                         
90                         OptionParser parser(option);
91                         map<string,string> parameters = parser.getParameters();
92                         
93                         ValidParameters validParameter;
94                         
95                         //check to make sure all parameters are valid for command
96                         map<string,string>::iterator it;
97                         for (it = parameters.begin(); it != parameters.end(); it++) { 
98                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
99                         }
100                         
101                         //initialize outputTypes
102                         vector<string> tempOutNames;
103                         outputTypes["metastats"] = tempOutNames;
104                         
105                         
106                         //if the user changes the input directory command factory will send this info to us in the output parameter 
107                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
108                         if (inputDir == "not found"){   inputDir = "";          }
109                         else {
110                                 string path;
111                                 it = parameters.find("design");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
117                                 }
118                                 
119                                 it = parameters.find("shared");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
125                                 }
126                                 
127                         }
128                         
129                         //check for required parameters
130                         sharedfile = validParameter.validFile(parameters, "shared", true);
131                         if (sharedfile == "not open") { abort = true; }
132                         else if (sharedfile == "not found") {                           //if there is a current shared file, use it
133                                 sharedfile = m->getSharedFile(); 
134                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
135                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
136                         }
137                         
138                         //check for required parameters
139                         designfile = validParameter.validFile(parameters, "design", true);
140                         if (designfile == "not open") { abort = true; }
141                         else if (designfile == "not found") {                           
142                                 //if there is a current design file, use it
143                                 designfile = m->getDesignFile(); 
144                                 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
145                                 else {  m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
146                         }
147                         
148                         //if the user changes the output directory command factory will send this info to us in the output parameter 
149                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
150                                 outputDir = ""; 
151                                 outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it      
152                         }
153
154                         //check for optional parameter and set defaults
155                         // ...at some point should added some additional type checking...
156                         label = validParameter.validFile(parameters, "label", false);                   
157                         if (label == "not found") { label = ""; }
158                         else { 
159                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
160                                 else { allLines = 1;  }
161                         }
162                         
163                         groups = validParameter.validFile(parameters, "groups", false);                 
164                         if (groups == "not found") { groups = ""; pickedGroups = false; }
165                         else { 
166                                 pickedGroups = true;
167                                 m->splitAtDash(groups, Groups);
168                                 m->Groups = Groups;
169                         }
170                         
171                         sets = validParameter.validFile(parameters, "sets", false);                     
172                         if (sets == "not found") { sets = ""; }
173                         else { 
174                                 m->splitAtDash(sets, Sets);
175                         }
176
177                         
178                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
179                         convert(temp, iters); 
180                         
181                         temp = validParameter.validFile(parameters, "threshold", false);                        if (temp == "not found") { temp = "0.05"; }
182                         convert(temp, threshold); 
183                         
184                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
185                         m->setProcessors(temp);
186                         convert(temp, processors);
187                 }
188
189         }
190         catch(exception& e) {
191                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
192                 exit(1);
193         }
194 }
195 //**********************************************************************************************************************
196
197 int MetaStatsCommand::execute(){
198         try {
199         
200                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
201                 
202                 designMap = new GroupMap(designfile);
203                 designMap->readDesignMap();
204         
205                 input = new InputData(sharedfile, "sharedfile");
206                 lookup = input->getSharedRAbundVectors();
207                 string lastLabel = lookup[0]->getLabel();
208                 
209                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
210                 set<string> processedLabels;
211                 set<string> userLabels = labels;
212                 
213                 //setup the pairwise comparions of sets for metastats
214                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
215                 //make sure sets are all in designMap
216                 SharedUtil* util = new SharedUtil();  
217                 util->setGroups(Sets, designMap->namesOfGroups);  
218                 delete util;
219                 
220                 int numGroups = Sets.size();
221                 for (int a=0; a<numGroups; a++) { 
222                         for (int l = 0; l < a; l++) {   
223                                 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
224                                 namesOfGroupCombos.push_back(groups);
225                         }
226                 }
227         
228                 
229                 //only 1 combo
230                 if (numGroups == 2) { processors = 1; }
231                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
232                 
233                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
234                         if(processors != 1){
235                                 int numPairs = namesOfGroupCombos.size();
236                                 int numPairsPerProcessor = numPairs / processors;
237                         
238                                 for (int i = 0; i < processors; i++) {
239                                         int startPos = i * numPairsPerProcessor;
240                                         if(i == processors - 1){
241                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
242                                         }
243                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
244                                 }
245                         }
246                 #endif
247                 
248                 //as long as you are not at the end of the file or done wih the lines you want
249                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
250                         
251                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->Groups.clear(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {      remove(outputNames[i].c_str()); } return 0; }
252         
253                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
254
255                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
256                                 process(lookup);
257                                 
258                                 processedLabels.insert(lookup[0]->getLabel());
259                                 userLabels.erase(lookup[0]->getLabel());
260                         }
261                         
262                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
263                                 string saveLabel = lookup[0]->getLabel();
264                         
265                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
266                                 lookup = input->getSharedRAbundVectors(lastLabel);
267                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
268                                 
269                                 process(lookup);
270                                 
271                                 processedLabels.insert(lookup[0]->getLabel());
272                                 userLabels.erase(lookup[0]->getLabel());
273                                 
274                                 //restore real lastlabel to save below
275                                 lookup[0]->setLabel(saveLabel);
276                         }
277                         
278                         lastLabel = lookup[0]->getLabel();
279                         //prevent memory leak
280                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
281                         
282                         if (m->control_pressed) {  outputTypes.clear(); m->Groups.clear(); delete input;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
283
284                         //get next line to process
285                         lookup = input->getSharedRAbundVectors();                               
286                 }
287                 
288                 if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); }  return 0; }
289
290                 //output error messages about any remaining user labels
291                 set<string>::iterator it;
292                 bool needToRun = false;
293                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
294                         m->mothurOut("Your file does not include the label " + *it); 
295                         if (processedLabels.count(lastLabel) != 1) {
296                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
297                                 needToRun = true;
298                         }else {
299                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
300                         }
301                 }
302         
303                 //run last label if you need to
304                 if (needToRun == true)  {
305                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
306                         lookup = input->getSharedRAbundVectors(lastLabel);
307                         
308                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
309                         
310                         process(lookup);
311                         
312                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
313                 }
314         
315                 //reset groups parameter
316                 m->Groups.clear();  
317                 delete input; 
318                 delete designMap;
319                 
320                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str()); } return 0;}
321                 
322                 m->mothurOutEndLine();
323                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
324                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
325                 m->mothurOutEndLine();
326                 
327                 return 0;
328         }
329         catch(exception& e) {
330                 m->errorOut(e, "MetaStatsCommand", "execute");
331                 exit(1);
332         }
333 }
334 //**********************************************************************************************************************
335
336 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
337         try {
338                 
339                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
340                                 if(processors == 1){
341                                         driver(0, namesOfGroupCombos.size(), thisLookUp);
342                                 }else{
343                                         int process = 1;
344                                         vector<int> processIDS;
345                 
346                                         //loop through and create all the processes you want
347                                         while (process != processors) {
348                                                 int pid = fork();
349                         
350                                                 if (pid > 0) {
351                                                         processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
352                                                         process++;
353                                                 }else if (pid == 0){
354                                                         driver(lines[process].start, lines[process].num, thisLookUp);
355                                                         exit(0);
356                                                 }else { 
357                                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
358                                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
359                                                         exit(0);
360                                                 }
361                                         }
362                                         
363                                         //do my part
364                                         driver(lines[0].start, lines[0].num, thisLookUp);
365                 
366                                         //force parent to wait until all the processes are done
367                                         for (int i=0;i<(processors-1);i++) { 
368                                                 int temp = processIDS[i];
369                                                 wait(&temp);
370                                         }
371                                 }
372                 #else
373                                 driver(0, namesOfGroupCombos.size(), thisLookUp);
374                 #endif
375
376                 return 0;
377                 
378         }
379         catch(exception& e) {
380                 m->errorOut(e, "MetaStatsCommand", "process");
381                 exit(1);
382         }
383 }
384 //**********************************************************************************************************************
385 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) { 
386         try {
387         
388                 //for each combo
389                 for (int c = start; c < (start+num); c++) {
390                         
391                         //get set names
392                         string setA = namesOfGroupCombos[c][0]; 
393                         string setB = namesOfGroupCombos[c][1];
394                         
395                         //get filename
396                         string outputFileName = outputDir +  m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
397                         outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
398                         int nameLength = outputFileName.length();
399                         char * output = new char[nameLength];
400                         strcpy(output, outputFileName.c_str());
401         
402                         //build matrix from shared rabunds
403                         double** data;
404                         data = new double*[thisLookUp[0]->getNumBins()];
405                         
406                         vector<SharedRAbundVector*> subset;
407                         int setACount = 0;
408                         int setBCount = 0;
409                         for (int i = 0; i < thisLookUp.size(); i++) {
410                                 string thisGroup = thisLookUp[i]->getGroup();
411                                 
412                                 //is this group for a set we want to compare??
413                                 //sorting the sets by putting setB at the back and setA in the front
414                                 if ((designMap->getGroup(thisGroup) == setB)) {  
415                                         subset.push_back(thisLookUp[i]);
416                                         setBCount++;
417                                 }else if ((designMap->getGroup(thisGroup) == setA)) {
418                                         subset.insert(subset.begin()+setACount, thisLookUp[i]);
419                                         setACount++;
420                                 }
421                         }
422                                                 
423                         if ((setACount == 0) || (setBCount == 0))  { 
424                                 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
425                                 outputNames.pop_back();
426                         }else {
427                                 //fill data
428                                 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
429                                         data[j] = new double[subset.size()];
430                                         for (int i = 0; i < subset.size(); i++) {
431                                                 data[j][i] = (subset[i]->getAbundance(j));
432                                         }
433                                 }
434                                 
435                                 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); 
436                                 metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
437                                 
438                                 m->mothurOutEndLine(); 
439                         }
440                         
441                         //free memory
442                         delete output;
443                         for(int i = 0; i < thisLookUp[0]->getNumBins(); i++)  {  delete[] data[i];  }
444                         delete[] data; 
445                 }
446                 
447                 return 0;
448
449         }
450         catch(exception& e) {
451                 m->errorOut(e, "MetaStatsCommand", "driver");
452                 exit(1);
453         }
454 }
455 //**********************************************************************************************************************