]> git.donarmstrong.com Git - mothur.git/blob - metastatscommand.cpp
added citation function to commands
[mothur.git] / metastatscommand.cpp
1 /*
2  *  metastatscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/16/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
13
14 //**********************************************************************************************************************
15 vector<string> MetaStatsCommand::setParameters(){       
16         try {
17                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
18                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
19                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
20                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
21                 CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold);
22                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
23                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
24                 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27                 
28                 vector<string> myArray;
29                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "MetaStatsCommand", "setParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 string MetaStatsCommand::getHelpString(){       
39         try {
40                 string helpString = "";
41                 helpString += "This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n";
42                 helpString += "The metastats command outputs a .metastats file. \n";
43                 helpString += "The metastats command parameters are shared, iters, threshold, groups, label, design, sets and processors.  The shared and design parameters are required, unless you have valid current files.\n";
44                 helpString += "The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n";
45                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
46                 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
47                 helpString += "The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic.  The default is 1000. \n";
48                 helpString += "The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n";
49                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
50                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
51                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
52                 helpString += "The metastats command should be in the following format: metastats(design=yourDesignFile).\n";
53                 helpString += "Example metastats(design=temp.design, groups=A-B-C).\n";
54                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
55                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
56                 return helpString;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "MetaStatsCommand", "getHelpString");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64 MetaStatsCommand::MetaStatsCommand(){   
65         try {
66                 abort = true; calledHelp = true;
67                 setParameters();
68                 vector<string> tempOutNames;
69                 outputTypes["metastats"] = tempOutNames;
70         }
71         catch(exception& e) {
72                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
73                 exit(1);
74         }
75 }
76 //**********************************************************************************************************************
77
78 MetaStatsCommand::MetaStatsCommand(string option) {
79         try {
80                 abort = false; calledHelp = false;   
81                 allLines = 1;
82                 
83                 
84                 //allow user to run help
85                 if(option == "help") { help(); abort = true; calledHelp = true; }
86                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
87                 
88                 else {
89                         vector<string> myArray = setParameters();
90                         
91                         OptionParser parser(option);
92                         map<string,string> parameters = parser.getParameters();
93                         
94                         ValidParameters validParameter;
95                         
96                         //check to make sure all parameters are valid for command
97                         map<string,string>::iterator it;
98                         for (it = parameters.begin(); it != parameters.end(); it++) { 
99                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
100                         }
101                         
102                         //initialize outputTypes
103                         vector<string> tempOutNames;
104                         outputTypes["metastats"] = tempOutNames;
105                         
106                         
107                         //if the user changes the input directory command factory will send this info to us in the output parameter 
108                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
109                         if (inputDir == "not found"){   inputDir = "";          }
110                         else {
111                                 string path;
112                                 it = parameters.find("design");
113                                 //user has given a template file
114                                 if(it != parameters.end()){ 
115                                         path = m->hasPath(it->second);
116                                         //if the user has not given a path then, add inputdir. else leave path alone.
117                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
118                                 }
119                                 
120                                 it = parameters.find("shared");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
126                                 }
127                                 
128                         }
129                         
130                         //check for required parameters
131                         sharedfile = validParameter.validFile(parameters, "shared", true);
132                         if (sharedfile == "not open") { abort = true; }
133                         else if (sharedfile == "not found") {                           //if there is a current shared file, use it
134                                 sharedfile = m->getSharedFile(); 
135                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
136                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
137                         }
138                         
139                         //check for required parameters
140                         designfile = validParameter.validFile(parameters, "design", true);
141                         if (designfile == "not open") { abort = true; }
142                         else if (designfile == "not found") {                           
143                                 //if there is a current design file, use it
144                                 designfile = m->getDesignFile(); 
145                                 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
146                                 else {  m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
147                         }
148                         
149                         //if the user changes the output directory command factory will send this info to us in the output parameter 
150                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
151                                 outputDir = ""; 
152                                 outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it      
153                         }
154
155                         //check for optional parameter and set defaults
156                         // ...at some point should added some additional type checking...
157                         label = validParameter.validFile(parameters, "label", false);                   
158                         if (label == "not found") { label = ""; }
159                         else { 
160                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
161                                 else { allLines = 1;  }
162                         }
163                         
164                         groups = validParameter.validFile(parameters, "groups", false);                 
165                         if (groups == "not found") { groups = ""; pickedGroups = false; }
166                         else { 
167                                 pickedGroups = true;
168                                 m->splitAtDash(groups, Groups);
169                                 m->Groups = Groups;
170                         }
171                         
172                         sets = validParameter.validFile(parameters, "sets", false);                     
173                         if (sets == "not found") { sets = ""; }
174                         else { 
175                                 m->splitAtDash(sets, Sets);
176                         }
177
178                         
179                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
180                         convert(temp, iters); 
181                         
182                         temp = validParameter.validFile(parameters, "threshold", false);                        if (temp == "not found") { temp = "0.05"; }
183                         convert(temp, threshold); 
184                         
185                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
186                         m->setProcessors(temp);
187                         convert(temp, processors);
188                 }
189
190         }
191         catch(exception& e) {
192                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
193                 exit(1);
194         }
195 }
196 //**********************************************************************************************************************
197
198 int MetaStatsCommand::execute(){
199         try {
200         
201                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
202                 
203                 designMap = new GroupMap(designfile);
204                 designMap->readDesignMap();
205         
206                 input = new InputData(sharedfile, "sharedfile");
207                 lookup = input->getSharedRAbundVectors();
208                 string lastLabel = lookup[0]->getLabel();
209                 
210                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
211                 set<string> processedLabels;
212                 set<string> userLabels = labels;
213                 
214                 //setup the pairwise comparions of sets for metastats
215                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
216                 //make sure sets are all in designMap
217                 SharedUtil* util = new SharedUtil();  
218                 util->setGroups(Sets, designMap->namesOfGroups);  
219                 delete util;
220                 
221                 int numGroups = Sets.size();
222                 for (int a=0; a<numGroups; a++) { 
223                         for (int l = 0; l < a; l++) {   
224                                 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
225                                 namesOfGroupCombos.push_back(groups);
226                         }
227                 }
228         
229                 
230                 //only 1 combo
231                 if (numGroups == 2) { processors = 1; }
232                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
233                 
234                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
235                         if(processors != 1){
236                                 int numPairs = namesOfGroupCombos.size();
237                                 int numPairsPerProcessor = numPairs / processors;
238                         
239                                 for (int i = 0; i < processors; i++) {
240                                         int startPos = i * numPairsPerProcessor;
241                                         if(i == processors - 1){
242                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
243                                         }
244                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
245                                 }
246                         }
247                 #endif
248                 
249                 //as long as you are not at the end of the file or done wih the lines you want
250                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
251                         
252                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->Groups.clear(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {      remove(outputNames[i].c_str()); } return 0; }
253         
254                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
255
256                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
257                                 process(lookup);
258                                 
259                                 processedLabels.insert(lookup[0]->getLabel());
260                                 userLabels.erase(lookup[0]->getLabel());
261                         }
262                         
263                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
264                                 string saveLabel = lookup[0]->getLabel();
265                         
266                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
267                                 lookup = input->getSharedRAbundVectors(lastLabel);
268                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
269                                 
270                                 process(lookup);
271                                 
272                                 processedLabels.insert(lookup[0]->getLabel());
273                                 userLabels.erase(lookup[0]->getLabel());
274                                 
275                                 //restore real lastlabel to save below
276                                 lookup[0]->setLabel(saveLabel);
277                         }
278                         
279                         lastLabel = lookup[0]->getLabel();
280                         //prevent memory leak
281                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
282                         
283                         if (m->control_pressed) {  outputTypes.clear(); m->Groups.clear(); delete input;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
284
285                         //get next line to process
286                         lookup = input->getSharedRAbundVectors();                               
287                 }
288                 
289                 if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); }  return 0; }
290
291                 //output error messages about any remaining user labels
292                 set<string>::iterator it;
293                 bool needToRun = false;
294                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
295                         m->mothurOut("Your file does not include the label " + *it); 
296                         if (processedLabels.count(lastLabel) != 1) {
297                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
298                                 needToRun = true;
299                         }else {
300                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
301                         }
302                 }
303         
304                 //run last label if you need to
305                 if (needToRun == true)  {
306                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
307                         lookup = input->getSharedRAbundVectors(lastLabel);
308                         
309                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
310                         
311                         process(lookup);
312                         
313                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
314                 }
315         
316                 //reset groups parameter
317                 m->Groups.clear();  
318                 delete input; 
319                 delete designMap;
320                 
321                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str()); } return 0;}
322                 
323                 m->mothurOutEndLine();
324                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
325                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
326                 m->mothurOutEndLine();
327                 
328                 return 0;
329         }
330         catch(exception& e) {
331                 m->errorOut(e, "MetaStatsCommand", "execute");
332                 exit(1);
333         }
334 }
335 //**********************************************************************************************************************
336
337 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
338         try {
339                 
340                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
341                                 if(processors == 1){
342                                         driver(0, namesOfGroupCombos.size(), thisLookUp);
343                                 }else{
344                                         int process = 1;
345                                         vector<int> processIDS;
346                 
347                                         //loop through and create all the processes you want
348                                         while (process != processors) {
349                                                 int pid = fork();
350                         
351                                                 if (pid > 0) {
352                                                         processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
353                                                         process++;
354                                                 }else if (pid == 0){
355                                                         driver(lines[process].start, lines[process].num, thisLookUp);
356                                                         exit(0);
357                                                 }else { 
358                                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
359                                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
360                                                         exit(0);
361                                                 }
362                                         }
363                                         
364                                         //do my part
365                                         driver(lines[0].start, lines[0].num, thisLookUp);
366                 
367                                         //force parent to wait until all the processes are done
368                                         for (int i=0;i<(processors-1);i++) { 
369                                                 int temp = processIDS[i];
370                                                 wait(&temp);
371                                         }
372                                 }
373                 #else
374                                 driver(0, namesOfGroupCombos.size(), thisLookUp);
375                 #endif
376
377                 return 0;
378                 
379         }
380         catch(exception& e) {
381                 m->errorOut(e, "MetaStatsCommand", "process");
382                 exit(1);
383         }
384 }
385 //**********************************************************************************************************************
386 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) { 
387         try {
388         
389                 //for each combo
390                 for (int c = start; c < (start+num); c++) {
391                         
392                         //get set names
393                         string setA = namesOfGroupCombos[c][0]; 
394                         string setB = namesOfGroupCombos[c][1];
395                         
396                         //get filename
397                         string outputFileName = outputDir +  m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
398                         outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
399                         int nameLength = outputFileName.length();
400                         char * output = new char[nameLength];
401                         strcpy(output, outputFileName.c_str());
402         
403                         //build matrix from shared rabunds
404                         double** data;
405                         data = new double*[thisLookUp[0]->getNumBins()];
406                         
407                         vector<SharedRAbundVector*> subset;
408                         int setACount = 0;
409                         int setBCount = 0;
410                         for (int i = 0; i < thisLookUp.size(); i++) {
411                                 string thisGroup = thisLookUp[i]->getGroup();
412                                 
413                                 //is this group for a set we want to compare??
414                                 //sorting the sets by putting setB at the back and setA in the front
415                                 if ((designMap->getGroup(thisGroup) == setB)) {  
416                                         subset.push_back(thisLookUp[i]);
417                                         setBCount++;
418                                 }else if ((designMap->getGroup(thisGroup) == setA)) {
419                                         subset.insert(subset.begin()+setACount, thisLookUp[i]);
420                                         setACount++;
421                                 }
422                         }
423                                                 
424                         if ((setACount == 0) || (setBCount == 0))  { 
425                                 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
426                                 outputNames.pop_back();
427                         }else {
428                                 //fill data
429                                 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
430                                         data[j] = new double[subset.size()];
431                                         for (int i = 0; i < subset.size(); i++) {
432                                                 data[j][i] = (subset[i]->getAbundance(j));
433                                         }
434                                 }
435                                 
436                                 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); 
437                                 metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
438                                 
439                                 m->mothurOutEndLine(); 
440                         }
441                         
442                         //free memory
443                         delete output;
444                         for(int i = 0; i < thisLookUp[0]->getNumBins(); i++)  {  delete[] data[i];  }
445                         delete[] data; 
446                 }
447                 
448                 return 0;
449
450         }
451         catch(exception& e) {
452                 m->errorOut(e, "MetaStatsCommand", "driver");
453                 exit(1);
454         }
455 }
456 //**********************************************************************************************************************