]> git.donarmstrong.com Git - mothur.git/blob - metastatscommand.cpp
fixed order of unifrac commands so that it matches the summary commands
[mothur.git] / metastatscommand.cpp
1 /*
2  *  metastatscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/16/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
13
14 //**********************************************************************************************************************
15
16 MetaStatsCommand::MetaStatsCommand(string option) {
17         try {
18                 globaldata = GlobalData::getInstance();
19                 abort = false;
20                 allLines = 1;
21                 labels.clear();
22                 
23                 //allow user to run help
24                 if(option == "help") { help(); abort = true; }
25                 
26                 else {
27                         //valid paramters for this command
28                         string AlignArray[] =  {"groups","label","outputdir","iters","threshold","g","design","sets","processors","inputdir"};
29                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
30                         
31                         OptionParser parser(option);
32                         map<string,string> parameters = parser.getParameters();
33                         
34                         ValidParameters validParameter;
35                         
36                         //check to make sure all parameters are valid for command
37                         map<string,string>::iterator it;
38                         for (it = parameters.begin(); it != parameters.end(); it++) { 
39                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
40                         }
41                         
42                         //if the user changes the output directory command factory will send this info to us in the output parameter 
43                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
44                                 outputDir = ""; 
45                                 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it       
46                         }
47                         
48                         //if the user changes the input directory command factory will send this info to us in the output parameter 
49                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
50                         if (inputDir == "not found"){   inputDir = "";          }
51                         else {
52                                 string path;
53                                 it = parameters.find("design");
54                                 //user has given a template file
55                                 if(it != parameters.end()){ 
56                                         path = m->hasPath(it->second);
57                                         //if the user has not given a path then, add inputdir. else leave path alone.
58                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
59                                 }
60                         }
61                         
62                         //check for required parameters
63                         designfile = validParameter.validFile(parameters, "design", true);
64                         if (designfile == "not open") { abort = true; }
65                         else if (designfile == "not found") {  designfile = "";  m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }       
66                         
67                         //make sure the user has already run the read.otu command
68                         if ((globaldata->getSharedFile() == "")) {
69                                  m->mothurOut("You must read a list and a group, or a shared file before you can use the normalize.shared command."); m->mothurOutEndLine(); abort = true; 
70                         }
71
72                         //check for optional parameter and set defaults
73                         // ...at some point should added some additional type checking...
74                         label = validParameter.validFile(parameters, "label", false);                   
75                         if (label == "not found") { label = ""; }
76                         else { 
77                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
78                                 else { allLines = 1;  }
79                         }
80                         
81                         //if the user has not specified any labels use the ones from read.otu
82                         if (label == "") {  
83                                 allLines = globaldata->allLines; 
84                                 labels = globaldata->labels; 
85                         }
86                         
87                         groups = validParameter.validFile(parameters, "groups", false);                 
88                         if (groups == "not found") { groups = ""; pickedGroups = false; }
89                         else { 
90                                 pickedGroups = true;
91                                 m->splitAtDash(groups, Groups);
92                                 globaldata->Groups = Groups;
93                         }
94                         
95                         sets = validParameter.validFile(parameters, "sets", false);                     
96                         if (sets == "not found") { sets = ""; }
97                         else { 
98                                 m->splitAtDash(sets, Sets);
99                         }
100
101                         
102                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
103                         convert(temp, iters); 
104                         
105                         temp = validParameter.validFile(parameters, "threshold", false);                        if (temp == "not found") { temp = "0.05"; }
106                         convert(temp, threshold); 
107                         
108                         temp = validParameter.validFile(parameters, "processors", false);                       if (temp == "not found"){       temp = "1";     }
109                         convert(temp, processors); 
110                 }
111
112         }
113         catch(exception& e) {
114                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
115                 exit(1);
116         }
117 }
118
119 //**********************************************************************************************************************
120
121 void MetaStatsCommand::help(){
122         try {
123                 m->mothurOut("This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n");
124                 m->mothurOut("The metastats command can only be executed after a successful read.otu command of a list and group or shared file.\n");
125                 m->mothurOut("The metastats command outputs a .metastats file. \n");
126                 m->mothurOut("The metastats command parameters are iters, threshold, groups, label, design, sets and processors.  The design parameter is required.\n");
127                 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n");
128                 m->mothurOut("The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
129                 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n");
130                 m->mothurOut("The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic.  The default is 1000. \n");
131                 m->mothurOut("The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n");
132                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
133                 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
134                 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
135                 m->mothurOut("The metastats command should be in the following format: metastats(design=yourDesignFile).\n");
136                 m->mothurOut("Example metastats(design=temp.design, groups=A-B-C).\n");
137                 m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
138                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
139
140         }
141         catch(exception& e) {
142                 m->errorOut(e, "MetaStatsCommand", "help");
143                 exit(1);
144         }
145 }
146
147 //**********************************************************************************************************************
148
149 MetaStatsCommand::~MetaStatsCommand(){}
150
151 //**********************************************************************************************************************
152
153 int MetaStatsCommand::execute(){
154         try {
155         
156                 if (abort == true) { return 0; }
157                 
158                 designMap = new GroupMap(designfile);
159                 designMap->readDesignMap();
160                 
161                 read = new ReadOTUFile(globaldata->inputFileName);      
162                 read->read(&*globaldata); 
163                 input = globaldata->ginput;
164                 lookup = input->getSharedRAbundVectors();
165                 string lastLabel = lookup[0]->getLabel();
166                 
167                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
168                 set<string> processedLabels;
169                 set<string> userLabels = labels;
170                 
171                 //setup the pairwise comparions of sets for metastats
172                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
173                 //make sure sets are all in designMap
174                 SharedUtil* util = new SharedUtil();  
175                 util->setGroups(Sets, designMap->namesOfGroups);  
176                 delete util;
177                 
178                 int numGroups = Sets.size();
179                 for (int a=0; a<numGroups; a++) { 
180                         for (int l = 0; l < a; l++) {   
181                                 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
182                                 namesOfGroupCombos.push_back(groups);
183                         }
184                 }
185         
186                 
187                 //only 1 combo
188                 if (numGroups == 2) { processors = 1; }
189                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
190                 
191                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
192                         if(processors != 1){
193                                 int numPairs = namesOfGroupCombos.size();
194                                 int numPairsPerProcessor = numPairs / processors;
195                         
196                                 for (int i = 0; i < processors; i++) {
197                                         int startPos = i * numPairsPerProcessor;
198                                         if(i == processors - 1){
199                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
200                                         }
201                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
202                                 }
203                         }
204                 #endif
205                 
206                 //as long as you are not at the end of the file or done wih the lines you want
207                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
208                         
209                         if (m->control_pressed) {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } globaldata->Groups.clear(); delete read;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0; }
210         
211                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
212
213                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
214                                 process(lookup);
215                                 
216                                 processedLabels.insert(lookup[0]->getLabel());
217                                 userLabels.erase(lookup[0]->getLabel());
218                         }
219                         
220                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
221                                 string saveLabel = lookup[0]->getLabel();
222                         
223                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
224                                 lookup = input->getSharedRAbundVectors(lastLabel);
225                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
226                                 
227                                 process(lookup);
228                                 
229                                 processedLabels.insert(lookup[0]->getLabel());
230                                 userLabels.erase(lookup[0]->getLabel());
231                                 
232                                 //restore real lastlabel to save below
233                                 lookup[0]->setLabel(saveLabel);
234                         }
235                         
236                         lastLabel = lookup[0]->getLabel();
237                         //prevent memory leak
238                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
239                         
240                         if (m->control_pressed) {  globaldata->Groups.clear(); delete read;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0; }
241
242                         //get next line to process
243                         lookup = input->getSharedRAbundVectors();                               
244                 }
245                 
246                 if (m->control_pressed) { globaldata->Groups.clear(); delete read; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {    remove(outputNames[i].c_str()); }  return 0; }
247
248                 //output error messages about any remaining user labels
249                 set<string>::iterator it;
250                 bool needToRun = false;
251                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
252                         m->mothurOut("Your file does not include the label " + *it); 
253                         if (processedLabels.count(lastLabel) != 1) {
254                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
255                                 needToRun = true;
256                         }else {
257                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
258                         }
259                 }
260         
261                 //run last label if you need to
262                 if (needToRun == true)  {
263                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
264                         lookup = input->getSharedRAbundVectors(lastLabel);
265                         
266                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
267                         
268                         process(lookup);
269                         
270                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
271                 }
272         
273                 //reset groups parameter
274                 globaldata->Groups.clear();  
275                 delete input; globaldata->ginput = NULL;
276                 delete read;
277                 delete designMap;
278                 
279                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0;}
280                 
281                 m->mothurOutEndLine();
282                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
283                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
284                 m->mothurOutEndLine();
285                 
286                 return 0;
287         }
288         catch(exception& e) {
289                 m->errorOut(e, "MetaStatsCommand", "execute");
290                 exit(1);
291         }
292 }
293 //**********************************************************************************************************************
294
295 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
296         try {
297                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
298                 
299                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
300                                 if(processors == 1){
301                                         driver(0, namesOfGroupCombos.size(), thisLookUp);
302                                 }else{
303                                         int process = 1;
304                                         vector<int> processIDS;
305                 
306                                         //loop through and create all the processes you want
307                                         while (process != processors) {
308                                                 int pid = fork();
309                         
310                                                 if (pid > 0) {
311                                                         processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
312                                                         process++;
313                                                 }else if (pid == 0){
314                                                         driver(lines[process].start, lines[process].num, thisLookUp);
315                                                         exit(0);
316                                                 }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
317                                         }
318                                         
319                                         //do my part
320                                         driver(lines[0].start, lines[0].num, thisLookUp);
321                 
322                                         //force parent to wait until all the processes are done
323                                         for (int i=0;i<(processors-1);i++) { 
324                                                 int temp = processIDS[i];
325                                                 wait(&temp);
326                                         }
327                                 }
328                 #else
329                                 driver(0, namesOfGroupCombos.size(), thisLookUp);
330                 #endif
331
332                 return 0;
333                 
334         }
335         catch(exception& e) {
336                 m->errorOut(e, "MetaStatsCommand", "process");
337                 exit(1);
338         }
339 }
340 //**********************************************************************************************************************
341 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) { 
342         try {
343         
344                 //for each combo
345                 for (int c = start; c < (start+num); c++) {
346                         
347                         //get set names
348                         string setA = namesOfGroupCombos[c][0]; 
349                         string setB = namesOfGroupCombos[c][1];
350                         
351                         //get filename
352                         string outputFileName = outputDir +  m->getRootName(m->getSimpleName(globaldata->inputFileName)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
353                         outputNames.push_back(outputFileName);
354                         int nameLength = outputFileName.length();
355                         char * output = new char[nameLength];
356                         strcpy(output, outputFileName.c_str());
357         
358                         //build matrix from shared rabunds
359                         double** data;
360                         data = new double*[thisLookUp[0]->getNumBins()];
361                         
362                         vector<SharedRAbundVector*> subset;
363                         int setACount = 0;
364                         int setBCount = 0;
365                         for (int i = 0; i < thisLookUp.size(); i++) {
366                                 string thisGroup = thisLookUp[i]->getGroup();
367                                 
368                                 //is this group for a set we want to compare??
369                                 //sorting the sets by putting setB at the back and setA in the front
370                                 if ((designMap->getGroup(thisGroup) == setB)) {  
371                                         subset.push_back(thisLookUp[i]);
372                                         setBCount++;
373                                 }else if ((designMap->getGroup(thisGroup) == setA)) {
374                                         subset.insert(subset.begin()+setACount, thisLookUp[i]);
375                                         setACount++;
376                                 }
377                         }
378                                                 
379                         if ((setACount == 0) || (setBCount == 0))  { 
380                                 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
381                                 outputNames.pop_back();
382                         }else {
383                                 //fill data
384                                 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
385                                         data[j] = new double[subset.size()];
386                                         for (int i = 0; i < subset.size(); i++) {
387                                                 data[j][i] = (subset[i]->getAbundance(j));
388                                         }
389                                 }
390                                 
391                                 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); 
392                                 
393                                 metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
394                                 
395                                 m->mothurOutEndLine(); 
396                         }
397                         
398                         //free memory
399                         delete output;
400                         for(int i = 0; i < thisLookUp[0]->getNumBins(); i++)  {  delete[] data[i];  }
401                         delete[] data; 
402                 }
403                 
404                 return 0;
405
406         }
407         catch(exception& e) {
408                 m->errorOut(e, "MetaStatsCommand", "driver");
409                 exit(1);
410         }
411 }
412 //**********************************************************************************************************************
413 int MetaStatsCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
414         try {
415                 
416                 vector<SharedRAbundVector*> newLookup;
417                 for (int i = 0; i < thislookup.size(); i++) {
418                         SharedRAbundVector* temp = new SharedRAbundVector();
419                         temp->setLabel(thislookup[i]->getLabel());
420                         temp->setGroup(thislookup[i]->getGroup());
421                         newLookup.push_back(temp);
422                 }
423                 
424                 //for each bin
425                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
426                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
427                 
428                         //look at each sharedRabund and make sure they are not all zero
429                         bool allZero = true;
430                         for (int j = 0; j < thislookup.size(); j++) {
431                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
432                         }
433                         
434                         //if they are not all zero add this bin
435                         if (!allZero) {
436                                 for (int j = 0; j < thislookup.size(); j++) {
437                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
438                                 }
439                         }
440                 }
441
442                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
443
444                 thislookup = newLookup;
445                 
446                 return 0;
447  
448         }
449         catch(exception& e) {
450                 m->errorOut(e, "MetaStatsCommand", "eliminateZeroOTUS");
451                 exit(1);
452         }
453 }
454 //**********************************************************************************************************************