]> git.donarmstrong.com Git - mothur.git/blob - metastatscommand.cpp
fixes while testing
[mothur.git] / metastatscommand.cpp
1 /*
2  *  metastatscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/16/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
13
14 //**********************************************************************************************************************
15 vector<string> MetaStatsCommand::getValidParameters(){  
16         try {
17                 string Array[] =  {"groups","label","outputdir","iters","threshold","g","design","sets","processors","inputdir"};
18                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
19                 return myArray;
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "MetaStatsCommand", "getValidParameters");
23                 exit(1);
24         }
25 }
26 //**********************************************************************************************************************
27 MetaStatsCommand::MetaStatsCommand(){   
28         try {
29                 abort = true;
30                 //initialize outputTypes
31                 vector<string> tempOutNames;
32                 outputTypes["metastats"] = tempOutNames;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 vector<string> MetaStatsCommand::getRequiredParameters(){       
41         try {
42                 string Array[] =  {"design"};
43                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "MetaStatsCommand", "getRequiredParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 vector<string> MetaStatsCommand::getRequiredFiles(){    
53         try {
54                 string Array[] =  {"shared"};
55                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "MetaStatsCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64
65 MetaStatsCommand::MetaStatsCommand(string option) {
66         try {
67                 globaldata = GlobalData::getInstance();
68                 abort = false;
69                 allLines = 1;
70                 labels.clear();
71                 
72                 //allow user to run help
73                 if(option == "help") { help(); abort = true; }
74                 
75                 else {
76                         //valid paramters for this command
77                         string AlignArray[] =  {"groups","label","outputdir","iters","threshold","g","design","sets","processors","inputdir"};
78                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
79                         
80                         OptionParser parser(option);
81                         map<string,string> parameters = parser.getParameters();
82                         
83                         ValidParameters validParameter;
84                         
85                         //check to make sure all parameters are valid for command
86                         map<string,string>::iterator it;
87                         for (it = parameters.begin(); it != parameters.end(); it++) { 
88                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
89                         }
90                         
91                         //initialize outputTypes
92                         vector<string> tempOutNames;
93                         outputTypes["metastats"] = tempOutNames;
94                         
95                         //if the user changes the output directory command factory will send this info to us in the output parameter 
96                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
97                                 outputDir = ""; 
98                                 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it       
99                         }
100                         
101                         //if the user changes the input directory command factory will send this info to us in the output parameter 
102                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
103                         if (inputDir == "not found"){   inputDir = "";          }
104                         else {
105                                 string path;
106                                 it = parameters.find("design");
107                                 //user has given a template file
108                                 if(it != parameters.end()){ 
109                                         path = m->hasPath(it->second);
110                                         //if the user has not given a path then, add inputdir. else leave path alone.
111                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
112                                 }
113                         }
114                         
115                         //check for required parameters
116                         designfile = validParameter.validFile(parameters, "design", true);
117                         if (designfile == "not open") { abort = true; }
118                         else if (designfile == "not found") {  designfile = "";  m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }       
119                         
120                         //make sure the user has already run the read.otu command
121                         if ((globaldata->getSharedFile() == "")) {
122                                  m->mothurOut("You must read a list and a group, or a shared file before you can use the metastats command."); m->mothurOutEndLine(); abort = true; 
123                         }
124
125                         //check for optional parameter and set defaults
126                         // ...at some point should added some additional type checking...
127                         label = validParameter.validFile(parameters, "label", false);                   
128                         if (label == "not found") { label = ""; }
129                         else { 
130                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
131                                 else { allLines = 1;  }
132                         }
133                         
134                         //if the user has not specified any labels use the ones from read.otu
135                         if (label == "") {  
136                                 allLines = globaldata->allLines; 
137                                 labels = globaldata->labels; 
138                         }
139                         
140                         groups = validParameter.validFile(parameters, "groups", false);                 
141                         if (groups == "not found") { groups = ""; pickedGroups = false; }
142                         else { 
143                                 pickedGroups = true;
144                                 m->splitAtDash(groups, Groups);
145                                 globaldata->Groups = Groups;
146                         }
147                         
148                         sets = validParameter.validFile(parameters, "sets", false);                     
149                         if (sets == "not found") { sets = ""; }
150                         else { 
151                                 m->splitAtDash(sets, Sets);
152                         }
153
154                         
155                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
156                         convert(temp, iters); 
157                         
158                         temp = validParameter.validFile(parameters, "threshold", false);                        if (temp == "not found") { temp = "0.05"; }
159                         convert(temp, threshold); 
160                         
161                         temp = validParameter.validFile(parameters, "processors", false);                       if (temp == "not found"){       temp = "1";     }
162                         convert(temp, processors); 
163                 }
164
165         }
166         catch(exception& e) {
167                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
168                 exit(1);
169         }
170 }
171
172 //**********************************************************************************************************************
173
174 void MetaStatsCommand::help(){
175         try {
176                 m->mothurOut("This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n");
177                 m->mothurOut("The metastats command can only be executed after a successful read.otu command of a list and group or shared file.\n");
178                 m->mothurOut("The metastats command outputs a .metastats file. \n");
179                 m->mothurOut("The metastats command parameters are iters, threshold, groups, label, design, sets and processors.  The design parameter is required.\n");
180                 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n");
181                 m->mothurOut("The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
182                 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n");
183                 m->mothurOut("The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic.  The default is 1000. \n");
184                 m->mothurOut("The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n");
185                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
186                 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
187                 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
188                 m->mothurOut("The metastats command should be in the following format: metastats(design=yourDesignFile).\n");
189                 m->mothurOut("Example metastats(design=temp.design, groups=A-B-C).\n");
190                 m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
191                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
192
193         }
194         catch(exception& e) {
195                 m->errorOut(e, "MetaStatsCommand", "help");
196                 exit(1);
197         }
198 }
199
200 //**********************************************************************************************************************
201
202 MetaStatsCommand::~MetaStatsCommand(){}
203
204 //**********************************************************************************************************************
205
206 int MetaStatsCommand::execute(){
207         try {
208         
209                 if (abort == true) { return 0; }
210                 
211                 designMap = new GroupMap(designfile);
212                 designMap->readDesignMap();
213                 
214                 read = new ReadOTUFile(globaldata->inputFileName);      
215                 read->read(&*globaldata); 
216                 input = globaldata->ginput;
217                 lookup = input->getSharedRAbundVectors();
218                 string lastLabel = lookup[0]->getLabel();
219                 
220                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
221                 set<string> processedLabels;
222                 set<string> userLabels = labels;
223                 
224                 //setup the pairwise comparions of sets for metastats
225                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
226                 //make sure sets are all in designMap
227                 SharedUtil* util = new SharedUtil();  
228                 util->setGroups(Sets, designMap->namesOfGroups);  
229                 delete util;
230                 
231                 int numGroups = Sets.size();
232                 for (int a=0; a<numGroups; a++) { 
233                         for (int l = 0; l < a; l++) {   
234                                 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
235                                 namesOfGroupCombos.push_back(groups);
236                         }
237                 }
238         
239                 
240                 //only 1 combo
241                 if (numGroups == 2) { processors = 1; }
242                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
243                 
244                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
245                         if(processors != 1){
246                                 int numPairs = namesOfGroupCombos.size();
247                                 int numPairsPerProcessor = numPairs / processors;
248                         
249                                 for (int i = 0; i < processors; i++) {
250                                         int startPos = i * numPairsPerProcessor;
251                                         if(i == processors - 1){
252                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
253                                         }
254                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
255                                 }
256                         }
257                 #endif
258                 
259                 //as long as you are not at the end of the file or done wih the lines you want
260                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
261                         
262                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } globaldata->Groups.clear(); delete read;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
263         
264                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
265
266                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
267                                 process(lookup);
268                                 
269                                 processedLabels.insert(lookup[0]->getLabel());
270                                 userLabels.erase(lookup[0]->getLabel());
271                         }
272                         
273                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
274                                 string saveLabel = lookup[0]->getLabel();
275                         
276                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
277                                 lookup = input->getSharedRAbundVectors(lastLabel);
278                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
279                                 
280                                 process(lookup);
281                                 
282                                 processedLabels.insert(lookup[0]->getLabel());
283                                 userLabels.erase(lookup[0]->getLabel());
284                                 
285                                 //restore real lastlabel to save below
286                                 lookup[0]->setLabel(saveLabel);
287                         }
288                         
289                         lastLabel = lookup[0]->getLabel();
290                         //prevent memory leak
291                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
292                         
293                         if (m->control_pressed) {  outputTypes.clear(); globaldata->Groups.clear(); delete read;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
294
295                         //get next line to process
296                         lookup = input->getSharedRAbundVectors();                               
297                 }
298                 
299                 if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); delete read; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); }  return 0; }
300
301                 //output error messages about any remaining user labels
302                 set<string>::iterator it;
303                 bool needToRun = false;
304                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
305                         m->mothurOut("Your file does not include the label " + *it); 
306                         if (processedLabels.count(lastLabel) != 1) {
307                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
308                                 needToRun = true;
309                         }else {
310                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
311                         }
312                 }
313         
314                 //run last label if you need to
315                 if (needToRun == true)  {
316                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
317                         lookup = input->getSharedRAbundVectors(lastLabel);
318                         
319                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
320                         
321                         process(lookup);
322                         
323                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
324                 }
325         
326                 //reset groups parameter
327                 globaldata->Groups.clear();  
328                 delete input; globaldata->ginput = NULL;
329                 delete read;
330                 delete designMap;
331                 
332                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str()); } return 0;}
333                 
334                 m->mothurOutEndLine();
335                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
336                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
337                 m->mothurOutEndLine();
338                 
339                 return 0;
340         }
341         catch(exception& e) {
342                 m->errorOut(e, "MetaStatsCommand", "execute");
343                 exit(1);
344         }
345 }
346 //**********************************************************************************************************************
347
348 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
349         try {
350                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
351                 
352                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
353                                 if(processors == 1){
354                                         driver(0, namesOfGroupCombos.size(), thisLookUp);
355                                 }else{
356                                         int process = 1;
357                                         vector<int> processIDS;
358                 
359                                         //loop through and create all the processes you want
360                                         while (process != processors) {
361                                                 int pid = fork();
362                         
363                                                 if (pid > 0) {
364                                                         processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
365                                                         process++;
366                                                 }else if (pid == 0){
367                                                         driver(lines[process].start, lines[process].num, thisLookUp);
368                                                         exit(0);
369                                                 }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
370                                         }
371                                         
372                                         //do my part
373                                         driver(lines[0].start, lines[0].num, thisLookUp);
374                 
375                                         //force parent to wait until all the processes are done
376                                         for (int i=0;i<(processors-1);i++) { 
377                                                 int temp = processIDS[i];
378                                                 wait(&temp);
379                                         }
380                                 }
381                 #else
382                                 driver(0, namesOfGroupCombos.size(), thisLookUp);
383                 #endif
384
385                 return 0;
386                 
387         }
388         catch(exception& e) {
389                 m->errorOut(e, "MetaStatsCommand", "process");
390                 exit(1);
391         }
392 }
393 //**********************************************************************************************************************
394 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) { 
395         try {
396         
397                 //for each combo
398                 for (int c = start; c < (start+num); c++) {
399                         
400                         //get set names
401                         string setA = namesOfGroupCombos[c][0]; 
402                         string setB = namesOfGroupCombos[c][1];
403                         
404                         //get filename
405                         string outputFileName = outputDir +  m->getRootName(m->getSimpleName(globaldata->inputFileName)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
406                         outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
407                         int nameLength = outputFileName.length();
408                         char * output = new char[nameLength];
409                         strcpy(output, outputFileName.c_str());
410         
411                         //build matrix from shared rabunds
412                         double** data;
413                         data = new double*[thisLookUp[0]->getNumBins()];
414                         
415                         vector<SharedRAbundVector*> subset;
416                         int setACount = 0;
417                         int setBCount = 0;
418                         for (int i = 0; i < thisLookUp.size(); i++) {
419                                 string thisGroup = thisLookUp[i]->getGroup();
420                                 
421                                 //is this group for a set we want to compare??
422                                 //sorting the sets by putting setB at the back and setA in the front
423                                 if ((designMap->getGroup(thisGroup) == setB)) {  
424                                         subset.push_back(thisLookUp[i]);
425                                         setBCount++;
426                                 }else if ((designMap->getGroup(thisGroup) == setA)) {
427                                         subset.insert(subset.begin()+setACount, thisLookUp[i]);
428                                         setACount++;
429                                 }
430                         }
431                                                 
432                         if ((setACount == 0) || (setBCount == 0))  { 
433                                 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
434                                 outputNames.pop_back();
435                         }else {
436                                 //fill data
437                                 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
438                                         data[j] = new double[subset.size()];
439                                         for (int i = 0; i < subset.size(); i++) {
440                                                 data[j][i] = (subset[i]->getAbundance(j));
441                                         }
442                                 }
443                                 
444                                 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); 
445                                 
446                                 metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
447                                 
448                                 m->mothurOutEndLine(); 
449                         }
450                         
451                         //free memory
452                         delete output;
453                         for(int i = 0; i < thisLookUp[0]->getNumBins(); i++)  {  delete[] data[i];  }
454                         delete[] data; 
455                 }
456                 
457                 return 0;
458
459         }
460         catch(exception& e) {
461                 m->errorOut(e, "MetaStatsCommand", "driver");
462                 exit(1);
463         }
464 }
465 //**********************************************************************************************************************
466 int MetaStatsCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
467         try {
468                 
469                 vector<SharedRAbundVector*> newLookup;
470                 for (int i = 0; i < thislookup.size(); i++) {
471                         SharedRAbundVector* temp = new SharedRAbundVector();
472                         temp->setLabel(thislookup[i]->getLabel());
473                         temp->setGroup(thislookup[i]->getGroup());
474                         newLookup.push_back(temp);
475                 }
476                 
477                 //for each bin
478                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
479                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
480                 
481                         //look at each sharedRabund and make sure they are not all zero
482                         bool allZero = true;
483                         for (int j = 0; j < thislookup.size(); j++) {
484                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
485                         }
486                         
487                         //if they are not all zero add this bin
488                         if (!allZero) {
489                                 for (int j = 0; j < thislookup.size(); j++) {
490                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
491                                 }
492                         }
493                 }
494
495                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
496
497                 thislookup = newLookup;
498                 
499                 return 0;
500  
501         }
502         catch(exception& e) {
503                 m->errorOut(e, "MetaStatsCommand", "eliminateZeroOTUS");
504                 exit(1);
505         }
506 }
507 //**********************************************************************************************************************