]> git.donarmstrong.com Git - mothur.git/blob - metastatscommand.cpp
testing 1.14.0
[mothur.git] / metastatscommand.cpp
1 /*
2  *  metastatscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/16/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
13
14 //**********************************************************************************************************************
15 vector<string> MetaStatsCommand::getValidParameters(){  
16         try {
17                 string Array[] =  {"groups","label","outputdir","iters","threshold","g","design","sets","processors","inputdir"};
18                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
19                 return myArray;
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "MetaStatsCommand", "getValidParameters");
23                 exit(1);
24         }
25 }
26 //**********************************************************************************************************************
27 MetaStatsCommand::MetaStatsCommand(){   
28         try {
29                 //initialize outputTypes
30                 vector<string> tempOutNames;
31                 outputTypes["metastats"] = tempOutNames;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 vector<string> MetaStatsCommand::getRequiredParameters(){       
40         try {
41                 string Array[] =  {"design"};
42                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "MetaStatsCommand", "getRequiredParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 vector<string> MetaStatsCommand::getRequiredFiles(){    
52         try {
53                 string Array[] =  {"shared"};
54                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
55                 return myArray;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "MetaStatsCommand", "getRequiredFiles");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63
64 MetaStatsCommand::MetaStatsCommand(string option) {
65         try {
66                 globaldata = GlobalData::getInstance();
67                 abort = false;
68                 allLines = 1;
69                 labels.clear();
70                 
71                 //allow user to run help
72                 if(option == "help") { help(); abort = true; }
73                 
74                 else {
75                         //valid paramters for this command
76                         string AlignArray[] =  {"groups","label","outputdir","iters","threshold","g","design","sets","processors","inputdir"};
77                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
78                         
79                         OptionParser parser(option);
80                         map<string,string> parameters = parser.getParameters();
81                         
82                         ValidParameters validParameter;
83                         
84                         //check to make sure all parameters are valid for command
85                         map<string,string>::iterator it;
86                         for (it = parameters.begin(); it != parameters.end(); it++) { 
87                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
88                         }
89                         
90                         //initialize outputTypes
91                         vector<string> tempOutNames;
92                         outputTypes["metastats"] = tempOutNames;
93                         
94                         //if the user changes the output directory command factory will send this info to us in the output parameter 
95                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
96                                 outputDir = ""; 
97                                 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it       
98                         }
99                         
100                         //if the user changes the input directory command factory will send this info to us in the output parameter 
101                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
102                         if (inputDir == "not found"){   inputDir = "";          }
103                         else {
104                                 string path;
105                                 it = parameters.find("design");
106                                 //user has given a template file
107                                 if(it != parameters.end()){ 
108                                         path = m->hasPath(it->second);
109                                         //if the user has not given a path then, add inputdir. else leave path alone.
110                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
111                                 }
112                         }
113                         
114                         //check for required parameters
115                         designfile = validParameter.validFile(parameters, "design", true);
116                         if (designfile == "not open") { abort = true; }
117                         else if (designfile == "not found") {  designfile = "";  m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }       
118                         
119                         //make sure the user has already run the read.otu command
120                         if ((globaldata->getSharedFile() == "")) {
121                                  m->mothurOut("You must read a list and a group, or a shared file before you can use the metastats command."); m->mothurOutEndLine(); abort = true; 
122                         }
123
124                         //check for optional parameter and set defaults
125                         // ...at some point should added some additional type checking...
126                         label = validParameter.validFile(parameters, "label", false);                   
127                         if (label == "not found") { label = ""; }
128                         else { 
129                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
130                                 else { allLines = 1;  }
131                         }
132                         
133                         //if the user has not specified any labels use the ones from read.otu
134                         if (label == "") {  
135                                 allLines = globaldata->allLines; 
136                                 labels = globaldata->labels; 
137                         }
138                         
139                         groups = validParameter.validFile(parameters, "groups", false);                 
140                         if (groups == "not found") { groups = ""; pickedGroups = false; }
141                         else { 
142                                 pickedGroups = true;
143                                 m->splitAtDash(groups, Groups);
144                                 globaldata->Groups = Groups;
145                         }
146                         
147                         sets = validParameter.validFile(parameters, "sets", false);                     
148                         if (sets == "not found") { sets = ""; }
149                         else { 
150                                 m->splitAtDash(sets, Sets);
151                         }
152
153                         
154                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
155                         convert(temp, iters); 
156                         
157                         temp = validParameter.validFile(parameters, "threshold", false);                        if (temp == "not found") { temp = "0.05"; }
158                         convert(temp, threshold); 
159                         
160                         temp = validParameter.validFile(parameters, "processors", false);                       if (temp == "not found"){       temp = "1";     }
161                         convert(temp, processors); 
162                 }
163
164         }
165         catch(exception& e) {
166                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
167                 exit(1);
168         }
169 }
170
171 //**********************************************************************************************************************
172
173 void MetaStatsCommand::help(){
174         try {
175                 m->mothurOut("This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n");
176                 m->mothurOut("The metastats command can only be executed after a successful read.otu command of a list and group or shared file.\n");
177                 m->mothurOut("The metastats command outputs a .metastats file. \n");
178                 m->mothurOut("The metastats command parameters are iters, threshold, groups, label, design, sets and processors.  The design parameter is required.\n");
179                 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n");
180                 m->mothurOut("The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
181                 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n");
182                 m->mothurOut("The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic.  The default is 1000. \n");
183                 m->mothurOut("The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n");
184                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
185                 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
186                 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
187                 m->mothurOut("The metastats command should be in the following format: metastats(design=yourDesignFile).\n");
188                 m->mothurOut("Example metastats(design=temp.design, groups=A-B-C).\n");
189                 m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
190                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
191
192         }
193         catch(exception& e) {
194                 m->errorOut(e, "MetaStatsCommand", "help");
195                 exit(1);
196         }
197 }
198
199 //**********************************************************************************************************************
200
201 MetaStatsCommand::~MetaStatsCommand(){}
202
203 //**********************************************************************************************************************
204
205 int MetaStatsCommand::execute(){
206         try {
207         
208                 if (abort == true) { return 0; }
209                 
210                 designMap = new GroupMap(designfile);
211                 designMap->readDesignMap();
212                 
213                 read = new ReadOTUFile(globaldata->inputFileName);      
214                 read->read(&*globaldata); 
215                 input = globaldata->ginput;
216                 lookup = input->getSharedRAbundVectors();
217                 string lastLabel = lookup[0]->getLabel();
218                 
219                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
220                 set<string> processedLabels;
221                 set<string> userLabels = labels;
222                 
223                 //setup the pairwise comparions of sets for metastats
224                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
225                 //make sure sets are all in designMap
226                 SharedUtil* util = new SharedUtil();  
227                 util->setGroups(Sets, designMap->namesOfGroups);  
228                 delete util;
229                 
230                 int numGroups = Sets.size();
231                 for (int a=0; a<numGroups; a++) { 
232                         for (int l = 0; l < a; l++) {   
233                                 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
234                                 namesOfGroupCombos.push_back(groups);
235                         }
236                 }
237         
238                 
239                 //only 1 combo
240                 if (numGroups == 2) { processors = 1; }
241                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
242                 
243                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
244                         if(processors != 1){
245                                 int numPairs = namesOfGroupCombos.size();
246                                 int numPairsPerProcessor = numPairs / processors;
247                         
248                                 for (int i = 0; i < processors; i++) {
249                                         int startPos = i * numPairsPerProcessor;
250                                         if(i == processors - 1){
251                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
252                                         }
253                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
254                                 }
255                         }
256                 #endif
257                 
258                 //as long as you are not at the end of the file or done wih the lines you want
259                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
260                         
261                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } globaldata->Groups.clear(); delete read;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
262         
263                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
264
265                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
266                                 process(lookup);
267                                 
268                                 processedLabels.insert(lookup[0]->getLabel());
269                                 userLabels.erase(lookup[0]->getLabel());
270                         }
271                         
272                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
273                                 string saveLabel = lookup[0]->getLabel();
274                         
275                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
276                                 lookup = input->getSharedRAbundVectors(lastLabel);
277                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
278                                 
279                                 process(lookup);
280                                 
281                                 processedLabels.insert(lookup[0]->getLabel());
282                                 userLabels.erase(lookup[0]->getLabel());
283                                 
284                                 //restore real lastlabel to save below
285                                 lookup[0]->setLabel(saveLabel);
286                         }
287                         
288                         lastLabel = lookup[0]->getLabel();
289                         //prevent memory leak
290                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
291                         
292                         if (m->control_pressed) {  outputTypes.clear(); globaldata->Groups.clear(); delete read;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
293
294                         //get next line to process
295                         lookup = input->getSharedRAbundVectors();                               
296                 }
297                 
298                 if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); delete read; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); }  return 0; }
299
300                 //output error messages about any remaining user labels
301                 set<string>::iterator it;
302                 bool needToRun = false;
303                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
304                         m->mothurOut("Your file does not include the label " + *it); 
305                         if (processedLabels.count(lastLabel) != 1) {
306                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
307                                 needToRun = true;
308                         }else {
309                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
310                         }
311                 }
312         
313                 //run last label if you need to
314                 if (needToRun == true)  {
315                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
316                         lookup = input->getSharedRAbundVectors(lastLabel);
317                         
318                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
319                         
320                         process(lookup);
321                         
322                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
323                 }
324         
325                 //reset groups parameter
326                 globaldata->Groups.clear();  
327                 delete input; globaldata->ginput = NULL;
328                 delete read;
329                 delete designMap;
330                 
331                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str()); } return 0;}
332                 
333                 m->mothurOutEndLine();
334                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
335                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
336                 m->mothurOutEndLine();
337                 
338                 return 0;
339         }
340         catch(exception& e) {
341                 m->errorOut(e, "MetaStatsCommand", "execute");
342                 exit(1);
343         }
344 }
345 //**********************************************************************************************************************
346
347 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
348         try {
349                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
350                 
351                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
352                                 if(processors == 1){
353                                         driver(0, namesOfGroupCombos.size(), thisLookUp);
354                                 }else{
355                                         int process = 1;
356                                         vector<int> processIDS;
357                 
358                                         //loop through and create all the processes you want
359                                         while (process != processors) {
360                                                 int pid = fork();
361                         
362                                                 if (pid > 0) {
363                                                         processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
364                                                         process++;
365                                                 }else if (pid == 0){
366                                                         driver(lines[process].start, lines[process].num, thisLookUp);
367                                                         exit(0);
368                                                 }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
369                                         }
370                                         
371                                         //do my part
372                                         driver(lines[0].start, lines[0].num, thisLookUp);
373                 
374                                         //force parent to wait until all the processes are done
375                                         for (int i=0;i<(processors-1);i++) { 
376                                                 int temp = processIDS[i];
377                                                 wait(&temp);
378                                         }
379                                 }
380                 #else
381                                 driver(0, namesOfGroupCombos.size(), thisLookUp);
382                 #endif
383
384                 return 0;
385                 
386         }
387         catch(exception& e) {
388                 m->errorOut(e, "MetaStatsCommand", "process");
389                 exit(1);
390         }
391 }
392 //**********************************************************************************************************************
393 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) { 
394         try {
395         
396                 //for each combo
397                 for (int c = start; c < (start+num); c++) {
398                         
399                         //get set names
400                         string setA = namesOfGroupCombos[c][0]; 
401                         string setB = namesOfGroupCombos[c][1];
402                         
403                         //get filename
404                         string outputFileName = outputDir +  m->getRootName(m->getSimpleName(globaldata->inputFileName)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
405                         outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
406                         int nameLength = outputFileName.length();
407                         char * output = new char[nameLength];
408                         strcpy(output, outputFileName.c_str());
409         
410                         //build matrix from shared rabunds
411                         double** data;
412                         data = new double*[thisLookUp[0]->getNumBins()];
413                         
414                         vector<SharedRAbundVector*> subset;
415                         int setACount = 0;
416                         int setBCount = 0;
417                         for (int i = 0; i < thisLookUp.size(); i++) {
418                                 string thisGroup = thisLookUp[i]->getGroup();
419                                 
420                                 //is this group for a set we want to compare??
421                                 //sorting the sets by putting setB at the back and setA in the front
422                                 if ((designMap->getGroup(thisGroup) == setB)) {  
423                                         subset.push_back(thisLookUp[i]);
424                                         setBCount++;
425                                 }else if ((designMap->getGroup(thisGroup) == setA)) {
426                                         subset.insert(subset.begin()+setACount, thisLookUp[i]);
427                                         setACount++;
428                                 }
429                         }
430                                                 
431                         if ((setACount == 0) || (setBCount == 0))  { 
432                                 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
433                                 outputNames.pop_back();
434                         }else {
435                                 //fill data
436                                 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
437                                         data[j] = new double[subset.size()];
438                                         for (int i = 0; i < subset.size(); i++) {
439                                                 data[j][i] = (subset[i]->getAbundance(j));
440                                         }
441                                 }
442                                 
443                                 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); 
444                                 
445                                 metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
446                                 
447                                 m->mothurOutEndLine(); 
448                         }
449                         
450                         //free memory
451                         delete output;
452                         for(int i = 0; i < thisLookUp[0]->getNumBins(); i++)  {  delete[] data[i];  }
453                         delete[] data; 
454                 }
455                 
456                 return 0;
457
458         }
459         catch(exception& e) {
460                 m->errorOut(e, "MetaStatsCommand", "driver");
461                 exit(1);
462         }
463 }
464 //**********************************************************************************************************************
465 int MetaStatsCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
466         try {
467                 
468                 vector<SharedRAbundVector*> newLookup;
469                 for (int i = 0; i < thislookup.size(); i++) {
470                         SharedRAbundVector* temp = new SharedRAbundVector();
471                         temp->setLabel(thislookup[i]->getLabel());
472                         temp->setGroup(thislookup[i]->getGroup());
473                         newLookup.push_back(temp);
474                 }
475                 
476                 //for each bin
477                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
478                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
479                 
480                         //look at each sharedRabund and make sure they are not all zero
481                         bool allZero = true;
482                         for (int j = 0; j < thislookup.size(); j++) {
483                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
484                         }
485                         
486                         //if they are not all zero add this bin
487                         if (!allZero) {
488                                 for (int j = 0; j < thislookup.size(); j++) {
489                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
490                                 }
491                         }
492                 }
493
494                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
495
496                 thislookup = newLookup;
497                 
498                 return 0;
499  
500         }
501         catch(exception& e) {
502                 m->errorOut(e, "MetaStatsCommand", "eliminateZeroOTUS");
503                 exit(1);
504         }
505 }
506 //**********************************************************************************************************************