]> git.donarmstrong.com Git - mothur.git/blob - metastatscommand.cpp
added [ERROR] flag if command aborts
[mothur.git] / metastatscommand.cpp
1 /*
2  *  metastatscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/16/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
13
14 //**********************************************************************************************************************
15 vector<string> MetaStatsCommand::getValidParameters(){  
16         try {
17                 string Array[] =  {"groups","label","outputdir","iters","threshold","g","design","sets","processors","inputdir"};
18                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
19                 return myArray;
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "MetaStatsCommand", "getValidParameters");
23                 exit(1);
24         }
25 }
26 //**********************************************************************************************************************
27 MetaStatsCommand::MetaStatsCommand(){   
28         try {
29                 abort = true; calledHelp = true; 
30                 vector<string> tempOutNames;
31                 outputTypes["metastats"] = tempOutNames;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 vector<string> MetaStatsCommand::getRequiredParameters(){       
40         try {
41                 string Array[] =  {"design"};
42                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "MetaStatsCommand", "getRequiredParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 vector<string> MetaStatsCommand::getRequiredFiles(){    
52         try {
53                 string Array[] =  {"shared"};
54                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
55                 return myArray;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "MetaStatsCommand", "getRequiredFiles");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63
64 MetaStatsCommand::MetaStatsCommand(string option) {
65         try {
66                 globaldata = GlobalData::getInstance();
67                 abort = false; calledHelp = false;   
68                 allLines = 1;
69                 labels.clear();
70                 
71                 //allow user to run help
72                 if(option == "help") { help(); abort = true; calledHelp = true; }
73                 
74                 else {
75                         //valid paramters for this command
76                         string AlignArray[] =  {"groups","label","outputdir","iters","threshold","g","design","sets","processors","inputdir"};
77                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
78                         
79                         OptionParser parser(option);
80                         map<string,string> parameters = parser.getParameters();
81                         
82                         ValidParameters validParameter;
83                         
84                         //check to make sure all parameters are valid for command
85                         map<string,string>::iterator it;
86                         for (it = parameters.begin(); it != parameters.end(); it++) { 
87                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
88                         }
89                         
90                         //initialize outputTypes
91                         vector<string> tempOutNames;
92                         outputTypes["metastats"] = tempOutNames;
93                         
94                         //if the user changes the output directory command factory will send this info to us in the output parameter 
95                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
96                                 outputDir = ""; 
97                                 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it       
98                         }
99                         
100                         //if the user changes the input directory command factory will send this info to us in the output parameter 
101                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
102                         if (inputDir == "not found"){   inputDir = "";          }
103                         else {
104                                 string path;
105                                 it = parameters.find("design");
106                                 //user has given a template file
107                                 if(it != parameters.end()){ 
108                                         path = m->hasPath(it->second);
109                                         //if the user has not given a path then, add inputdir. else leave path alone.
110                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
111                                 }
112                         }
113                         
114                         //check for required parameters
115                         designfile = validParameter.validFile(parameters, "design", true);
116                         if (designfile == "not open") { abort = true; }
117                         else if (designfile == "not found") {  designfile = "";  m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }       
118                         
119                         //make sure the user has already run the read.otu command
120                         if ((globaldata->getSharedFile() == "")) {
121                                  m->mothurOut("You must read a list and a group, or a shared file before you can use the metastats command."); m->mothurOutEndLine(); abort = true; 
122                         }
123
124                         //check for optional parameter and set defaults
125                         // ...at some point should added some additional type checking...
126                         label = validParameter.validFile(parameters, "label", false);                   
127                         if (label == "not found") { label = ""; }
128                         else { 
129                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
130                                 else { allLines = 1;  }
131                         }
132                         
133                         //if the user has not specified any labels use the ones from read.otu
134                         if (label == "") {  
135                                 allLines = globaldata->allLines; 
136                                 labels = globaldata->labels; 
137                         }
138                         
139                         groups = validParameter.validFile(parameters, "groups", false);                 
140                         if (groups == "not found") { groups = ""; pickedGroups = false; }
141                         else { 
142                                 pickedGroups = true;
143                                 m->splitAtDash(groups, Groups);
144                                 globaldata->Groups = Groups;
145                         }
146                         
147                         sets = validParameter.validFile(parameters, "sets", false);                     
148                         if (sets == "not found") { sets = ""; }
149                         else { 
150                                 m->splitAtDash(sets, Sets);
151                         }
152
153                         
154                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
155                         convert(temp, iters); 
156                         
157                         temp = validParameter.validFile(parameters, "threshold", false);                        if (temp == "not found") { temp = "0.05"; }
158                         convert(temp, threshold); 
159                         
160                         temp = validParameter.validFile(parameters, "processors", false);                       if (temp == "not found"){       temp = "1";     }
161                         convert(temp, processors); 
162                 }
163
164         }
165         catch(exception& e) {
166                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
167                 exit(1);
168         }
169 }
170
171 //**********************************************************************************************************************
172
173 void MetaStatsCommand::help(){
174         try {
175                 m->mothurOut("This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n");
176                 m->mothurOut("The metastats command can only be executed after a successful read.otu command of a list and group or shared file.\n");
177                 m->mothurOut("The metastats command outputs a .metastats file. \n");
178                 m->mothurOut("The metastats command parameters are iters, threshold, groups, label, design, sets and processors.  The design parameter is required.\n");
179                 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n");
180                 m->mothurOut("The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
181                 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n");
182                 m->mothurOut("The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic.  The default is 1000. \n");
183                 m->mothurOut("The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n");
184                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
185                 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
186                 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
187                 m->mothurOut("The metastats command should be in the following format: metastats(design=yourDesignFile).\n");
188                 m->mothurOut("Example metastats(design=temp.design, groups=A-B-C).\n");
189                 m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
190                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
191
192         }
193         catch(exception& e) {
194                 m->errorOut(e, "MetaStatsCommand", "help");
195                 exit(1);
196         }
197 }
198
199 //**********************************************************************************************************************
200
201 MetaStatsCommand::~MetaStatsCommand(){}
202
203 //**********************************************************************************************************************
204
205 int MetaStatsCommand::execute(){
206         try {
207         
208                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
209                 
210                 designMap = new GroupMap(designfile);
211                 designMap->readDesignMap();
212                 
213                 read = new ReadOTUFile(globaldata->inputFileName);      
214                 read->read(&*globaldata); 
215                 input = globaldata->ginput;
216                 lookup = input->getSharedRAbundVectors();
217                 string lastLabel = lookup[0]->getLabel();
218                 
219                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
220                 set<string> processedLabels;
221                 set<string> userLabels = labels;
222                 
223                 //setup the pairwise comparions of sets for metastats
224                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
225                 //make sure sets are all in designMap
226                 SharedUtil* util = new SharedUtil();  
227                 util->setGroups(Sets, designMap->namesOfGroups);  
228                 delete util;
229                 
230                 int numGroups = Sets.size();
231                 for (int a=0; a<numGroups; a++) { 
232                         for (int l = 0; l < a; l++) {   
233                                 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
234                                 namesOfGroupCombos.push_back(groups);
235                         }
236                 }
237         
238                 
239                 //only 1 combo
240                 if (numGroups == 2) { processors = 1; }
241                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
242                 
243                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
244                         if(processors != 1){
245                                 int numPairs = namesOfGroupCombos.size();
246                                 int numPairsPerProcessor = numPairs / processors;
247                         
248                                 for (int i = 0; i < processors; i++) {
249                                         int startPos = i * numPairsPerProcessor;
250                                         if(i == processors - 1){
251                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
252                                         }
253                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
254                                 }
255                         }
256                 #endif
257                 
258                 //as long as you are not at the end of the file or done wih the lines you want
259                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
260                         
261                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } globaldata->Groups.clear(); delete read;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
262         
263                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
264
265                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
266                                 process(lookup);
267                                 
268                                 processedLabels.insert(lookup[0]->getLabel());
269                                 userLabels.erase(lookup[0]->getLabel());
270                         }
271                         
272                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
273                                 string saveLabel = lookup[0]->getLabel();
274                         
275                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
276                                 lookup = input->getSharedRAbundVectors(lastLabel);
277                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
278                                 
279                                 process(lookup);
280                                 
281                                 processedLabels.insert(lookup[0]->getLabel());
282                                 userLabels.erase(lookup[0]->getLabel());
283                                 
284                                 //restore real lastlabel to save below
285                                 lookup[0]->setLabel(saveLabel);
286                         }
287                         
288                         lastLabel = lookup[0]->getLabel();
289                         //prevent memory leak
290                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
291                         
292                         if (m->control_pressed) {  outputTypes.clear(); globaldata->Groups.clear(); delete read;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
293
294                         //get next line to process
295                         lookup = input->getSharedRAbundVectors();                               
296                 }
297                 
298                 if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); delete read; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); }  return 0; }
299
300                 //output error messages about any remaining user labels
301                 set<string>::iterator it;
302                 bool needToRun = false;
303                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
304                         m->mothurOut("Your file does not include the label " + *it); 
305                         if (processedLabels.count(lastLabel) != 1) {
306                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
307                                 needToRun = true;
308                         }else {
309                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
310                         }
311                 }
312         
313                 //run last label if you need to
314                 if (needToRun == true)  {
315                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
316                         lookup = input->getSharedRAbundVectors(lastLabel);
317                         
318                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
319                         
320                         process(lookup);
321                         
322                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
323                 }
324         
325                 //reset groups parameter
326                 globaldata->Groups.clear();  
327                 delete input; globaldata->ginput = NULL;
328                 delete read;
329                 delete designMap;
330                 
331                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str()); } return 0;}
332                 
333                 m->mothurOutEndLine();
334                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
335                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
336                 m->mothurOutEndLine();
337                 
338                 return 0;
339         }
340         catch(exception& e) {
341                 m->errorOut(e, "MetaStatsCommand", "execute");
342                 exit(1);
343         }
344 }
345 //**********************************************************************************************************************
346
347 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
348         try {
349                 
350                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
351                                 if(processors == 1){
352                                         driver(0, namesOfGroupCombos.size(), thisLookUp);
353                                 }else{
354                                         int process = 1;
355                                         vector<int> processIDS;
356                 
357                                         //loop through and create all the processes you want
358                                         while (process != processors) {
359                                                 int pid = fork();
360                         
361                                                 if (pid > 0) {
362                                                         processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
363                                                         process++;
364                                                 }else if (pid == 0){
365                                                         driver(lines[process].start, lines[process].num, thisLookUp);
366                                                         exit(0);
367                                                 }else { 
368                                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
369                                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
370                                                         exit(0);
371                                                 }
372                                         }
373                                         
374                                         //do my part
375                                         driver(lines[0].start, lines[0].num, thisLookUp);
376                 
377                                         //force parent to wait until all the processes are done
378                                         for (int i=0;i<(processors-1);i++) { 
379                                                 int temp = processIDS[i];
380                                                 wait(&temp);
381                                         }
382                                 }
383                 #else
384                                 driver(0, namesOfGroupCombos.size(), thisLookUp);
385                 #endif
386
387                 return 0;
388                 
389         }
390         catch(exception& e) {
391                 m->errorOut(e, "MetaStatsCommand", "process");
392                 exit(1);
393         }
394 }
395 //**********************************************************************************************************************
396 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) { 
397         try {
398         
399                 //for each combo
400                 for (int c = start; c < (start+num); c++) {
401                         
402                         //get set names
403                         string setA = namesOfGroupCombos[c][0]; 
404                         string setB = namesOfGroupCombos[c][1];
405                         
406                         //get filename
407                         string outputFileName = outputDir +  m->getRootName(m->getSimpleName(globaldata->inputFileName)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
408                         outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
409                         int nameLength = outputFileName.length();
410                         char * output = new char[nameLength];
411                         strcpy(output, outputFileName.c_str());
412         
413                         //build matrix from shared rabunds
414                         double** data;
415                         data = new double*[thisLookUp[0]->getNumBins()];
416                         
417                         vector<SharedRAbundVector*> subset;
418                         int setACount = 0;
419                         int setBCount = 0;
420                         for (int i = 0; i < thisLookUp.size(); i++) {
421                                 string thisGroup = thisLookUp[i]->getGroup();
422                                 
423                                 //is this group for a set we want to compare??
424                                 //sorting the sets by putting setB at the back and setA in the front
425                                 if ((designMap->getGroup(thisGroup) == setB)) {  
426                                         subset.push_back(thisLookUp[i]);
427                                         setBCount++;
428                                 }else if ((designMap->getGroup(thisGroup) == setA)) {
429                                         subset.insert(subset.begin()+setACount, thisLookUp[i]);
430                                         setACount++;
431                                 }
432                         }
433                                                 
434                         if ((setACount == 0) || (setBCount == 0))  { 
435                                 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
436                                 outputNames.pop_back();
437                         }else {
438                                 //fill data
439                                 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
440                                         data[j] = new double[subset.size()];
441                                         for (int i = 0; i < subset.size(); i++) {
442                                                 data[j][i] = (subset[i]->getAbundance(j));
443                                         }
444                                 }
445                                 
446                                 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); 
447                                 metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
448                                 
449                                 m->mothurOutEndLine(); 
450                         }
451                         
452                         //free memory
453                         delete output;
454                         for(int i = 0; i < thisLookUp[0]->getNumBins(); i++)  {  delete[] data[i];  }
455                         delete[] data; 
456                 }
457                 
458                 return 0;
459
460         }
461         catch(exception& e) {
462                 m->errorOut(e, "MetaStatsCommand", "driver");
463                 exit(1);
464         }
465 }
466 //**********************************************************************************************************************