]> git.donarmstrong.com Git - mothur.git/blob - metastatscommand.cpp
cluster.split fix
[mothur.git] / metastatscommand.cpp
1 /*
2  *  metastatscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/16/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
13
14 //**********************************************************************************************************************
15 vector<string> MetaStatsCommand::getValidParameters(){  
16         try {
17                 string Array[] =  {"groups","label","outputdir","iters","threshold","g","design","sets","processors","inputdir"};
18                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
19                 return myArray;
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "MetaStatsCommand", "getValidParameters");
23                 exit(1);
24         }
25 }
26 //**********************************************************************************************************************
27 MetaStatsCommand::MetaStatsCommand(){   
28         try {
29                 abort = true;
30                 //initialize outputTypes
31                 vector<string> tempOutNames;
32                 outputTypes["metastats"] = tempOutNames;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 vector<string> MetaStatsCommand::getRequiredParameters(){       
41         try {
42                 string Array[] =  {"design"};
43                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "MetaStatsCommand", "getRequiredParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 vector<string> MetaStatsCommand::getRequiredFiles(){    
53         try {
54                 string Array[] =  {"shared"};
55                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "MetaStatsCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64
65 MetaStatsCommand::MetaStatsCommand(string option) {
66         try {
67                 globaldata = GlobalData::getInstance();
68                 abort = false;
69                 allLines = 1;
70                 labels.clear();
71                 
72                 //allow user to run help
73                 if(option == "help") { help(); abort = true; }
74                 
75                 else {
76                         //valid paramters for this command
77                         string AlignArray[] =  {"groups","label","outputdir","iters","threshold","g","design","sets","processors","inputdir"};
78                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
79                         
80                         OptionParser parser(option);
81                         map<string,string> parameters = parser.getParameters();
82                         
83                         ValidParameters validParameter;
84                         
85                         //check to make sure all parameters are valid for command
86                         map<string,string>::iterator it;
87                         for (it = parameters.begin(); it != parameters.end(); it++) { 
88                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
89                         }
90                         
91                         //initialize outputTypes
92                         vector<string> tempOutNames;
93                         outputTypes["metastats"] = tempOutNames;
94                         
95                         //if the user changes the output directory command factory will send this info to us in the output parameter 
96                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
97                                 outputDir = ""; 
98                                 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it       
99                         }
100                         
101                         //if the user changes the input directory command factory will send this info to us in the output parameter 
102                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
103                         if (inputDir == "not found"){   inputDir = "";          }
104                         else {
105                                 string path;
106                                 it = parameters.find("design");
107                                 //user has given a template file
108                                 if(it != parameters.end()){ 
109                                         path = m->hasPath(it->second);
110                                         //if the user has not given a path then, add inputdir. else leave path alone.
111                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
112                                 }
113                         }
114                         
115                         //check for required parameters
116                         designfile = validParameter.validFile(parameters, "design", true);
117                         if (designfile == "not open") { abort = true; }
118                         else if (designfile == "not found") {  designfile = "";  m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }       
119                         
120                         //make sure the user has already run the read.otu command
121                         if ((globaldata->getSharedFile() == "")) {
122                                  m->mothurOut("You must read a list and a group, or a shared file before you can use the metastats command."); m->mothurOutEndLine(); abort = true; 
123                         }
124
125                         //check for optional parameter and set defaults
126                         // ...at some point should added some additional type checking...
127                         label = validParameter.validFile(parameters, "label", false);                   
128                         if (label == "not found") { label = ""; }
129                         else { 
130                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
131                                 else { allLines = 1;  }
132                         }
133                         
134                         //if the user has not specified any labels use the ones from read.otu
135                         if (label == "") {  
136                                 allLines = globaldata->allLines; 
137                                 labels = globaldata->labels; 
138                         }
139                         
140                         groups = validParameter.validFile(parameters, "groups", false);                 
141                         if (groups == "not found") { groups = ""; pickedGroups = false; }
142                         else { 
143                                 pickedGroups = true;
144                                 m->splitAtDash(groups, Groups);
145                                 globaldata->Groups = Groups;
146                         }
147                         
148                         sets = validParameter.validFile(parameters, "sets", false);                     
149                         if (sets == "not found") { sets = ""; }
150                         else { 
151                                 m->splitAtDash(sets, Sets);
152                         }
153
154                         
155                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
156                         convert(temp, iters); 
157                         
158                         temp = validParameter.validFile(parameters, "threshold", false);                        if (temp == "not found") { temp = "0.05"; }
159                         convert(temp, threshold); 
160                         
161                         temp = validParameter.validFile(parameters, "processors", false);                       if (temp == "not found"){       temp = "1";     }
162                         convert(temp, processors); 
163                 }
164
165         }
166         catch(exception& e) {
167                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
168                 exit(1);
169         }
170 }
171
172 //**********************************************************************************************************************
173
174 void MetaStatsCommand::help(){
175         try {
176                 m->mothurOut("This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n");
177                 m->mothurOut("The metastats command can only be executed after a successful read.otu command of a list and group or shared file.\n");
178                 m->mothurOut("The metastats command outputs a .metastats file. \n");
179                 m->mothurOut("The metastats command parameters are iters, threshold, groups, label, design, sets and processors.  The design parameter is required.\n");
180                 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n");
181                 m->mothurOut("The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
182                 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n");
183                 m->mothurOut("The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic.  The default is 1000. \n");
184                 m->mothurOut("The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n");
185                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
186                 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
187                 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
188                 m->mothurOut("The metastats command should be in the following format: metastats(design=yourDesignFile).\n");
189                 m->mothurOut("Example metastats(design=temp.design, groups=A-B-C).\n");
190                 m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
191                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
192
193         }
194         catch(exception& e) {
195                 m->errorOut(e, "MetaStatsCommand", "help");
196                 exit(1);
197         }
198 }
199
200 //**********************************************************************************************************************
201
202 MetaStatsCommand::~MetaStatsCommand(){}
203
204 //**********************************************************************************************************************
205
206 int MetaStatsCommand::execute(){
207         try {
208         
209                 if (abort == true) { return 0; }
210                 
211                 designMap = new GroupMap(designfile);
212                 designMap->readDesignMap();
213                 
214                 read = new ReadOTUFile(globaldata->inputFileName);      
215                 read->read(&*globaldata); 
216                 input = globaldata->ginput;
217                 lookup = input->getSharedRAbundVectors();
218                 string lastLabel = lookup[0]->getLabel();
219                 
220                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
221                 set<string> processedLabels;
222                 set<string> userLabels = labels;
223                 
224                 //setup the pairwise comparions of sets for metastats
225                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
226                 //make sure sets are all in designMap
227                 SharedUtil* util = new SharedUtil();  
228                 util->setGroups(Sets, designMap->namesOfGroups);  
229                 delete util;
230                 
231                 int numGroups = Sets.size();
232                 for (int a=0; a<numGroups; a++) { 
233                         for (int l = 0; l < a; l++) {   
234                                 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
235                                 namesOfGroupCombos.push_back(groups);
236                         }
237                 }
238         
239                 
240                 //only 1 combo
241                 if (numGroups == 2) { processors = 1; }
242                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
243                 
244                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
245                         if(processors != 1){
246                                 int numPairs = namesOfGroupCombos.size();
247                                 int numPairsPerProcessor = numPairs / processors;
248                         
249                                 for (int i = 0; i < processors; i++) {
250                                         int startPos = i * numPairsPerProcessor;
251                                         if(i == processors - 1){
252                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
253                                         }
254                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
255                                 }
256                         }
257                 #endif
258                 
259                 //as long as you are not at the end of the file or done wih the lines you want
260                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
261                         
262                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } globaldata->Groups.clear(); delete read;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
263         
264                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
265
266                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
267                                 process(lookup);
268                                 
269                                 processedLabels.insert(lookup[0]->getLabel());
270                                 userLabels.erase(lookup[0]->getLabel());
271                         }
272                         
273                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
274                                 string saveLabel = lookup[0]->getLabel();
275                         
276                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
277                                 lookup = input->getSharedRAbundVectors(lastLabel);
278                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
279                                 
280                                 process(lookup);
281                                 
282                                 processedLabels.insert(lookup[0]->getLabel());
283                                 userLabels.erase(lookup[0]->getLabel());
284                                 
285                                 //restore real lastlabel to save below
286                                 lookup[0]->setLabel(saveLabel);
287                         }
288                         
289                         lastLabel = lookup[0]->getLabel();
290                         //prevent memory leak
291                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
292                         
293                         if (m->control_pressed) {  outputTypes.clear(); globaldata->Groups.clear(); delete read;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
294
295                         //get next line to process
296                         lookup = input->getSharedRAbundVectors();                               
297                 }
298                 
299                 if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); delete read; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); }  return 0; }
300
301                 //output error messages about any remaining user labels
302                 set<string>::iterator it;
303                 bool needToRun = false;
304                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
305                         m->mothurOut("Your file does not include the label " + *it); 
306                         if (processedLabels.count(lastLabel) != 1) {
307                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
308                                 needToRun = true;
309                         }else {
310                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
311                         }
312                 }
313         
314                 //run last label if you need to
315                 if (needToRun == true)  {
316                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
317                         lookup = input->getSharedRAbundVectors(lastLabel);
318                         
319                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
320                         
321                         process(lookup);
322                         
323                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
324                 }
325         
326                 //reset groups parameter
327                 globaldata->Groups.clear();  
328                 delete input; globaldata->ginput = NULL;
329                 delete read;
330                 delete designMap;
331                 
332                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str()); } return 0;}
333                 
334                 m->mothurOutEndLine();
335                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
336                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
337                 m->mothurOutEndLine();
338                 
339                 return 0;
340         }
341         catch(exception& e) {
342                 m->errorOut(e, "MetaStatsCommand", "execute");
343                 exit(1);
344         }
345 }
346 //**********************************************************************************************************************
347
348 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
349         try {
350                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
351                 
352                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
353                                 if(processors == 1){
354                                         driver(0, namesOfGroupCombos.size(), thisLookUp);
355                                 }else{
356                                         int process = 1;
357                                         vector<int> processIDS;
358                 
359                                         //loop through and create all the processes you want
360                                         while (process != processors) {
361                                                 int pid = fork();
362                         
363                                                 if (pid > 0) {
364                                                         processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
365                                                         process++;
366                                                 }else if (pid == 0){
367                                                         driver(lines[process].start, lines[process].num, thisLookUp);
368                                                         exit(0);
369                                                 }else { 
370                                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
371                                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
372                                                         exit(0);
373                                                 }
374                                         }
375                                         
376                                         //do my part
377                                         driver(lines[0].start, lines[0].num, thisLookUp);
378                 
379                                         //force parent to wait until all the processes are done
380                                         for (int i=0;i<(processors-1);i++) { 
381                                                 int temp = processIDS[i];
382                                                 wait(&temp);
383                                         }
384                                 }
385                 #else
386                                 driver(0, namesOfGroupCombos.size(), thisLookUp);
387                 #endif
388
389                 return 0;
390                 
391         }
392         catch(exception& e) {
393                 m->errorOut(e, "MetaStatsCommand", "process");
394                 exit(1);
395         }
396 }
397 //**********************************************************************************************************************
398 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) { 
399         try {
400         
401                 //for each combo
402                 for (int c = start; c < (start+num); c++) {
403                         
404                         //get set names
405                         string setA = namesOfGroupCombos[c][0]; 
406                         string setB = namesOfGroupCombos[c][1];
407                         
408                         //get filename
409                         string outputFileName = outputDir +  m->getRootName(m->getSimpleName(globaldata->inputFileName)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
410                         outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
411                         int nameLength = outputFileName.length();
412                         char * output = new char[nameLength];
413                         strcpy(output, outputFileName.c_str());
414         
415                         //build matrix from shared rabunds
416                         double** data;
417                         data = new double*[thisLookUp[0]->getNumBins()];
418                         
419                         vector<SharedRAbundVector*> subset;
420                         int setACount = 0;
421                         int setBCount = 0;
422                         for (int i = 0; i < thisLookUp.size(); i++) {
423                                 string thisGroup = thisLookUp[i]->getGroup();
424                                 
425                                 //is this group for a set we want to compare??
426                                 //sorting the sets by putting setB at the back and setA in the front
427                                 if ((designMap->getGroup(thisGroup) == setB)) {  
428                                         subset.push_back(thisLookUp[i]);
429                                         setBCount++;
430                                 }else if ((designMap->getGroup(thisGroup) == setA)) {
431                                         subset.insert(subset.begin()+setACount, thisLookUp[i]);
432                                         setACount++;
433                                 }
434                         }
435                                                 
436                         if ((setACount == 0) || (setBCount == 0))  { 
437                                 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
438                                 outputNames.pop_back();
439                         }else {
440                                 //fill data
441                                 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
442                                         data[j] = new double[subset.size()];
443                                         for (int i = 0; i < subset.size(); i++) {
444                                                 data[j][i] = (subset[i]->getAbundance(j));
445                                         }
446                                 }
447                                 
448                                 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); 
449                                 metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
450                                 
451                                 m->mothurOutEndLine(); 
452                         }
453                         
454                         //free memory
455                         delete output;
456                         for(int i = 0; i < thisLookUp[0]->getNumBins(); i++)  {  delete[] data[i];  }
457                         delete[] data; 
458                 }
459                 
460                 return 0;
461
462         }
463         catch(exception& e) {
464                 m->errorOut(e, "MetaStatsCommand", "driver");
465                 exit(1);
466         }
467 }
468 //**********************************************************************************************************************
469 int MetaStatsCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
470         try {
471                 
472                 vector<SharedRAbundVector*> newLookup;
473                 for (int i = 0; i < thislookup.size(); i++) {
474                         SharedRAbundVector* temp = new SharedRAbundVector();
475                         temp->setLabel(thislookup[i]->getLabel());
476                         temp->setGroup(thislookup[i]->getGroup());
477                         newLookup.push_back(temp);
478                 }
479                 
480                 //for each bin
481                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
482                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
483                 
484                         //look at each sharedRabund and make sure they are not all zero
485                         bool allZero = true;
486                         for (int j = 0; j < thislookup.size(); j++) {
487                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
488                         }
489                         
490                         //if they are not all zero add this bin
491                         if (!allZero) {
492                                 for (int j = 0; j < thislookup.size(); j++) {
493                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
494                                 }
495                         }
496                 }
497
498                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
499
500                 thislookup = newLookup;
501                 
502                 return 0;
503  
504         }
505         catch(exception& e) {
506                 m->errorOut(e, "MetaStatsCommand", "eliminateZeroOTUS");
507                 exit(1);
508         }
509 }
510 //**********************************************************************************************************************