]> git.donarmstrong.com Git - mothur.git/blob - metastatscommand.cpp
rewrote metastats command in c++, added mothurRemove function to handle ~ error....
[mothur.git] / metastatscommand.cpp
1 /*
2  *  metastatscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/16/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
13 #include "mothurmetastats.h"
14
15 //**********************************************************************************************************************
16 vector<string> MetaStatsCommand::setParameters(){       
17         try {
18                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
19                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
20                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
22                 CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold);
23                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
24                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
25                 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "MetaStatsCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string MetaStatsCommand::getHelpString(){       
40         try {
41                 string helpString = "";
42                 helpString += "This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n";
43                 helpString += "The metastats command outputs a .metastats file. \n";
44                 helpString += "The metastats command parameters are shared, iters, threshold, groups, label, design, sets and processors.  The shared and design parameters are required, unless you have valid current files.\n";
45                 helpString += "The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n";
46                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
47                 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
48                 helpString += "The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic.  The default is 1000. \n";
49                 helpString += "The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n";
50                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
51                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
52                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
53                 helpString += "The metastats command should be in the following format: metastats(design=yourDesignFile).\n";
54                 helpString += "Example metastats(design=temp.design, groups=A-B-C).\n";
55                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
56                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "MetaStatsCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 MetaStatsCommand::MetaStatsCommand(){   
66         try {
67                 abort = true; calledHelp = true;
68                 setParameters();
69                 vector<string> tempOutNames;
70                 outputTypes["metastats"] = tempOutNames;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78
79 MetaStatsCommand::MetaStatsCommand(string option) {
80         try {
81                 abort = false; calledHelp = false;   
82                 allLines = 1;
83                 
84                 
85                 //allow user to run help
86                 if(option == "help") { help(); abort = true; calledHelp = true; }
87                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
88                 
89                 else {
90                         vector<string> myArray = setParameters();
91                         
92                         OptionParser parser(option);
93                         map<string,string> parameters = parser.getParameters();
94                         
95                         ValidParameters validParameter;
96                         
97                         //check to make sure all parameters are valid for command
98                         map<string,string>::iterator it;
99                         for (it = parameters.begin(); it != parameters.end(); it++) { 
100                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
101                         }
102                         
103                         //initialize outputTypes
104                         vector<string> tempOutNames;
105                         outputTypes["metastats"] = tempOutNames;
106                         
107                         
108                         //if the user changes the input directory command factory will send this info to us in the output parameter 
109                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
110                         if (inputDir == "not found"){   inputDir = "";          }
111                         else {
112                                 string path;
113                                 it = parameters.find("design");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
119                                 }
120                                 
121                                 it = parameters.find("shared");
122                                 //user has given a template file
123                                 if(it != parameters.end()){ 
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
127                                 }
128                                 
129                         }
130                         
131                         //check for required parameters
132                         sharedfile = validParameter.validFile(parameters, "shared", true);
133                         if (sharedfile == "not open") { abort = true; }
134                         else if (sharedfile == "not found") {                           //if there is a current shared file, use it
135                                 sharedfile = m->getSharedFile(); 
136                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
137                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
138                         }else { m->setSharedFile(sharedfile); }
139                         
140                         //check for required parameters
141                         designfile = validParameter.validFile(parameters, "design", true);
142                         if (designfile == "not open") { abort = true; }
143                         else if (designfile == "not found") {                           
144                                 //if there is a current design file, use it
145                                 designfile = m->getDesignFile(); 
146                                 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
148                         }else { m->setDesignFile(designfile); }
149                         
150                         //if the user changes the output directory command factory will send this info to us in the output parameter 
151                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
152                                 outputDir = ""; 
153                                 outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it      
154                         }
155
156                         //check for optional parameter and set defaults
157                         // ...at some point should added some additional type checking...
158                         label = validParameter.validFile(parameters, "label", false);                   
159                         if (label == "not found") { label = ""; }
160                         else { 
161                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
162                                 else { allLines = 1;  }
163                         }
164                         
165                         groups = validParameter.validFile(parameters, "groups", false);                 
166                         if (groups == "not found") { groups = ""; pickedGroups = false; }
167                         else { 
168                                 pickedGroups = true;
169                                 m->splitAtDash(groups, Groups);
170                                 m->Groups = Groups;
171                         }
172                         
173                         sets = validParameter.validFile(parameters, "sets", false);                     
174                         if (sets == "not found") { sets = ""; }
175                         else { 
176                                 m->splitAtDash(sets, Sets);
177                         }
178
179                         
180                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
181                         convert(temp, iters); 
182                         
183                         temp = validParameter.validFile(parameters, "threshold", false);                        if (temp == "not found") { temp = "0.05"; }
184                         convert(temp, threshold); 
185                         
186                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
187                         m->setProcessors(temp);
188                         convert(temp, processors);
189                 }
190
191         }
192         catch(exception& e) {
193                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
194                 exit(1);
195         }
196 }
197 //**********************************************************************************************************************
198
199 int MetaStatsCommand::execute(){
200         try {
201         
202                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
203                 
204                 designMap = new GroupMap(designfile);
205                 designMap->readDesignMap();
206         
207                 input = new InputData(sharedfile, "sharedfile");
208                 lookup = input->getSharedRAbundVectors();
209                 string lastLabel = lookup[0]->getLabel();
210                 
211                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
212                 set<string> processedLabels;
213                 set<string> userLabels = labels;
214                 
215                 //setup the pairwise comparions of sets for metastats
216                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
217                 //make sure sets are all in designMap
218                 SharedUtil* util = new SharedUtil();  
219                 util->setGroups(Sets, designMap->namesOfGroups);  
220                 delete util;
221                 
222                 int numGroups = Sets.size();
223                 for (int a=0; a<numGroups; a++) { 
224                         for (int l = 0; l < a; l++) {   
225                                 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
226                                 namesOfGroupCombos.push_back(groups);
227                         }
228                 }
229         
230                 
231                 //only 1 combo
232                 if (numGroups == 2) { processors = 1; }
233                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
234                 
235                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
236                         if(processors != 1){
237                                 int numPairs = namesOfGroupCombos.size();
238                                 int numPairsPerProcessor = numPairs / processors;
239                         
240                                 for (int i = 0; i < processors; i++) {
241                                         int startPos = i * numPairsPerProcessor;
242                                         if(i == processors - 1){
243                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
244                                         }
245                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
246                                 }
247                         }
248                 #endif
249                 
250                 //as long as you are not at the end of the file or done wih the lines you want
251                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
252                         
253                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->Groups.clear(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]); } return 0; }
254         
255                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
256
257                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
258                                 process(lookup);
259                                 
260                                 processedLabels.insert(lookup[0]->getLabel());
261                                 userLabels.erase(lookup[0]->getLabel());
262                         }
263                         
264                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
265                                 string saveLabel = lookup[0]->getLabel();
266                         
267                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
268                                 lookup = input->getSharedRAbundVectors(lastLabel);
269                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
270                                 
271                                 process(lookup);
272                                 
273                                 processedLabels.insert(lookup[0]->getLabel());
274                                 userLabels.erase(lookup[0]->getLabel());
275                                 
276                                 //restore real lastlabel to save below
277                                 lookup[0]->setLabel(saveLabel);
278                         }
279                         
280                         lastLabel = lookup[0]->getLabel();
281                         //prevent memory leak
282                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
283                         
284                         if (m->control_pressed) {  outputTypes.clear(); m->Groups.clear(); delete input;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]); } return 0; }
285
286                         //get next line to process
287                         lookup = input->getSharedRAbundVectors();                               
288                 }
289                 
290                 if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); }  return 0; }
291
292                 //output error messages about any remaining user labels
293                 set<string>::iterator it;
294                 bool needToRun = false;
295                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
296                         m->mothurOut("Your file does not include the label " + *it); 
297                         if (processedLabels.count(lastLabel) != 1) {
298                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
299                                 needToRun = true;
300                         }else {
301                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
302                         }
303                 }
304         
305                 //run last label if you need to
306                 if (needToRun == true)  {
307                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
308                         lookup = input->getSharedRAbundVectors(lastLabel);
309                         
310                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
311                         
312                         process(lookup);
313                         
314                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
315                 }
316         
317                 //reset groups parameter
318                 m->Groups.clear();  
319                 delete input; 
320                 delete designMap;
321                 
322                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); } return 0;}
323                 
324                 m->mothurOutEndLine();
325                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
326                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
327                 m->mothurOutEndLine();
328                 
329                 return 0;
330         }
331         catch(exception& e) {
332                 m->errorOut(e, "MetaStatsCommand", "execute");
333                 exit(1);
334         }
335 }
336 //**********************************************************************************************************************
337
338 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
339         try {
340                 
341                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
342                                 if(processors == 1){
343                                         driver(0, namesOfGroupCombos.size(), thisLookUp);
344                                 }else{
345                                         int process = 1;
346                                         vector<int> processIDS;
347                 
348                                         //loop through and create all the processes you want
349                                         while (process != processors) {
350                                                 int pid = fork();
351                         
352                                                 if (pid > 0) {
353                                                         processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
354                                                         process++;
355                                                 }else if (pid == 0){
356                                                         driver(lines[process].start, lines[process].num, thisLookUp);
357                                                         exit(0);
358                                                 }else { 
359                                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
360                                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
361                                                         exit(0);
362                                                 }
363                                         }
364                                         
365                                         //do my part
366                                         driver(lines[0].start, lines[0].num, thisLookUp);
367                 
368                                         //force parent to wait until all the processes are done
369                                         for (int i=0;i<(processors-1);i++) { 
370                                                 int temp = processIDS[i];
371                                                 wait(&temp);
372                                         }
373                                 }
374                 #else
375                                 driver(0, namesOfGroupCombos.size(), thisLookUp);
376                 #endif
377
378                 return 0;
379                 
380         }
381         catch(exception& e) {
382                 m->errorOut(e, "MetaStatsCommand", "process");
383                 exit(1);
384         }
385 }
386 //**********************************************************************************************************************
387 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) { 
388         try {
389         
390                 //for each combo
391                 for (int c = start; c < (start+num); c++) {
392                         
393                         //get set names
394                         string setA = namesOfGroupCombos[c][0]; 
395                         string setB = namesOfGroupCombos[c][1];
396                         
397                         //get filename
398                         string outputFileName = outputDir +  m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
399                         outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
400                         int nameLength = outputFileName.length();
401                         char * output = new char[nameLength];
402                         strcpy(output, outputFileName.c_str());
403         
404                         //build matrix from shared rabunds
405                         double** data;
406                         data = new double*[thisLookUp[0]->getNumBins()];
407                         
408                         vector< vector<double> > data2; data2.resize(thisLookUp[0]->getNumBins());
409                         
410                         vector<SharedRAbundVector*> subset;
411                         int setACount = 0;
412                         int setBCount = 0;
413                         for (int i = 0; i < thisLookUp.size(); i++) {
414                                 string thisGroup = thisLookUp[i]->getGroup();
415                                 
416                                 //is this group for a set we want to compare??
417                                 //sorting the sets by putting setB at the back and setA in the front
418                                 if ((designMap->getGroup(thisGroup) == setB)) {  
419                                         subset.push_back(thisLookUp[i]);
420                                         setBCount++;
421                                 }else if ((designMap->getGroup(thisGroup) == setA)) {
422                                         subset.insert(subset.begin()+setACount, thisLookUp[i]);
423                                         setACount++;
424                                 }
425                         }
426                                                 
427                         if ((setACount == 0) || (setBCount == 0))  { 
428                                 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
429                                 outputNames.pop_back();
430                         }else {
431                                 //fill data
432                                 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
433                                         data[j] = new double[subset.size()];
434                                         data2[j].resize(subset.size(), 0.0);
435                                         for (int i = 0; i < subset.size(); i++) {
436                                                 data[j][i] = (subset[i]->getAbundance(j));
437                                                 data2[j][i] = (subset[i]->getAbundance(j));
438                                         }
439                                 }
440                                 
441                                 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); 
442                                 metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
443                                 
444                                 m->mothurOutEndLine();
445                                 MothurMetastats mothurMeta(threshold, iters);
446                                 mothurMeta.runMetastats(outputFileName+".myVersion" , data2, setACount);
447                                 m->mothurOutEndLine();
448                                 
449                                 m->mothurOutEndLine(); 
450                         }
451                         
452                         //free memory
453                         delete output;
454                         for(int i = 0; i < thisLookUp[0]->getNumBins(); i++)  {  delete[] data[i];  }
455                         delete[] data; 
456                 }
457                 
458                 return 0;
459
460         }
461         catch(exception& e) {
462                 m->errorOut(e, "MetaStatsCommand", "driver");
463                 exit(1);
464         }
465 }
466 //**********************************************************************************************************************