]> git.donarmstrong.com Git - mothur.git/blob - metastatscommand.cpp
working on chimera.perseus. made removeConfidences function smarter. Fixed bug in...
[mothur.git] / metastatscommand.cpp
1 /*
2  *  metastatscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/16/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
13 #include "mothurmetastats.h"
14
15 //**********************************************************************************************************************
16 vector<string> MetaStatsCommand::setParameters(){       
17         try {
18                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
19                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
20                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
22                 CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold);
23                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
24                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
25                 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "MetaStatsCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string MetaStatsCommand::getHelpString(){       
40         try {
41                 string helpString = "";
42                 helpString += "This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n";
43                 helpString += "The metastats command outputs a .metastats file. \n";
44                 helpString += "The metastats command parameters are shared, iters, threshold, groups, label, design, sets and processors.  The shared and design parameters are required, unless you have valid current files.\n";
45                 helpString += "The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n";
46                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
47                 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
48                 helpString += "The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic.  The default is 1000. \n";
49                 helpString += "The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n";
50                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
51                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
52                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
53                 helpString += "The metastats command should be in the following format: metastats(design=yourDesignFile).\n";
54                 helpString += "Example metastats(design=temp.design, groups=A-B-C).\n";
55                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
56                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "MetaStatsCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 MetaStatsCommand::MetaStatsCommand(){   
66         try {
67                 abort = true; calledHelp = true;
68                 setParameters();
69                 vector<string> tempOutNames;
70                 outputTypes["metastats"] = tempOutNames;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78
79 MetaStatsCommand::MetaStatsCommand(string option) {
80         try {
81                 abort = false; calledHelp = false;   
82                 allLines = 1;
83                 
84                 
85                 //allow user to run help
86                 if(option == "help") { help(); abort = true; calledHelp = true; }
87                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
88                 
89                 else {
90                         vector<string> myArray = setParameters();
91                         
92                         OptionParser parser(option);
93                         map<string,string> parameters = parser.getParameters();
94                         
95                         ValidParameters validParameter;
96                         
97                         //check to make sure all parameters are valid for command
98                         map<string,string>::iterator it;
99                         for (it = parameters.begin(); it != parameters.end(); it++) { 
100                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
101                         }
102                         
103                         //initialize outputTypes
104                         vector<string> tempOutNames;
105                         outputTypes["metastats"] = tempOutNames;
106                         
107                         
108                         //if the user changes the input directory command factory will send this info to us in the output parameter 
109                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
110                         if (inputDir == "not found"){   inputDir = "";          }
111                         else {
112                                 string path;
113                                 it = parameters.find("design");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
119                                 }
120                                 
121                                 it = parameters.find("shared");
122                                 //user has given a template file
123                                 if(it != parameters.end()){ 
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
127                                 }
128                                 
129                         }
130                         
131                         //check for required parameters
132                         sharedfile = validParameter.validFile(parameters, "shared", true);
133                         if (sharedfile == "not open") { abort = true; }
134                         else if (sharedfile == "not found") {                           //if there is a current shared file, use it
135                                 sharedfile = m->getSharedFile(); 
136                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
137                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
138                         }else { m->setSharedFile(sharedfile); }
139                         
140                         //check for required parameters
141                         designfile = validParameter.validFile(parameters, "design", true);
142                         if (designfile == "not open") { abort = true; }
143                         else if (designfile == "not found") {                           
144                                 //if there is a current design file, use it
145                                 designfile = m->getDesignFile(); 
146                                 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
148                         }else { m->setDesignFile(designfile); }
149                         
150                         //if the user changes the output directory command factory will send this info to us in the output parameter 
151                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
152                                 outputDir = ""; 
153                                 outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it      
154                         }
155
156                         //check for optional parameter and set defaults
157                         // ...at some point should added some additional type checking...
158                         label = validParameter.validFile(parameters, "label", false);                   
159                         if (label == "not found") { label = ""; }
160                         else { 
161                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
162                                 else { allLines = 1;  }
163                         }
164                         
165                         groups = validParameter.validFile(parameters, "groups", false);                 
166                         if (groups == "not found") { groups = ""; pickedGroups = false; }
167                         else { 
168                                 pickedGroups = true;
169                                 m->splitAtDash(groups, Groups);
170                                 m->setGroups(Groups);
171                         }
172                         
173                         sets = validParameter.validFile(parameters, "sets", false);                     
174                         if (sets == "not found") { sets = ""; }
175                         else { 
176                                 m->splitAtDash(sets, Sets);
177                         }
178
179                         
180                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
181                         convert(temp, iters); 
182                         
183                         temp = validParameter.validFile(parameters, "threshold", false);                        if (temp == "not found") { temp = "0.05"; }
184                         convert(temp, threshold); 
185                         
186                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
187                         m->setProcessors(temp);
188                         convert(temp, processors);
189                 }
190
191         }
192         catch(exception& e) {
193                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
194                 exit(1);
195         }
196 }
197 //**********************************************************************************************************************
198
199 int MetaStatsCommand::execute(){
200         try {
201         
202                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
203                 
204                 designMap = new GroupMap(designfile);
205                 designMap->readDesignMap();
206         
207                 input = new InputData(sharedfile, "sharedfile");
208                 lookup = input->getSharedRAbundVectors();
209                 string lastLabel = lookup[0]->getLabel();
210                 
211                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
212                 set<string> processedLabels;
213                 set<string> userLabels = labels;
214                 
215                 //setup the pairwise comparions of sets for metastats
216                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
217                 //make sure sets are all in designMap
218                 SharedUtil* util = new SharedUtil(); 
219                 vector<string> dGroups = designMap->getNamesOfGroups();
220                 util->setGroups(Sets, dGroups);  
221                 delete util;
222                 
223                 int numGroups = Sets.size();
224                 for (int a=0; a<numGroups; a++) { 
225                         for (int l = 0; l < a; l++) {   
226                                 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
227                                 namesOfGroupCombos.push_back(groups);
228                         }
229                 }
230         
231                 
232                 //only 1 combo
233                 if (numGroups == 2) { processors = 1; }
234                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
235                 
236                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
237                         if(processors != 1){
238                                 int numPairs = namesOfGroupCombos.size();
239                                 int numPairsPerProcessor = numPairs / processors;
240                         
241                                 for (int i = 0; i < processors; i++) {
242                                         int startPos = i * numPairsPerProcessor;
243                                         if(i == processors - 1){
244                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
245                                         }
246                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
247                                 }
248                         }
249                 #endif
250                 
251                 //as long as you are not at the end of the file or done wih the lines you want
252                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
253                         
254                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->clearGroups(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
255         
256                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
257
258                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
259                                 process(lookup);
260                                 
261                                 processedLabels.insert(lookup[0]->getLabel());
262                                 userLabels.erase(lookup[0]->getLabel());
263                         }
264                         
265                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
266                                 string saveLabel = lookup[0]->getLabel();
267                         
268                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
269                                 lookup = input->getSharedRAbundVectors(lastLabel);
270                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
271                                 
272                                 process(lookup);
273                                 
274                                 processedLabels.insert(lookup[0]->getLabel());
275                                 userLabels.erase(lookup[0]->getLabel());
276                                 
277                                 //restore real lastlabel to save below
278                                 lookup[0]->setLabel(saveLabel);
279                         }
280                         
281                         lastLabel = lookup[0]->getLabel();
282                         //prevent memory leak
283                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
284                         
285                         if (m->control_pressed) {  outputTypes.clear(); m->clearGroups(); delete input;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]); } return 0; }
286
287                         //get next line to process
288                         lookup = input->getSharedRAbundVectors();                               
289                 }
290                 
291                 if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }
292
293                 //output error messages about any remaining user labels
294                 set<string>::iterator it;
295                 bool needToRun = false;
296                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
297                         m->mothurOut("Your file does not include the label " + *it); 
298                         if (processedLabels.count(lastLabel) != 1) {
299                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
300                                 needToRun = true;
301                         }else {
302                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
303                         }
304                 }
305         
306                 //run last label if you need to
307                 if (needToRun == true)  {
308                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
309                         lookup = input->getSharedRAbundVectors(lastLabel);
310                         
311                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
312                         
313                         process(lookup);
314                         
315                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
316                 }
317         
318                 //reset groups parameter
319                 m->clearGroups();  
320                 delete input; 
321                 delete designMap;
322                 
323                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); } return 0;}
324                 
325                 m->mothurOutEndLine();
326                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
327                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
328                 m->mothurOutEndLine();
329                 
330                 return 0;
331         }
332         catch(exception& e) {
333                 m->errorOut(e, "MetaStatsCommand", "execute");
334                 exit(1);
335         }
336 }
337 //**********************************************************************************************************************
338
339 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
340         try {
341                 
342                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
343                                 if(processors == 1){
344                                         driver(0, namesOfGroupCombos.size(), thisLookUp);
345                                 }else{
346                                         int process = 1;
347                                         vector<int> processIDS;
348                 
349                                         //loop through and create all the processes you want
350                                         while (process != processors) {
351                                                 int pid = fork();
352                         
353                                                 if (pid > 0) {
354                                                         processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
355                                                         process++;
356                                                 }else if (pid == 0){
357                                                         driver(lines[process].start, lines[process].num, thisLookUp);
358                                                         exit(0);
359                                                 }else { 
360                                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
361                                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
362                                                         exit(0);
363                                                 }
364                                         }
365                                         
366                                         //do my part
367                                         driver(lines[0].start, lines[0].num, thisLookUp);
368                 
369                                         //force parent to wait until all the processes are done
370                                         for (int i=0;i<(processors-1);i++) { 
371                                                 int temp = processIDS[i];
372                                                 wait(&temp);
373                                         }
374                                 }
375                 #else
376                                 driver(0, namesOfGroupCombos.size(), thisLookUp);
377                 #endif
378
379                 return 0;
380                 
381         }
382         catch(exception& e) {
383                 m->errorOut(e, "MetaStatsCommand", "process");
384                 exit(1);
385         }
386 }
387 //**********************************************************************************************************************
388 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) { 
389         try {
390         
391                 //for each combo
392                 for (int c = start; c < (start+num); c++) {
393                         
394                         //get set names
395                         string setA = namesOfGroupCombos[c][0]; 
396                         string setB = namesOfGroupCombos[c][1];
397                 cout << setA << '\t' << setB << endl;
398                         //get filename
399                         string outputFileName = outputDir +  m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
400                         outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
401                         //int nameLength = outputFileName.length();
402                         //char * output = new char[nameLength];
403                         //strcpy(output, outputFileName.c_str());
404         
405                         //build matrix from shared rabunds
406                         //double** data;
407                         //data = new double*[thisLookUp[0]->getNumBins()];
408                         
409                         vector< vector<double> > data2; data2.resize(thisLookUp[0]->getNumBins());
410                         
411                         vector<SharedRAbundVector*> subset;
412                         int setACount = 0;
413                         int setBCount = 0;
414                         for (int i = 0; i < thisLookUp.size(); i++) {
415                                 string thisGroup = thisLookUp[i]->getGroup();
416                                 
417                                 //is this group for a set we want to compare??
418                                 //sorting the sets by putting setB at the back and setA in the front
419                                 if ((designMap->getGroup(thisGroup) == setB)) {  
420                                         subset.push_back(thisLookUp[i]);
421                                         setBCount++;
422                                 }else if ((designMap->getGroup(thisGroup) == setA)) {
423                                         subset.insert(subset.begin()+setACount, thisLookUp[i]);
424                                         setACount++;
425                                 }
426                         }
427                         
428                         for (int i = 0; i < subset.size(); i++) { cout << designMap->getGroup(subset[i]->getGroup()) << endl; }
429                         cout << setACount << endl;
430                         
431                         if ((setACount == 0) || (setBCount == 0))  { 
432                                 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
433                                 outputNames.pop_back();
434                         }else {
435                                 //fill data
436                                 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
437                                         //data[j] = new double[subset.size()];
438                                         data2[j].resize(subset.size(), 0.0);
439                                         for (int i = 0; i < subset.size(); i++) {
440                                                 //data[j][i] = (subset[i]->getAbundance(j));
441                                                 data2[j][i] = (subset[i]->getAbundance(j));
442                                         }
443                                 }
444                                 
445                                 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); 
446                                 //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
447                                 
448                                 m->mothurOutEndLine();
449                                 MothurMetastats mothurMeta(threshold, iters);
450                                 mothurMeta.runMetastats(outputFileName , data2, setACount);
451                                 m->mothurOutEndLine();
452                                 
453                                 m->mothurOutEndLine(); 
454                         }
455                         
456                         //free memory
457                         //delete output;
458                         //for(int i = 0; i < thisLookUp[0]->getNumBins(); i++)  {  delete[] data[i];  }
459                         //delete[] data; 
460                 }
461                 
462                 return 0;
463
464         }
465         catch(exception& e) {
466                 m->errorOut(e, "MetaStatsCommand", "driver");
467                 exit(1);
468         }
469 }
470 //**********************************************************************************************************************