]> git.donarmstrong.com Git - mothur.git/blob - metastatscommand.cpp
added unix to ifdefs. minor changes while testing 1.24.0.
[mothur.git] / metastatscommand.cpp
1 /*
2  *  metastatscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/16/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
13
14
15 //**********************************************************************************************************************
16 vector<string> MetaStatsCommand::setParameters(){       
17         try {
18                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
19                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
20                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
22                 CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold);
23                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
24                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
25                 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "MetaStatsCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string MetaStatsCommand::getHelpString(){       
40         try {
41                 string helpString = "";
42                 helpString += "This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n";
43                 helpString += "The metastats command outputs a .metastats file. \n";
44                 helpString += "The metastats command parameters are shared, iters, threshold, groups, label, design, sets and processors.  The shared and design parameters are required, unless you have valid current files.\n";
45                 helpString += "The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n";
46                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
47                 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
48                 helpString += "The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic.  The default is 1000. \n";
49                 helpString += "The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n";
50                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
51                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
52                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
53                 helpString += "The metastats command should be in the following format: metastats(design=yourDesignFile).\n";
54                 helpString += "Example metastats(design=temp.design, groups=A-B-C).\n";
55                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
56                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "MetaStatsCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 MetaStatsCommand::MetaStatsCommand(){   
66         try {
67                 abort = true; calledHelp = true;
68                 setParameters();
69                 vector<string> tempOutNames;
70                 outputTypes["metastats"] = tempOutNames;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78
79 MetaStatsCommand::MetaStatsCommand(string option) {
80         try {
81                 abort = false; calledHelp = false;   
82                 allLines = 1;
83                 
84                 
85                 //allow user to run help
86                 if(option == "help") { help(); abort = true; calledHelp = true; }
87                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
88                 
89                 else {
90                         vector<string> myArray = setParameters();
91                         
92                         OptionParser parser(option);
93                         map<string,string> parameters = parser.getParameters();
94                         
95                         ValidParameters validParameter;
96                         
97                         //check to make sure all parameters are valid for command
98                         map<string,string>::iterator it;
99                         for (it = parameters.begin(); it != parameters.end(); it++) { 
100                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
101                         }
102                         
103                         //initialize outputTypes
104                         vector<string> tempOutNames;
105                         outputTypes["metastats"] = tempOutNames;
106                         
107                         
108                         //if the user changes the input directory command factory will send this info to us in the output parameter 
109                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
110                         if (inputDir == "not found"){   inputDir = "";          }
111                         else {
112                                 string path;
113                                 it = parameters.find("design");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
119                                 }
120                                 
121                                 it = parameters.find("shared");
122                                 //user has given a template file
123                                 if(it != parameters.end()){ 
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
127                                 }
128                                 
129                         }
130                         
131                         //check for required parameters
132                         sharedfile = validParameter.validFile(parameters, "shared", true);
133                         if (sharedfile == "not open") { abort = true; }
134                         else if (sharedfile == "not found") {                           //if there is a current shared file, use it
135                                 sharedfile = m->getSharedFile(); 
136                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
137                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
138                         }else { m->setSharedFile(sharedfile); }
139                         
140                         //check for required parameters
141                         designfile = validParameter.validFile(parameters, "design", true);
142                         if (designfile == "not open") { abort = true; }
143                         else if (designfile == "not found") {                           
144                                 //if there is a current design file, use it
145                                 designfile = m->getDesignFile(); 
146                                 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
148                         }else { m->setDesignFile(designfile); }
149                         
150                         //if the user changes the output directory command factory will send this info to us in the output parameter 
151                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
152                                 outputDir = ""; 
153                                 outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it      
154                         }
155
156                         //check for optional parameter and set defaults
157                         // ...at some point should added some additional type checking...
158                         label = validParameter.validFile(parameters, "label", false);                   
159                         if (label == "not found") { label = ""; }
160                         else { 
161                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
162                                 else { allLines = 1;  }
163                         }
164                         
165                         groups = validParameter.validFile(parameters, "groups", false);                 
166                         if (groups == "not found") { groups = ""; pickedGroups = false; }
167                         else { 
168                                 pickedGroups = true;
169                                 m->splitAtDash(groups, Groups);
170                                 m->setGroups(Groups);
171                         }
172                         
173                         sets = validParameter.validFile(parameters, "sets", false);                     
174                         if (sets == "not found") { sets = ""; }
175                         else { 
176                                 m->splitAtDash(sets, Sets);
177                         }
178
179                         
180                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
181                         m->mothurConvert(temp, iters); 
182                         
183                         temp = validParameter.validFile(parameters, "threshold", false);                        if (temp == "not found") { temp = "0.05"; }
184                         m->mothurConvert(temp, threshold); 
185                         
186                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
187                         m->setProcessors(temp);
188                         m->mothurConvert(temp, processors);
189                 }
190
191         }
192         catch(exception& e) {
193                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
194                 exit(1);
195         }
196 }
197 //**********************************************************************************************************************
198
199 int MetaStatsCommand::execute(){
200         try {
201         
202                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
203                 
204                 designMap = new GroupMap(designfile);
205                 designMap->readDesignMap();
206         
207                 input = new InputData(sharedfile, "sharedfile");
208                 lookup = input->getSharedRAbundVectors();
209                 string lastLabel = lookup[0]->getLabel();
210                 
211                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
212                 set<string> processedLabels;
213                 set<string> userLabels = labels;
214                 
215                 //setup the pairwise comparions of sets for metastats
216                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
217                 //make sure sets are all in designMap
218                 SharedUtil* util = new SharedUtil(); 
219                 vector<string> dGroups = designMap->getNamesOfGroups();
220                 util->setGroups(Sets, dGroups);  
221                 delete util;
222                 
223                 int numGroups = Sets.size();
224                 for (int a=0; a<numGroups; a++) { 
225                         for (int l = 0; l < a; l++) {   
226                                 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
227                                 namesOfGroupCombos.push_back(groups);
228                         }
229                 }
230         
231                 
232                 //only 1 combo
233                 if (numGroups == 2) { processors = 1; }
234                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
235
236         if(processors != 1){
237             int numPairs = namesOfGroupCombos.size();
238             int numPairsPerProcessor = numPairs / processors;
239                         
240             for (int i = 0; i < processors; i++) {
241                 int startPos = i * numPairsPerProcessor;
242                 if(i == processors - 1){
243                     numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
244                 }
245                 lines.push_back(linePair(startPos, numPairsPerProcessor));
246             }
247         }
248                 
249                 //as long as you are not at the end of the file or done wih the lines you want
250                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
251                         
252                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->clearGroups(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
253         
254                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
255
256                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
257                                 process(lookup);
258                                 
259                                 processedLabels.insert(lookup[0]->getLabel());
260                                 userLabels.erase(lookup[0]->getLabel());
261                         }
262                         
263                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
264                                 string saveLabel = lookup[0]->getLabel();
265                         
266                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
267                                 lookup = input->getSharedRAbundVectors(lastLabel);
268                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
269                                 
270                                 process(lookup);
271                                 
272                                 processedLabels.insert(lookup[0]->getLabel());
273                                 userLabels.erase(lookup[0]->getLabel());
274                                 
275                                 //restore real lastlabel to save below
276                                 lookup[0]->setLabel(saveLabel);
277                         }
278                         
279                         lastLabel = lookup[0]->getLabel();
280                         //prevent memory leak
281                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
282                         
283                         if (m->control_pressed) {  outputTypes.clear(); m->clearGroups(); delete input;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]); } return 0; }
284
285                         //get next line to process
286                         lookup = input->getSharedRAbundVectors();                               
287                 }
288                 
289                 if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }
290
291                 //output error messages about any remaining user labels
292                 set<string>::iterator it;
293                 bool needToRun = false;
294                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
295                         m->mothurOut("Your file does not include the label " + *it); 
296                         if (processedLabels.count(lastLabel) != 1) {
297                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
298                                 needToRun = true;
299                         }else {
300                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
301                         }
302                 }
303         
304                 //run last label if you need to
305                 if (needToRun == true)  {
306                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
307                         lookup = input->getSharedRAbundVectors(lastLabel);
308                         
309                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
310                         
311                         process(lookup);
312                         
313                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
314                 }
315         
316                 //reset groups parameter
317                 m->clearGroups();  
318                 delete input; 
319                 delete designMap;
320                 
321                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); } return 0;}
322                 
323                 m->mothurOutEndLine();
324                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
325                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
326                 m->mothurOutEndLine();
327                 
328                 return 0;
329         }
330         catch(exception& e) {
331                 m->errorOut(e, "MetaStatsCommand", "execute");
332                 exit(1);
333         }
334 }
335 //**********************************************************************************************************************
336
337 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
338         try {
339                 
340                 
341                                 if(processors == 1){
342                                         driver(0, namesOfGroupCombos.size(), thisLookUp);
343                                 }else{
344                                         int process = 1;
345                                         vector<int> processIDS;
346                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
347                                         //loop through and create all the processes you want
348                                         while (process != processors) {
349                                                 int pid = fork();
350                         
351                                                 if (pid > 0) {
352                                                         processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
353                                                         process++;
354                                                 }else if (pid == 0){
355                                                         driver(lines[process].start, lines[process].num, thisLookUp);
356                                                         exit(0);
357                                                 }else { 
358                                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
359                                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
360                                                         exit(0);
361                                                 }
362                                         }
363                                         
364                                         //do my part
365                                         driver(lines[0].start, lines[0].num, thisLookUp);
366                 
367                                         //force parent to wait until all the processes are done
368                                         for (int i=0;i<(processors-1);i++) { 
369                                                 int temp = processIDS[i];
370                                                 wait(&temp);
371                                         }
372         #else
373                     
374                     //////////////////////////////////////////////////////////////////////////////////////////////////////
375                     //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
376                     //Above fork() will clone, so memory is separate, but that's not the case with windows, 
377                     //Taking advantage of shared memory to pass results vectors.
378                     //////////////////////////////////////////////////////////////////////////////////////////////////////
379                     
380                     vector<metastatsData*> pDataArray; 
381                     DWORD   dwThreadIdArray[processors-1];
382                     HANDLE  hThreadArray[processors-1]; 
383                     
384                     //Create processor worker threads.
385                     for( int i=1; i<processors; i++ ){
386                         
387                         //make copy of lookup so we don't get access violations
388                         vector<SharedRAbundVector*> newLookup;
389                         vector<string> designMapGroups;
390                         for (int k = 0; k < thisLookUp.size(); k++) {
391                             SharedRAbundVector* temp = new SharedRAbundVector();
392                             temp->setLabel(thisLookUp[k]->getLabel());
393                             temp->setGroup(thisLookUp[k]->getGroup());
394                             newLookup.push_back(temp);
395                             designMapGroups.push_back(designMap->getGroup(thisLookUp[k]->getGroup()));
396                         }
397                         
398                         //for each bin
399                         for (int k = 0; k < thisLookUp[0]->getNumBins(); k++) {
400                             if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
401                             for (int j = 0; j < thisLookUp.size(); j++) { newLookup[j]->push_back(thisLookUp[j]->getAbundance(k), thisLookUp[j]->getGroup()); }
402                         }
403                         
404                         // Allocate memory for thread data.
405                         metastatsData* tempSum = new metastatsData(sharedfile, outputDir, m, lines[i].start, lines[i].num, namesOfGroupCombos, newLookup, designMapGroups, iters, threshold);
406                         pDataArray.push_back(tempSum);
407                         processIDS.push_back(i);
408                         
409                         hThreadArray[i-1] = CreateThread(NULL, 0, MyMetastatsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
410                     }
411                     
412                     //do my part
413                                         driver(lines[0].start, lines[0].num, thisLookUp);
414                     
415                     //Wait until all threads have terminated.
416                     WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
417                     
418                     //Close all thread handles and free memory allocations.
419                     for(int i=0; i < pDataArray.size(); i++){
420                         for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) {  delete pDataArray[i]->thisLookUp[j];  } 
421                         for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {  
422                             outputNames.push_back(pDataArray[i]->outputNames[j]);
423                             outputTypes["metastats"].push_back(pDataArray[i]->outputNames[j]);
424                         }
425                                                 
426                         CloseHandle(hThreadArray[i]);
427                         delete pDataArray[i];
428                     }
429         #endif
430
431                                 }
432                 
433                 return 0;
434                 
435         }
436         catch(exception& e) {
437                 m->errorOut(e, "MetaStatsCommand", "process");
438                 exit(1);
439         }
440 }
441 //**********************************************************************************************************************
442 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) { 
443         try {
444         
445                 //for each combo
446                 for (int c = start; c < (start+num); c++) {
447                         
448                         //get set names
449                         string setA = namesOfGroupCombos[c][0]; 
450                         string setB = namesOfGroupCombos[c][1];
451                 
452                         //get filename
453                         string outputFileName = outputDir +  m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
454                         outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
455                         //int nameLength = outputFileName.length();
456                         //char * output = new char[nameLength];
457                         //strcpy(output, outputFileName.c_str());
458         
459                         //build matrix from shared rabunds
460                         //double** data;
461                         //data = new double*[thisLookUp[0]->getNumBins()];
462                         
463                         vector< vector<double> > data2; data2.resize(thisLookUp[0]->getNumBins());
464                         
465                         vector<SharedRAbundVector*> subset;
466                         int setACount = 0;
467                         int setBCount = 0;
468                         for (int i = 0; i < thisLookUp.size(); i++) {
469                                 string thisGroup = thisLookUp[i]->getGroup();
470                                 
471                                 //is this group for a set we want to compare??
472                                 //sorting the sets by putting setB at the back and setA in the front
473                                 if ((designMap->getGroup(thisGroup) == setB)) {  
474                                         subset.push_back(thisLookUp[i]);
475                                         setBCount++;
476                                 }else if ((designMap->getGroup(thisGroup) == setA)) {
477                                         subset.insert(subset.begin()+setACount, thisLookUp[i]);
478                                         setACount++;
479                                 }
480                         }
481                                                 
482                         if ((setACount == 0) || (setBCount == 0))  { 
483                                 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
484                                 outputNames.pop_back();
485                         }else {
486                                 //fill data
487                                 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
488                                         //data[j] = new double[subset.size()];
489                                         data2[j].resize(subset.size(), 0.0);
490                                         for (int i = 0; i < subset.size(); i++) {
491                                                 //data[j][i] = (subset[i]->getAbundance(j));
492                                                 data2[j][i] = (subset[i]->getAbundance(j));
493                                         }
494                                 }
495                                 
496                                 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); 
497                                 //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
498                                 
499                                 m->mothurOutEndLine();
500                                 MothurMetastats mothurMeta(threshold, iters);
501                                 mothurMeta.runMetastats(outputFileName , data2, setACount);
502                                 m->mothurOutEndLine();
503                                 
504                                 m->mothurOutEndLine(); 
505                         }
506                         
507                         //free memory
508                         //delete output;
509                         //for(int i = 0; i < thisLookUp[0]->getNumBins(); i++)  {  delete[] data[i];  }
510                         //delete[] data; 
511                 }
512                 
513                 return 0;
514
515         }
516         catch(exception& e) {
517                 m->errorOut(e, "MetaStatsCommand", "driver");
518                 exit(1);
519         }
520 }
521 //**********************************************************************************************************************