]> git.donarmstrong.com Git - mothur.git/blob - metastatscommand.cpp
e23224bf20e9977e842bc78e765cd95e018a1666
[mothur.git] / metastatscommand.cpp
1 /*
2  *  metastatscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/16/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "metastatscommand.h"
11 #include "metastats.h"
12 #include "sharedutilities.h"
13
14
15 //**********************************************************************************************************************
16 vector<string> MetaStatsCommand::setParameters(){       
17         try {
18                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
19                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
20                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
22                 CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold);
23                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
24                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
25                 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "MetaStatsCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string MetaStatsCommand::getHelpString(){       
40         try {
41                 string helpString = "";
42                 helpString += "This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n";
43                 helpString += "The metastats command outputs a .metastats file. \n";
44                 helpString += "The metastats command parameters are shared, iters, threshold, groups, label, design, sets and processors.  The shared and design parameters are required, unless you have valid current files.\n";
45                 helpString += "The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n";
46                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
47                 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
48                 helpString += "The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic.  The default is 1000. \n";
49                 helpString += "The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n";
50                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
51                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
52                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
53                 helpString += "The metastats command should be in the following format: metastats(design=yourDesignFile).\n";
54                 helpString += "Example metastats(design=temp.design, groups=A-B-C).\n";
55                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
56                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "MetaStatsCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 MetaStatsCommand::MetaStatsCommand(){   
66         try {
67                 abort = true; calledHelp = true;
68                 setParameters();
69                 vector<string> tempOutNames;
70                 outputTypes["metastats"] = tempOutNames;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78
79 MetaStatsCommand::MetaStatsCommand(string option) {
80         try {
81                 abort = false; calledHelp = false;   
82                 allLines = 1;
83                 
84                 
85                 //allow user to run help
86                 if(option == "help") { help(); abort = true; calledHelp = true; }
87                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
88                 
89                 else {
90                         vector<string> myArray = setParameters();
91                         
92                         OptionParser parser(option);
93                         map<string,string> parameters = parser.getParameters();
94                         
95                         ValidParameters validParameter;
96                         
97                         //check to make sure all parameters are valid for command
98                         map<string,string>::iterator it;
99                         for (it = parameters.begin(); it != parameters.end(); it++) { 
100                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
101                         }
102                         
103                         //initialize outputTypes
104                         vector<string> tempOutNames;
105                         outputTypes["metastats"] = tempOutNames;
106                         
107                         
108                         //if the user changes the input directory command factory will send this info to us in the output parameter 
109                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
110                         if (inputDir == "not found"){   inputDir = "";          }
111                         else {
112                                 string path;
113                                 it = parameters.find("design");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
119                                 }
120                                 
121                                 it = parameters.find("shared");
122                                 //user has given a template file
123                                 if(it != parameters.end()){ 
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
127                                 }
128                                 
129                         }
130                         
131                         //check for required parameters
132                         sharedfile = validParameter.validFile(parameters, "shared", true);
133                         if (sharedfile == "not open") { abort = true; }
134                         else if (sharedfile == "not found") {                           //if there is a current shared file, use it
135                                 sharedfile = m->getSharedFile(); 
136                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
137                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
138                         }else { m->setSharedFile(sharedfile); }
139                         
140                         //check for required parameters
141                         designfile = validParameter.validFile(parameters, "design", true);
142                         if (designfile == "not open") { abort = true; }
143                         else if (designfile == "not found") {                           
144                                 //if there is a current design file, use it
145                                 designfile = m->getDesignFile(); 
146                                 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
148                         }else { m->setDesignFile(designfile); }
149                         
150                         //if the user changes the output directory command factory will send this info to us in the output parameter 
151                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
152                                 outputDir = ""; 
153                                 outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it      
154                         }
155
156                         //check for optional parameter and set defaults
157                         // ...at some point should added some additional type checking...
158                         label = validParameter.validFile(parameters, "label", false);                   
159                         if (label == "not found") { label = ""; }
160                         else { 
161                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
162                                 else { allLines = 1;  }
163                         }
164                         
165                         groups = validParameter.validFile(parameters, "groups", false);                 
166                         if (groups == "not found") { groups = ""; pickedGroups = false; }
167                         else { 
168                                 pickedGroups = true;
169                                 m->splitAtDash(groups, Groups);
170                                 m->setGroups(Groups);
171                         }
172                         
173                         sets = validParameter.validFile(parameters, "sets", false);                     
174                         if (sets == "not found") { sets = ""; }
175                         else { 
176                                 m->splitAtDash(sets, Sets);
177                         }
178
179                         
180                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
181                         m->mothurConvert(temp, iters); 
182                         
183                         temp = validParameter.validFile(parameters, "threshold", false);                        if (temp == "not found") { temp = "0.05"; }
184                         m->mothurConvert(temp, threshold); 
185                         
186                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
187                         m->setProcessors(temp);
188                         m->mothurConvert(temp, processors);
189                 }
190
191         }
192         catch(exception& e) {
193                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
194                 exit(1);
195         }
196 }
197 //**********************************************************************************************************************
198
199 int MetaStatsCommand::execute(){
200         try {
201         
202                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
203                 
204                 designMap = new GroupMap(designfile);
205                 designMap->readDesignMap();
206         
207                 input = new InputData(sharedfile, "sharedfile");
208                 lookup = input->getSharedRAbundVectors();
209                 string lastLabel = lookup[0]->getLabel();
210                 
211                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
212                 set<string> processedLabels;
213                 set<string> userLabels = labels;
214                 
215                 //setup the pairwise comparions of sets for metastats
216                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
217                 //make sure sets are all in designMap
218                 SharedUtil* util = new SharedUtil(); 
219                 vector<string> dGroups = designMap->getNamesOfGroups();
220                 util->setGroups(Sets, dGroups);  
221                 delete util;
222                 
223                 int numGroups = Sets.size();
224                 for (int a=0; a<numGroups; a++) { 
225                         for (int l = 0; l < a; l++) {   
226                                 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
227                                 namesOfGroupCombos.push_back(groups);
228                         }
229                 }
230         
231                 
232                 //only 1 combo
233                 if (numGroups == 2) { processors = 1; }
234                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
235                 
236 //              #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
237                         if(processors != 1){
238                                 int numPairs = namesOfGroupCombos.size();
239                                 int numPairsPerProcessor = numPairs / processors;
240                         
241                                 for (int i = 0; i < processors; i++) {
242                                         int startPos = i * numPairsPerProcessor;
243                                         if(i == processors - 1){
244                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
245                                         }
246                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
247                                 }
248                         }
249 //              #endif
250                 
251                 //as long as you are not at the end of the file or done wih the lines you want
252                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
253                         
254                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->clearGroups(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
255         
256                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
257
258                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
259                                 process(lookup);
260                                 
261                                 processedLabels.insert(lookup[0]->getLabel());
262                                 userLabels.erase(lookup[0]->getLabel());
263                         }
264                         
265                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
266                                 string saveLabel = lookup[0]->getLabel();
267                         
268                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
269                                 lookup = input->getSharedRAbundVectors(lastLabel);
270                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
271                                 
272                                 process(lookup);
273                                 
274                                 processedLabels.insert(lookup[0]->getLabel());
275                                 userLabels.erase(lookup[0]->getLabel());
276                                 
277                                 //restore real lastlabel to save below
278                                 lookup[0]->setLabel(saveLabel);
279                         }
280                         
281                         lastLabel = lookup[0]->getLabel();
282                         //prevent memory leak
283                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
284                         
285                         if (m->control_pressed) {  outputTypes.clear(); m->clearGroups(); delete input;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]); } return 0; }
286
287                         //get next line to process
288                         lookup = input->getSharedRAbundVectors();                               
289                 }
290                 
291                 if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }
292
293                 //output error messages about any remaining user labels
294                 set<string>::iterator it;
295                 bool needToRun = false;
296                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
297                         m->mothurOut("Your file does not include the label " + *it); 
298                         if (processedLabels.count(lastLabel) != 1) {
299                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
300                                 needToRun = true;
301                         }else {
302                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
303                         }
304                 }
305         
306                 //run last label if you need to
307                 if (needToRun == true)  {
308                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
309                         lookup = input->getSharedRAbundVectors(lastLabel);
310                         
311                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
312                         
313                         process(lookup);
314                         
315                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
316                 }
317         
318                 //reset groups parameter
319                 m->clearGroups();  
320                 delete input; 
321                 delete designMap;
322                 
323                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); } return 0;}
324                 
325                 m->mothurOutEndLine();
326                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
327                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
328                 m->mothurOutEndLine();
329                 
330                 return 0;
331         }
332         catch(exception& e) {
333                 m->errorOut(e, "MetaStatsCommand", "execute");
334                 exit(1);
335         }
336 }
337 //**********************************************************************************************************************
338
339 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
340         try {
341                 
342                 
343                                 if(processors == 1){
344                                         driver(0, namesOfGroupCombos.size(), thisLookUp);
345                                 }else{
346                                         int process = 1;
347                                         vector<int> processIDS;
348                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
349                                         //loop through and create all the processes you want
350                                         while (process != processors) {
351                                                 int pid = fork();
352                         
353                                                 if (pid > 0) {
354                                                         processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
355                                                         process++;
356                                                 }else if (pid == 0){
357                                                         driver(lines[process].start, lines[process].num, thisLookUp);
358                                                         exit(0);
359                                                 }else { 
360                                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
361                                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
362                                                         exit(0);
363                                                 }
364                                         }
365                                         
366                                         //do my part
367                                         driver(lines[0].start, lines[0].num, thisLookUp);
368                 
369                                         //force parent to wait until all the processes are done
370                                         for (int i=0;i<(processors-1);i++) { 
371                                                 int temp = processIDS[i];
372                                                 wait(&temp);
373                                         }
374         #else
375                     
376                     //////////////////////////////////////////////////////////////////////////////////////////////////////
377                     //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
378                     //Above fork() will clone, so memory is separate, but that's not the case with windows, 
379                     //Taking advantage of shared memory to pass results vectors.
380                     //////////////////////////////////////////////////////////////////////////////////////////////////////
381                     
382                     vector<metastatsData*> pDataArray; 
383                     DWORD   dwThreadIdArray[processors-1];
384                     HANDLE  hThreadArray[processors-1]; 
385                     
386                     //Create processor worker threads.
387                     for( int i=1; i<processors; i++ ){
388                         
389                         //make copy of lookup so we don't get access violations
390                         vector<SharedRAbundVector*> newLookup;
391                         vector<string> designMapGroups;
392                         for (int k = 0; k < thisLookUp.size(); k++) {
393                             SharedRAbundVector* temp = new SharedRAbundVector();
394                             temp->setLabel(thisLookUp[k]->getLabel());
395                             temp->setGroup(thisLookUp[k]->getGroup());
396                             newLookup.push_back(temp);
397                             designMapGroups.push_back(designMap->getGroup(thisLookUp[k]->getGroup()));
398                         }
399                         
400                         //for each bin
401                         for (int k = 0; k < thisLookUp[0]->getNumBins(); k++) {
402                             if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
403                             for (int j = 0; j < thisLookUp.size(); j++) { newLookup[j]->push_back(thisLookUp[j]->getAbundance(k), thisLookUp[j]->getGroup()); }
404                         }
405                         
406                         // Allocate memory for thread data.
407                         metastatsData* tempSum = new metastatsData(sharedfile, outputDir, m, lines[i].start, lines[i].num, namesOfGroupCombos, newLookup, designMapGroups, iters, threshold);
408                         pDataArray.push_back(tempSum);
409                         processIDS.push_back(i);
410                         
411                         hThreadArray[i-1] = CreateThread(NULL, 0, MyMetastatsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
412                     }
413                     
414                     //do my part
415                                         driver(lines[0].start, lines[0].num, thisLookUp);
416                     
417                     //Wait until all threads have terminated.
418                     WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
419                     
420                     //Close all thread handles and free memory allocations.
421                     for(int i=0; i < pDataArray.size(); i++){
422                         for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) {  delete pDataArray[i]->thisLookUp[j];  } 
423                         for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {  
424                             outputNames.push_back(pDataArray[i]->outputNames[j]);
425                             outputTypes["metastats"].push_back(pDataArray[i]->outputNames[j]);
426                         }
427                                                 
428                         CloseHandle(hThreadArray[i]);
429                         delete pDataArray[i];
430                     }
431         #endif
432
433                                 }
434                 
435                 return 0;
436                 
437         }
438         catch(exception& e) {
439                 m->errorOut(e, "MetaStatsCommand", "process");
440                 exit(1);
441         }
442 }
443 //**********************************************************************************************************************
444 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) { 
445         try {
446         
447                 //for each combo
448                 for (int c = start; c < (start+num); c++) {
449                         
450                         //get set names
451                         string setA = namesOfGroupCombos[c][0]; 
452                         string setB = namesOfGroupCombos[c][1];
453                 
454                         //get filename
455                         string outputFileName = outputDir +  m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
456                         outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
457                         //int nameLength = outputFileName.length();
458                         //char * output = new char[nameLength];
459                         //strcpy(output, outputFileName.c_str());
460         
461                         //build matrix from shared rabunds
462                         //double** data;
463                         //data = new double*[thisLookUp[0]->getNumBins()];
464                         
465                         vector< vector<double> > data2; data2.resize(thisLookUp[0]->getNumBins());
466                         
467                         vector<SharedRAbundVector*> subset;
468                         int setACount = 0;
469                         int setBCount = 0;
470                         for (int i = 0; i < thisLookUp.size(); i++) {
471                                 string thisGroup = thisLookUp[i]->getGroup();
472                                 
473                                 //is this group for a set we want to compare??
474                                 //sorting the sets by putting setB at the back and setA in the front
475                                 if ((designMap->getGroup(thisGroup) == setB)) {  
476                                         subset.push_back(thisLookUp[i]);
477                                         setBCount++;
478                                 }else if ((designMap->getGroup(thisGroup) == setA)) {
479                                         subset.insert(subset.begin()+setACount, thisLookUp[i]);
480                                         setACount++;
481                                 }
482                         }
483                                                 
484                         if ((setACount == 0) || (setBCount == 0))  { 
485                                 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
486                                 outputNames.pop_back();
487                         }else {
488                                 //fill data
489                                 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
490                                         //data[j] = new double[subset.size()];
491                                         data2[j].resize(subset.size(), 0.0);
492                                         for (int i = 0; i < subset.size(); i++) {
493                                                 //data[j][i] = (subset[i]->getAbundance(j));
494                                                 data2[j][i] = (subset[i]->getAbundance(j));
495                                         }
496                                 }
497                                 
498                                 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); 
499                                 //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
500                                 
501                                 m->mothurOutEndLine();
502                                 MothurMetastats mothurMeta(threshold, iters);
503                                 mothurMeta.runMetastats(outputFileName , data2, setACount);
504                                 m->mothurOutEndLine();
505                                 
506                                 m->mothurOutEndLine(); 
507                         }
508                         
509                         //free memory
510                         //delete output;
511                         //for(int i = 0; i < thisLookUp[0]->getNumBins(); i++)  {  delete[] data[i];  }
512                         //delete[] data; 
513                 }
514                 
515                 return 0;
516
517         }
518         catch(exception& e) {
519                 m->errorOut(e, "MetaStatsCommand", "driver");
520                 exit(1);
521         }
522 }
523 //**********************************************************************************************************************