]> git.donarmstrong.com Git - mothur.git/blob - metastatscommand.cpp
changes while testing
[mothur.git] / metastatscommand.cpp
1 /*
2  *  metastatscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/16/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "metastatscommand.h"
11 #include "sharedutilities.h"
12
13
14 //**********************************************************************************************************************
15 vector<string> MetaStatsCommand::setParameters(){       
16         try {
17                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","metastats",false,true,true); parameters.push_back(pshared);
18                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
19                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
20                 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
21                 CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pthreshold);
22                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
23                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
24                 CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
27                 
28                 vector<string> myArray;
29                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "MetaStatsCommand", "setParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 string MetaStatsCommand::getHelpString(){       
39         try {
40                 string helpString = "";
41                 helpString += "This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n";
42                 helpString += "The metastats command outputs a .metastats file. \n";
43                 helpString += "The metastats command parameters are shared, iters, threshold, groups, label, design, sets and processors.  The shared and design parameters are required, unless you have valid current files.\n";
44                 helpString += "The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n";
45                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
46                 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
47                 helpString += "The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic.  The default is 1000. \n";
48                 helpString += "The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n";
49                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
50                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
51                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
52                 helpString += "The metastats command should be in the following format: metastats(design=yourDesignFile).\n";
53                 helpString += "Example metastats(design=temp.design, groups=A-B-C).\n";
54                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
55                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
56                 return helpString;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "MetaStatsCommand", "getHelpString");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64 string MetaStatsCommand::getOutputPattern(string type) {
65     try {
66         string pattern = "";
67         
68         if (type == "metastats") {  pattern = "[filename],[distance],[group],metastats"; } 
69         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
70         
71         return pattern;
72     }
73     catch(exception& e) {
74         m->errorOut(e, "MetaStatsCommand", "getOutputPattern");
75         exit(1);
76     }
77 }
78 //**********************************************************************************************************************
79 MetaStatsCommand::MetaStatsCommand(){   
80         try {
81                 abort = true; calledHelp = true;
82                 setParameters();
83                 vector<string> tempOutNames;
84                 outputTypes["metastats"] = tempOutNames;
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
88                 exit(1);
89         }
90 }
91 //**********************************************************************************************************************
92
93 MetaStatsCommand::MetaStatsCommand(string option) {
94         try {
95                 abort = false; calledHelp = false;   
96                 allLines = 1;
97                 
98                 
99                 //allow user to run help
100                 if(option == "help") { help(); abort = true; calledHelp = true; }
101                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
102                 
103                 else {
104                         vector<string> myArray = setParameters();
105                         
106                         OptionParser parser(option);
107                         map<string,string> parameters = parser.getParameters();
108                         
109                         ValidParameters validParameter;
110                         
111                         //check to make sure all parameters are valid for command
112                         map<string,string>::iterator it;
113                         for (it = parameters.begin(); it != parameters.end(); it++) { 
114                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
115                         }
116                         
117                         //initialize outputTypes
118                         vector<string> tempOutNames;
119                         outputTypes["metastats"] = tempOutNames;
120                         
121                         
122                         //if the user changes the input directory command factory will send this info to us in the output parameter 
123                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
124                         if (inputDir == "not found"){   inputDir = "";          }
125                         else {
126                                 string path;
127                                 it = parameters.find("design");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
133                                 }
134                                 
135                                 it = parameters.find("shared");
136                                 //user has given a template file
137                                 if(it != parameters.end()){ 
138                                         path = m->hasPath(it->second);
139                                         //if the user has not given a path then, add inputdir. else leave path alone.
140                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
141                                 }
142                                 
143                         }
144                         
145                         //check for required parameters
146                         sharedfile = validParameter.validFile(parameters, "shared", true);
147                         if (sharedfile == "not open") { abort = true; }
148                         else if (sharedfile == "not found") {                           //if there is a current shared file, use it
149                                 sharedfile = m->getSharedFile(); 
150                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
151                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
152                         }else { m->setSharedFile(sharedfile); }
153                         
154                         //check for required parameters
155                         designfile = validParameter.validFile(parameters, "design", true);
156                         if (designfile == "not open") { abort = true; }
157                         else if (designfile == "not found") {                           
158                                 //if there is a current design file, use it
159                                 designfile = m->getDesignFile(); 
160                                 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
161                                 else {  m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
162                         }else { m->setDesignFile(designfile); }
163                         
164                         //if the user changes the output directory command factory will send this info to us in the output parameter 
165                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
166                                 outputDir = ""; 
167                                 outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it      
168                         }
169
170                         //check for optional parameter and set defaults
171                         // ...at some point should added some additional type checking...
172                         label = validParameter.validFile(parameters, "label", false);                   
173                         if (label == "not found") { label = ""; }
174                         else { 
175                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
176                                 else { allLines = 1;  }
177                         }
178                         
179                         groups = validParameter.validFile(parameters, "groups", false);                 
180                         if (groups == "not found") { groups = ""; pickedGroups = false; }
181                         else { 
182                                 pickedGroups = true;
183                                 m->splitAtDash(groups, Groups);
184                                 m->setGroups(Groups);
185                         }
186                         
187                         sets = validParameter.validFile(parameters, "sets", false);                     
188                         if (sets == "not found") { sets = ""; }
189                         else { 
190                                 m->splitAtDash(sets, Sets);
191                         }
192
193                         
194                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
195                         m->mothurConvert(temp, iters); 
196                         
197                         temp = validParameter.validFile(parameters, "threshold", false);                        if (temp == "not found") { temp = "0.05"; }
198                         m->mothurConvert(temp, threshold); 
199                         
200                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
201                         m->setProcessors(temp);
202                         m->mothurConvert(temp, processors);
203                 }
204
205         }
206         catch(exception& e) {
207                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
208                 exit(1);
209         }
210 }
211 //**********************************************************************************************************************
212
213 int MetaStatsCommand::execute(){
214         try {
215         
216                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
217         
218         //just used to convert files to test metastats online
219         /****************************************************/
220         bool convertInputToShared = false;
221         convertSharedToInput = false;
222         if (convertInputToShared) { convertToShared(sharedfile); return 0; }
223         /****************************************************/
224         
225                 
226                 designMap = new GroupMap(designfile);
227                 designMap->readDesignMap();
228         
229                 input = new InputData(sharedfile, "sharedfile");
230                 lookup = input->getSharedRAbundVectors();
231                 string lastLabel = lookup[0]->getLabel();
232                 
233                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
234                 set<string> processedLabels;
235                 set<string> userLabels = labels;
236                 
237                 //setup the pairwise comparions of sets for metastats
238                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
239                 //make sure sets are all in designMap
240                 SharedUtil* util = new SharedUtil(); 
241                 vector<string> dGroups = designMap->getNamesOfGroups();
242                 util->setGroups(Sets, dGroups);  
243                 delete util;
244                 
245                 int numGroups = Sets.size();
246                 for (int a=0; a<numGroups; a++) { 
247                         for (int l = 0; l < a; l++) {   
248                                 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
249                                 namesOfGroupCombos.push_back(groups);
250                         }
251                 }
252         
253                 
254                 //only 1 combo
255                 if (numGroups == 2) { processors = 1; }
256                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
257
258         if(processors != 1){
259             int numPairs = namesOfGroupCombos.size();
260             int numPairsPerProcessor = numPairs / processors;
261                         
262             for (int i = 0; i < processors; i++) {
263                 int startPos = i * numPairsPerProcessor;
264                 if(i == processors - 1){
265                     numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
266                 }
267                 lines.push_back(linePair(startPos, numPairsPerProcessor));
268             }
269         }
270                 
271                 //as long as you are not at the end of the file or done wih the lines you want
272                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
273                         
274                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->clearGroups(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
275         
276                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
277
278                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
279                                 process(lookup);
280                                 
281                                 processedLabels.insert(lookup[0]->getLabel());
282                                 userLabels.erase(lookup[0]->getLabel());
283                         }
284                         
285                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
286                                 string saveLabel = lookup[0]->getLabel();
287                         
288                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
289                                 lookup = input->getSharedRAbundVectors(lastLabel);
290                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
291                                 
292                                 process(lookup);
293                                 
294                                 processedLabels.insert(lookup[0]->getLabel());
295                                 userLabels.erase(lookup[0]->getLabel());
296                                 
297                                 //restore real lastlabel to save below
298                                 lookup[0]->setLabel(saveLabel);
299                         }
300                         
301                         lastLabel = lookup[0]->getLabel();
302                         //prevent memory leak
303                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
304                         
305                         if (m->control_pressed) {  outputTypes.clear(); m->clearGroups(); delete input;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]); } return 0; }
306
307                         //get next line to process
308                         lookup = input->getSharedRAbundVectors();                               
309                 }
310                 
311                 if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }
312
313                 //output error messages about any remaining user labels
314                 set<string>::iterator it;
315                 bool needToRun = false;
316                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
317                         m->mothurOut("Your file does not include the label " + *it); 
318                         if (processedLabels.count(lastLabel) != 1) {
319                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
320                                 needToRun = true;
321                         }else {
322                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
323                         }
324                 }
325         
326                 //run last label if you need to
327                 if (needToRun == true)  {
328                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
329                         lookup = input->getSharedRAbundVectors(lastLabel);
330                         
331                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
332                         
333                         process(lookup);
334                         
335                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
336                 }
337         
338                 //reset groups parameter
339                 m->clearGroups();  
340                 delete input; 
341                 delete designMap;
342                 
343                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); } return 0;}
344                 
345                 m->mothurOutEndLine();
346                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
347                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
348                 m->mothurOutEndLine();
349                 
350                 return 0;
351         }
352         catch(exception& e) {
353                 m->errorOut(e, "MetaStatsCommand", "execute");
354                 exit(1);
355         }
356 }
357 //**********************************************************************************************************************
358
359 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
360         try {
361                 
362                 
363                                 if(processors == 1){
364                                         driver(0, namesOfGroupCombos.size(), thisLookUp);
365                                 }else{
366                                         int process = 1;
367                                         vector<int> processIDS;
368                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
369                                         //loop through and create all the processes you want
370                                         while (process != processors) {
371                                                 int pid = fork();
372                         
373                                                 if (pid > 0) {
374                                                         processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
375                                                         process++;
376                                                 }else if (pid == 0){
377                                                         driver(lines[process].start, lines[process].num, thisLookUp);
378                                                         exit(0);
379                                                 }else { 
380                                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
381                                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
382                                                         exit(0);
383                                                 }
384                                         }
385                                         
386                                         //do my part
387                                         driver(lines[0].start, lines[0].num, thisLookUp);
388                 
389                                         //force parent to wait until all the processes are done
390                                         for (int i=0;i<(processors-1);i++) { 
391                                                 int temp = processIDS[i];
392                                                 wait(&temp);
393                                         }
394         #else
395                     
396                     //////////////////////////////////////////////////////////////////////////////////////////////////////
397                     //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
398                     //Above fork() will clone, so memory is separate, but that's not the case with windows, 
399                     //Taking advantage of shared memory to pass results vectors.
400                     //////////////////////////////////////////////////////////////////////////////////////////////////////
401                     
402                     vector<metastatsData*> pDataArray; 
403                     DWORD   dwThreadIdArray[processors-1];
404                     HANDLE  hThreadArray[processors-1]; 
405                     
406                     //Create processor worker threads.
407                     for( int i=1; i<processors; i++ ){
408                         
409                         //make copy of lookup so we don't get access violations
410                         vector<SharedRAbundVector*> newLookup;
411                         vector<string> designMapGroups;
412                         for (int k = 0; k < thisLookUp.size(); k++) {
413                             SharedRAbundVector* temp = new SharedRAbundVector();
414                             temp->setLabel(thisLookUp[k]->getLabel());
415                             temp->setGroup(thisLookUp[k]->getGroup());
416                             newLookup.push_back(temp);
417                             designMapGroups.push_back(designMap->getGroup(thisLookUp[k]->getGroup()));
418                         }
419                         
420                         //for each bin
421                         for (int k = 0; k < thisLookUp[0]->getNumBins(); k++) {
422                             if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
423                             for (int j = 0; j < thisLookUp.size(); j++) { newLookup[j]->push_back(thisLookUp[j]->getAbundance(k), thisLookUp[j]->getGroup()); }
424                         }
425                         
426                         // Allocate memory for thread data.
427                         metastatsData* tempSum = new metastatsData(sharedfile, outputDir, m, lines[i].start, lines[i].num, namesOfGroupCombos, newLookup, designMapGroups, iters, threshold);
428                         pDataArray.push_back(tempSum);
429                         processIDS.push_back(i);
430                         
431                         hThreadArray[i-1] = CreateThread(NULL, 0, MyMetastatsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
432                     }
433                     
434                     //do my part
435                                         driver(lines[0].start, lines[0].num, thisLookUp);
436                     
437                     //Wait until all threads have terminated.
438                     WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
439                     
440                     //Close all thread handles and free memory allocations.
441                     for(int i=0; i < pDataArray.size(); i++){
442                         if (pDataArray[i]->count != (pDataArray[i]->num)) {
443                             m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->num) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
444                         }
445                         for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) {  delete pDataArray[i]->thisLookUp[j];  } 
446                         for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {  
447                             outputNames.push_back(pDataArray[i]->outputNames[j]);
448                             outputTypes["metastats"].push_back(pDataArray[i]->outputNames[j]);
449                         }
450                                                 
451                         CloseHandle(hThreadArray[i]);
452                         delete pDataArray[i];
453                     }
454         #endif
455
456                                 }
457                 
458                 return 0;
459                 
460         }
461         catch(exception& e) {
462                 m->errorOut(e, "MetaStatsCommand", "process");
463                 exit(1);
464         }
465 }
466 //**********************************************************************************************************************
467 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) { 
468         try {
469         
470                 //for each combo
471                 for (int c = start; c < (start+num); c++) {
472                         
473                         //get set names
474                         string setA = namesOfGroupCombos[c][0];
475                         string setB = namesOfGroupCombos[c][1];
476                 
477                         //get filename
478             map<string, string> variables; 
479             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
480             variables["[distance]"] = thisLookUp[0]->getLabel();
481             variables["[group]"] = setA + "-" + setB;
482                         string outputFileName = getOutputFileName("metastats",variables);
483                         outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
484                         //int nameLength = outputFileName.length();
485                         //char * output = new char[nameLength];
486                         //strcpy(output, outputFileName.c_str());
487         
488                         //build matrix from shared rabunds
489                         //double** data;
490                         //data = new double*[thisLookUp[0]->getNumBins()];
491                         
492                         vector< vector<double> > data2; data2.resize(thisLookUp[0]->getNumBins());
493                         
494                         vector<SharedRAbundVector*> subset;
495                         int setACount = 0;
496                         int setBCount = 0;
497                         for (int i = 0; i < thisLookUp.size(); i++) {
498                                 string thisGroup = thisLookUp[i]->getGroup();
499                                 
500                                 //is this group for a set we want to compare??
501                                 //sorting the sets by putting setB at the back and setA in the front
502                                 if ((designMap->getGroup(thisGroup) == setB)) {  
503                                         subset.push_back(thisLookUp[i]);
504                                         setBCount++;
505                                 }else if ((designMap->getGroup(thisGroup) == setA)) {
506                                         subset.insert(subset.begin()+setACount, thisLookUp[i]);
507                                         setACount++;
508                                 }
509                         }
510                                                 
511                         if ((setACount == 0) || (setBCount == 0))  { 
512                                 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
513                                 outputNames.pop_back();
514                         }else {
515                 
516                                 //fill data
517                                 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
518                                         //data[j] = new double[subset.size()];
519                                         data2[j].resize(subset.size(), 0.0);
520                    
521                                         for (int i = 0; i < subset.size(); i++) {
522                                                 data2[j][i] = (subset[i]->getAbundance(j));
523                                         }
524                                 }
525                                 
526                                 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); 
527                                 //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
528                 if (convertSharedToInput) { convertToInput(subset, outputFileName);  }
529                                 
530                                 m->mothurOutEndLine();
531                                 MothurMetastats mothurMeta(threshold, iters);
532                                 mothurMeta.runMetastats(outputFileName , data2, setACount);
533                                 m->mothurOutEndLine();
534                                 m->mothurOutEndLine(); 
535                         }
536                         
537                         //free memory
538                         //delete output;
539                         //for(int i = 0; i < thisLookUp[0]->getNumBins(); i++)  {  delete[] data[i];  }
540                         //delete[] data; 
541                 }
542                 
543                 return 0;
544
545         }
546         catch(exception& e) {
547                 m->errorOut(e, "MetaStatsCommand", "driver");
548                 exit(1);
549         }
550 }
551 //**********************************************************************************************************************
552 /*Metastats files look like:
553  13_0   14_0    13_52   14_52   70S     71S     72S     M1      M2      M3      C11     C12     C21     C15     C16     C19     C3      C4      C9
554  Alphaproteobacteria    0       0       0       0       0       0       5       0       0       0       0       0       0       0       0       0       0       0       0
555  Mollicutes     0       0       2       0       0       59      5       11      4       1       0       2       8       1       0       1       0       3       0
556  Verrucomicrobiae       0       0       0       0       0       1       6       0       0       0       0       0       0       0       0       0       0       0       0
557  Deltaproteobacteria    0       0       0       0       0       6       1       0       1       0       1       1       7       0       0       0       0       0       0
558  Cyanobacteria  0       0       1       0       0       0       1       0       0       0       0       0       0       0       0       0       0       0       0
559  Epsilonproteobacteria  0       0       0       0       0       0       0       0       6       0       0       3       1       0       0       0       0       0       0
560  Clostridia     75      65      207     226     801     280     267     210     162     197     81      120     106     148     120     94      84      98      121
561  Bacilli        3       2       16      8       21      52      31      70      46      65      4       28      5       23      62      26      20      30      25
562  Bacteroidetes (class)  21      25      22      64      226     193     296     172     98      55      19      149     201     85      50      76      113     92      82
563  Gammaproteobacteria    0       0       0       0       0       1       0       0       0       0       1       1       0       0       0       1       0       0       0
564  TM7_genera_incertae_sedis      0       0       0       0       0       0       0       0       1       0       1       2       0       2       0       0       0       0       0
565  Actinobacteria (class) 1       1       1       2       0       0       0       9       3       7       1       1       1       3       1       2       1       2       3
566  Betaproteobacteria     0       0       3       3       0       0       9       1       1       0       1       2       3       1       1       0       0       0       0
567 */
568 //this function is just used to convert files to test the differences between the metastats version and mothurs version
569 int MetaStatsCommand::convertToShared(string filename) {
570         try {
571         ifstream in;
572         m->openInputFile(filename, in);
573         
574         string header = m->getline(in); m->gobble(in);
575         
576         vector<string> groups = m->splitWhiteSpace(header);
577         vector<SharedRAbundVector*> newLookup;
578         for (int i = 0; i < groups.size(); i++) {
579             SharedRAbundVector* temp = new SharedRAbundVector();
580             temp->setLabel("0.03");
581             temp->setGroup(groups[i]);
582             newLookup.push_back(temp);
583         }
584         
585         int otuCount = 0;
586         while (!in.eof()) {
587             if (m->control_pressed) { break; }
588             
589             string otuname;
590             in >> otuname; m->gobble(in);
591             otuCount++;
592             
593             for (int i = 0; i < groups.size(); i++) {
594                 int temp;
595                 in >> temp; m->gobble(in);
596                 newLookup[i]->push_back(temp, groups[i]);
597             }
598             m->gobble(in);
599         }
600         in.close();
601     
602         ofstream out;
603         m->openOutputFile(filename+".shared", out);
604         
605         out << "label\tgroup\tnumOTUs\t";
606         
607         string snumBins = toString(otuCount);
608         for (int i = 0; i < otuCount; i++) {
609             string binLabel = "Otu";
610             string sbinNumber = toString(i+1);
611             if (sbinNumber.length() < snumBins.length()) {
612                 int diff = snumBins.length() - sbinNumber.length();
613                 for (int h = 0; h < diff; h++) { binLabel += "0"; }
614             }
615             binLabel += sbinNumber;
616             out << binLabel << '\t';
617         }
618         out << endl;
619         
620         for (int i = 0; i < groups.size(); i++) {
621             out << "0.03" << '\t' << groups[i] << '\t';
622             newLookup[i]->print(out);
623         }
624         out.close();
625         
626         cout << filename+".shared" << endl;
627         
628         return 0;
629     }
630         catch(exception& e) {
631                 m->errorOut(e, "MetaStatsCommand", "convertToShared");
632                 exit(1);
633         }
634 }
635 //**********************************************************************************************************************
636
637 int MetaStatsCommand::convertToInput(vector<SharedRAbundVector*>& subset, string thisfilename) {
638         try {
639         ofstream out;
640         m->openOutputFile(thisfilename+".matrix", out);
641         
642         out << "\t";
643         for (int i = 0; i < subset.size()-1; i++) {
644             out << subset[i]->getGroup() << '\t';
645         }
646         out << subset[subset.size()-1]->getGroup() << endl;
647         
648         for (int i = 0; i < subset[0]->getNumBins(); i++) {
649             out << m->currentBinLabels[i] << '\t';
650             for (int j = 0; j < subset.size()-1; j++) {
651                 out << subset[j]->getAbundance(i) << '\t';
652             }
653             out << subset[subset.size()-1]->getAbundance(i) << endl;
654         }
655         out.close();
656         
657         return 0;
658     }
659         catch(exception& e) {
660                 m->errorOut(e, "MetaStatsCommand", "convertToInput");
661                 exit(1);
662         }
663 }
664
665 //**********************************************************************************************************************