]> git.donarmstrong.com Git - mothur.git/blob - metastatscommand.cpp
added Jensen-Shannon calc. working on get.communitytype command. fixed bug in get...
[mothur.git] / metastatscommand.cpp
1 /*
2  *  metastatscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/16/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "metastatscommand.h"
11 #include "sharedutilities.h"
12 #include "sharedrabundfloatvector.h"
13
14
15 //**********************************************************************************************************************
16 vector<string> MetaStatsCommand::setParameters(){       
17         try {
18                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","metastats",false,true,true); parameters.push_back(pshared);
19                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
20                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
21                 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
22                 CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pthreshold);
23                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
24                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
25                 CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "MetaStatsCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string MetaStatsCommand::getHelpString(){       
40         try {
41                 string helpString = "";
42                 helpString += "This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n";
43                 helpString += "The metastats command outputs a .metastats file. \n";
44                 helpString += "The metastats command parameters are shared, iters, threshold, groups, label, design, sets and processors.  The shared and design parameters are required, unless you have valid current files.\n";
45                 helpString += "The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n";
46                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
47                 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
48                 helpString += "The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic.  The default is 1000. \n";
49                 helpString += "The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n";
50                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
51                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
52                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
53                 helpString += "The metastats command should be in the following format: metastats(design=yourDesignFile).\n";
54                 helpString += "Example metastats(design=temp.design, groups=A-B-C).\n";
55                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
56                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "MetaStatsCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 string MetaStatsCommand::getOutputPattern(string type) {
66     try {
67         string pattern = "";
68         
69         if (type == "metastats") {  pattern = "[filename],[distance],[group],metastats"; } 
70         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
71         
72         return pattern;
73     }
74     catch(exception& e) {
75         m->errorOut(e, "MetaStatsCommand", "getOutputPattern");
76         exit(1);
77     }
78 }
79 //**********************************************************************************************************************
80 MetaStatsCommand::MetaStatsCommand(){   
81         try {
82                 abort = true; calledHelp = true;
83                 setParameters();
84                 vector<string> tempOutNames;
85                 outputTypes["metastats"] = tempOutNames;
86         }
87         catch(exception& e) {
88                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
89                 exit(1);
90         }
91 }
92 //**********************************************************************************************************************
93
94 MetaStatsCommand::MetaStatsCommand(string option) {
95         try {
96                 abort = false; calledHelp = false;   
97                 allLines = 1;
98                 
99                 
100                 //allow user to run help
101                 if(option == "help") { help(); abort = true; calledHelp = true; }
102                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
103                 
104                 else {
105                         vector<string> myArray = setParameters();
106                         
107                         OptionParser parser(option);
108                         map<string,string> parameters = parser.getParameters();
109                         
110                         ValidParameters validParameter;
111                         
112                         //check to make sure all parameters are valid for command
113                         map<string,string>::iterator it;
114                         for (it = parameters.begin(); it != parameters.end(); it++) { 
115                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
116                         }
117                         
118                         //initialize outputTypes
119                         vector<string> tempOutNames;
120                         outputTypes["metastats"] = tempOutNames;
121                         
122                         
123                         //if the user changes the input directory command factory will send this info to us in the output parameter 
124                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
125                         if (inputDir == "not found"){   inputDir = "";          }
126                         else {
127                                 string path;
128                                 it = parameters.find("design");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
134                                 }
135                                 
136                                 it = parameters.find("shared");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
142                                 }
143                                 
144                         }
145                         
146                         //check for required parameters
147                         sharedfile = validParameter.validFile(parameters, "shared", true);
148                         if (sharedfile == "not open") { abort = true; }
149                         else if (sharedfile == "not found") {                           //if there is a current shared file, use it
150                                 sharedfile = m->getSharedFile(); 
151                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
152                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
153                         }else { m->setSharedFile(sharedfile); }
154                         
155                         //check for required parameters
156                         designfile = validParameter.validFile(parameters, "design", true);
157                         if (designfile == "not open") { abort = true; }
158                         else if (designfile == "not found") {                           
159                                 //if there is a current design file, use it
160                                 designfile = m->getDesignFile(); 
161                                 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
162                                 else {  m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
163                         }else { m->setDesignFile(designfile); }
164                         
165                         //if the user changes the output directory command factory will send this info to us in the output parameter 
166                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
167                                 outputDir = ""; 
168                                 outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it      
169                         }
170
171                         //check for optional parameter and set defaults
172                         // ...at some point should added some additional type checking...
173                         label = validParameter.validFile(parameters, "label", false);                   
174                         if (label == "not found") { label = ""; }
175                         else { 
176                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
177                                 else { allLines = 1;  }
178                         }
179                         
180                         groups = validParameter.validFile(parameters, "groups", false);                 
181                         if (groups == "not found") { groups = ""; pickedGroups = false; }
182                         else { 
183                                 pickedGroups = true;
184                                 m->splitAtDash(groups, Groups);
185                                 m->setGroups(Groups);
186                         }
187                         
188                         sets = validParameter.validFile(parameters, "sets", false);                     
189                         if (sets == "not found") { sets = ""; }
190                         else { 
191                                 m->splitAtDash(sets, Sets);
192                         }
193
194                         
195                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
196                         m->mothurConvert(temp, iters); 
197                         
198                         temp = validParameter.validFile(parameters, "threshold", false);                        if (temp == "not found") { temp = "0.05"; }
199                         m->mothurConvert(temp, threshold); 
200                         
201                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
202                         m->setProcessors(temp);
203                         m->mothurConvert(temp, processors);
204                 }
205
206         }
207         catch(exception& e) {
208                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
209                 exit(1);
210         }
211 }
212 //**********************************************************************************************************************
213
214 int MetaStatsCommand::execute(){
215         try {
216         
217         if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
218         
219         //just used to convert files to test metastats online
220         /****************************************************/
221         bool convertInputToShared = false;
222         convertSharedToInput = false;
223         if (convertInputToShared) { convertToShared(sharedfile); return 0; }
224         /****************************************************/
225                 
226                 designMap = new GroupMap(designfile);
227                 designMap->readDesignMap();
228         
229                 input = new InputData(sharedfile, "sharedfile");
230                 lookup = input->getSharedRAbundVectors();
231                 string lastLabel = lookup[0]->getLabel();
232                 
233                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
234                 set<string> processedLabels;
235                 set<string> userLabels = labels;
236                 
237                 //setup the pairwise comparions of sets for metastats
238                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
239                 //make sure sets are all in designMap
240                 SharedUtil* util = new SharedUtil(); 
241                 vector<string> dGroups = designMap->getNamesOfGroups();
242                 util->setGroups(Sets, dGroups);  
243                 delete util;
244                 
245                 int numGroups = Sets.size();
246                 for (int a=0; a<numGroups; a++) { 
247                         for (int l = 0; l < a; l++) {   
248                                 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
249                                 namesOfGroupCombos.push_back(groups);
250                         }
251                 }
252         
253                 
254                 //only 1 combo
255                 if (numGroups == 2) { processors = 1; }
256                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
257
258         if(processors != 1){
259             int numPairs = namesOfGroupCombos.size();
260             int numPairsPerProcessor = numPairs / processors;
261                         
262             for (int i = 0; i < processors; i++) {
263                 int startPos = i * numPairsPerProcessor;
264                 if(i == processors - 1){
265                     numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
266                 }
267                 lines.push_back(linePair(startPos, numPairsPerProcessor));
268             }
269         }
270                 
271                 //as long as you are not at the end of the file or done wih the lines you want
272                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
273                         
274                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->clearGroups(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
275         
276                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
277
278                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
279                                 process(lookup);
280                                 
281                                 processedLabels.insert(lookup[0]->getLabel());
282                                 userLabels.erase(lookup[0]->getLabel());
283                         }
284                         
285                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
286                                 string saveLabel = lookup[0]->getLabel();
287                         
288                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
289                                 lookup = input->getSharedRAbundVectors(lastLabel);
290                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
291                                 
292                                 process(lookup);
293                                 
294                                 processedLabels.insert(lookup[0]->getLabel());
295                                 userLabels.erase(lookup[0]->getLabel());
296                                 
297                                 //restore real lastlabel to save below
298                                 lookup[0]->setLabel(saveLabel);
299                         }
300                         
301                         lastLabel = lookup[0]->getLabel();
302                         //prevent memory leak
303                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
304                         
305                         if (m->control_pressed) {  outputTypes.clear(); m->clearGroups(); delete input;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]); } return 0; }
306
307                         //get next line to process
308                         lookup = input->getSharedRAbundVectors();                               
309                 }
310                 
311                 if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }
312
313                 //output error messages about any remaining user labels
314                 set<string>::iterator it;
315                 bool needToRun = false;
316                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
317                         m->mothurOut("Your file does not include the label " + *it); 
318                         if (processedLabels.count(lastLabel) != 1) {
319                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
320                                 needToRun = true;
321                         }else {
322                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
323                         }
324                 }
325         
326                 //run last label if you need to
327                 if (needToRun == true)  {
328                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
329                         lookup = input->getSharedRAbundVectors(lastLabel);
330                         
331                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
332                         
333                         process(lookup);
334                         
335                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
336                 }
337         
338                 //reset groups parameter
339                 m->clearGroups();  
340                 delete input; 
341                 delete designMap;
342                 
343                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); } return 0;}
344                 
345                 m->mothurOutEndLine();
346                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
347                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
348                 m->mothurOutEndLine();
349                 
350                 return 0;
351         }
352         catch(exception& e) {
353                 m->errorOut(e, "MetaStatsCommand", "execute");
354                 exit(1);
355         }
356 }
357 //**********************************************************************************************************************
358
359 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
360         try {
361                 
362                 
363                                 if(processors == 1){
364                                         driver(0, namesOfGroupCombos.size(), thisLookUp);
365                                 }else{
366                                         int process = 1;
367                                         vector<int> processIDS;
368                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
369                                         //loop through and create all the processes you want
370                                         while (process != processors) {
371                                                 int pid = fork();
372                         
373                                                 if (pid > 0) {
374                                                         processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
375                                                         process++;
376                                                 }else if (pid == 0){
377                                                         driver(lines[process].start, lines[process].num, thisLookUp);
378                                                         exit(0);
379                                                 }else { 
380                                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
381                                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
382                                                         exit(0);
383                                                 }
384                                         }
385                                         
386                                         //do my part
387                                         driver(lines[0].start, lines[0].num, thisLookUp);
388                 
389                                         //force parent to wait until all the processes are done
390                                         for (int i=0;i<(processors-1);i++) { 
391                                                 int temp = processIDS[i];
392                                                 wait(&temp);
393                                         }
394         #else
395                     
396                     //////////////////////////////////////////////////////////////////////////////////////////////////////
397                     //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
398                     //Above fork() will clone, so memory is separate, but that's not the case with windows, 
399                     //Taking advantage of shared memory to pass results vectors.
400                     //////////////////////////////////////////////////////////////////////////////////////////////////////
401                     
402                     vector<metastatsData*> pDataArray; 
403                     DWORD   dwThreadIdArray[processors-1];
404                     HANDLE  hThreadArray[processors-1]; 
405                     
406                     //Create processor worker threads.
407                     for( int i=1; i<processors; i++ ){
408                         
409                         //make copy of lookup so we don't get access violations
410                         vector<SharedRAbundVector*> newLookup;
411                         vector<string> designMapGroups;
412                         for (int k = 0; k < thisLookUp.size(); k++) {
413                             SharedRAbundVector* temp = new SharedRAbundVector();
414                             temp->setLabel(thisLookUp[k]->getLabel());
415                             temp->setGroup(thisLookUp[k]->getGroup());
416                             newLookup.push_back(temp);
417                             designMapGroups.push_back(designMap->getGroup(thisLookUp[k]->getGroup()));
418                         }
419                         
420                         //for each bin
421                         for (int k = 0; k < thisLookUp[0]->getNumBins(); k++) {
422                             if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
423                             for (int j = 0; j < thisLookUp.size(); j++) { newLookup[j]->push_back(thisLookUp[j]->getAbundance(k), thisLookUp[j]->getGroup()); }
424                         }
425                         
426                         // Allocate memory for thread data.
427                         metastatsData* tempSum = new metastatsData(sharedfile, outputDir, m, lines[i].start, lines[i].num, namesOfGroupCombos, newLookup, designMapGroups, iters, threshold);
428                         pDataArray.push_back(tempSum);
429                         processIDS.push_back(i);
430                         
431                         hThreadArray[i-1] = CreateThread(NULL, 0, MyMetastatsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
432                     }
433                     
434                     //do my part
435                                         driver(lines[0].start, lines[0].num, thisLookUp);
436                     
437                     //Wait until all threads have terminated.
438                     WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
439                     
440                     //Close all thread handles and free memory allocations.
441                     for(int i=0; i < pDataArray.size(); i++){
442                         if (pDataArray[i]->count != (pDataArray[i]->num)) {
443                             m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->num) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
444                         }
445                         for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) {  delete pDataArray[i]->thisLookUp[j];  } 
446                         for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {  
447                             outputNames.push_back(pDataArray[i]->outputNames[j]);
448                             outputTypes["metastats"].push_back(pDataArray[i]->outputNames[j]);
449                         }
450                                                 
451                         CloseHandle(hThreadArray[i]);
452                         delete pDataArray[i];
453                     }
454         #endif
455
456                                 }
457                 
458                 return 0;
459                 
460         }
461         catch(exception& e) {
462                 m->errorOut(e, "MetaStatsCommand", "process");
463                 exit(1);
464         }
465 }
466 //**********************************************************************************************************************
467 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) { 
468         try {
469         
470                 //for each combo
471                 for (int c = start; c < (start+num); c++) {
472                         
473                         //get set names
474                         string setA = namesOfGroupCombos[c][0];
475                         string setB = namesOfGroupCombos[c][1];
476                 
477                         //get filename
478             map<string, string> variables; 
479             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
480             variables["[distance]"] = thisLookUp[0]->getLabel();
481             variables["[group]"] = setA + "-" + setB;
482                         string outputFileName = getOutputFileName("metastats",variables);
483                         outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
484                         //int nameLength = outputFileName.length();
485                         //char * output = new char[nameLength];
486                         //strcpy(output, outputFileName.c_str());
487         
488                         //build matrix from shared rabunds
489                         //double** data;
490                         //data = new double*[thisLookUp[0]->getNumBins()];
491                         
492                         vector< vector<double> > data2; data2.resize(thisLookUp[0]->getNumBins());
493                         
494                         vector<SharedRAbundVector*> subset;
495                         int setACount = 0;
496                         int setBCount = 0;
497                         for (int i = 0; i < thisLookUp.size(); i++) {
498                                 string thisGroup = thisLookUp[i]->getGroup();
499                                 
500                                 //is this group for a set we want to compare??
501                                 //sorting the sets by putting setB at the back and setA in the front
502                                 if ((designMap->getGroup(thisGroup) == setB)) {  
503                                         subset.push_back(thisLookUp[i]);
504                                         setBCount++;
505                                 }else if ((designMap->getGroup(thisGroup) == setA)) {
506                                         subset.insert(subset.begin()+setACount, thisLookUp[i]);
507                                         setACount++;
508                                 }
509                         }
510                                                 
511                         if ((setACount == 0) || (setBCount == 0))  { 
512                                 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
513                                 outputNames.pop_back();
514                         }else {
515                 
516                                 //fill data
517                                 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
518                                         //data[j] = new double[subset.size()];
519                                         data2[j].resize(subset.size(), 0.0);
520                    
521                                         for (int i = 0; i < subset.size(); i++) {
522                                                 data2[j][i] = (subset[i]->getAbundance(j));
523                                         }
524                                 }
525                                 
526                                 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); 
527                                 //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
528                 if (convertSharedToInput) { convertToInput(subset, outputFileName);  }
529                                 
530                                 m->mothurOutEndLine();
531                                 MothurMetastats mothurMeta(threshold, iters);
532                                 mothurMeta.runMetastats(outputFileName , data2, setACount);
533                                 m->mothurOutEndLine();
534                                 m->mothurOutEndLine(); 
535                         }
536                         
537                         //free memory
538                         //delete output;
539                         //for(int i = 0; i < thisLookUp[0]->getNumBins(); i++)  {  delete[] data[i];  }
540                         //delete[] data; 
541                 }
542                 
543                 return 0;
544
545         }
546         catch(exception& e) {
547                 m->errorOut(e, "MetaStatsCommand", "driver");
548                 exit(1);
549         }
550 }
551 //**********************************************************************************************************************
552 /*Metastats files look like:
553  13_0   14_0    13_52   14_52   70S     71S     72S     M1      M2      M3      C11     C12     C21     C15     C16     C19     C3      C4      C9
554  Alphaproteobacteria    0       0       0       0       0       0       5       0       0       0       0       0       0       0       0       0       0       0       0
555  Mollicutes     0       0       2       0       0       59      5       11      4       1       0       2       8       1       0       1       0       3       0
556  Verrucomicrobiae       0       0       0       0       0       1       6       0       0       0       0       0       0       0       0       0       0       0       0
557  Deltaproteobacteria    0       0       0       0       0       6       1       0       1       0       1       1       7       0       0       0       0       0       0
558  Cyanobacteria  0       0       1       0       0       0       1       0       0       0       0       0       0       0       0       0       0       0       0
559  Epsilonproteobacteria  0       0       0       0       0       0       0       0       6       0       0       3       1       0       0       0       0       0       0
560  Clostridia     75      65      207     226     801     280     267     210     162     197     81      120     106     148     120     94      84      98      121
561  Bacilli        3       2       16      8       21      52      31      70      46      65      4       28      5       23      62      26      20      30      25
562  Bacteroidetes (class)  21      25      22      64      226     193     296     172     98      55      19      149     201     85      50      76      113     92      82
563  Gammaproteobacteria    0       0       0       0       0       1       0       0       0       0       1       1       0       0       0       1       0       0       0
564  TM7_genera_incertae_sedis      0       0       0       0       0       0       0       0       1       0       1       2       0       2       0       0       0       0       0
565  Actinobacteria (class) 1       1       1       2       0       0       0       9       3       7       1       1       1       3       1       2       1       2       3
566  Betaproteobacteria     0       0       3       3       0       0       9       1       1       0       1       2       3       1       1       0       0       0       0
567 */
568 //this function is just used to convert files to test the differences between the metastats version and mothurs version
569 int MetaStatsCommand::convertToShared(string filename) {
570         try {
571         ifstream in;
572         m->openInputFile(filename, in);
573         
574         string header = m->getline(in); m->gobble(in);
575         
576         vector<string> groups = m->splitWhiteSpace(header);
577         vector<SharedRAbundFloatVector*> newLookup;
578         cout << groups.size() << endl;
579         for (int i = 0; i < groups.size(); i++) {
580             cout << "creating group " << groups[i] << endl;
581             SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
582             temp->setLabel("0.03");
583             temp->setGroup(groups[i]);
584             newLookup.push_back(temp);
585         }
586         
587         int otuCount = 0;
588         while (!in.eof()) {
589             if (m->control_pressed) { break; }
590             
591             string otuname;
592             in >> otuname; m->gobble(in);
593             otuCount++;
594             cout << otuname << endl;
595             for (int i = 0; i < groups.size(); i++) {
596                 double temp;
597                 in >> temp; m->gobble(in);
598                 newLookup[i]->push_back(temp, groups[i]);
599             }
600             m->gobble(in);
601         }
602         in.close();
603     
604         ofstream out;
605         m->openOutputFile(filename+".shared", out);
606         
607         out << "label\tgroup\tnumOTUs\t";
608         
609         string snumBins = toString(otuCount);
610         for (int i = 0; i < otuCount; i++) {
611             string binLabel = "Otu";
612             string sbinNumber = toString(i+1);
613             if (sbinNumber.length() < snumBins.length()) {
614                 int diff = snumBins.length() - sbinNumber.length();
615                 for (int h = 0; h < diff; h++) { binLabel += "0"; }
616             }
617             binLabel += sbinNumber;
618             out << binLabel << '\t';
619         }
620         out << endl;
621         
622         for (int i = 0; i < groups.size(); i++) {
623             out << "0.03" << '\t' << groups[i] << '\t';
624             newLookup[i]->print(out);
625         }
626         out.close();
627         
628         cout << filename+".shared" << endl;
629         
630         return 0;
631     }
632         catch(exception& e) {
633                 m->errorOut(e, "MetaStatsCommand", "convertToShared");
634                 exit(1);
635         }
636 }
637 //**********************************************************************************************************************
638
639 int MetaStatsCommand::convertToInput(vector<SharedRAbundVector*>& subset, string thisfilename) {
640         try {
641         ofstream out;
642         m->openOutputFile(thisfilename+".matrix", out);
643         
644         out << "\t";
645         for (int i = 0; i < subset.size()-1; i++) {
646             out << subset[i]->getGroup() << '\t';
647         }
648         out << subset[subset.size()-1]->getGroup() << endl;
649         
650         for (int i = 0; i < subset[0]->getNumBins(); i++) {
651             out << m->currentSharedBinLabels[i] << '\t';
652             for (int j = 0; j < subset.size()-1; j++) {
653                 out << subset[j]->getAbundance(i) << '\t';
654             }
655             out << subset[subset.size()-1]->getAbundance(i) << endl;
656         }
657         out.close();
658         
659         cout << thisfilename+".matrix" << endl;
660         
661         return 0;
662     }
663         catch(exception& e) {
664                 m->errorOut(e, "MetaStatsCommand", "convertToInput");
665                 exit(1);
666         }
667 }
668
669 //**********************************************************************************************************************