]> git.donarmstrong.com Git - mothur.git/blob - metastatscommand.cpp
working on get.communitytype command
[mothur.git] / metastatscommand.cpp
1 /*
2  *  metastatscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/16/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "metastatscommand.h"
11 #include "sharedutilities.h"
12 #include "sharedrabundfloatvector.h"
13
14
15 //**********************************************************************************************************************
16 vector<string> MetaStatsCommand::setParameters(){       
17         try {
18                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","metastats",false,true,true); parameters.push_back(pshared);
19                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
20                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
21                 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
22                 CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pthreshold);
23                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
24                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
25                 CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "MetaStatsCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string MetaStatsCommand::getHelpString(){       
40         try {
41                 string helpString = "";
42                 helpString += "This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n";
43                 helpString += "The metastats command outputs a .metastats file. \n";
44                 helpString += "The metastats command parameters are shared, iters, threshold, groups, label, design, sets and processors.  The shared and design parameters are required, unless you have valid current files.\n";
45                 helpString += "The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n";
46                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
47                 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
48                 helpString += "The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic.  The default is 1000. \n";
49                 helpString += "The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n";
50                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
51                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
52                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
53                 helpString += "The metastats command should be in the following format: metastats(design=yourDesignFile).\n";
54                 helpString += "Example metastats(design=temp.design, groups=A-B-C).\n";
55                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
56                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "MetaStatsCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 string MetaStatsCommand::getOutputPattern(string type) {
66     try {
67         string pattern = "";
68         
69         if (type == "metastats") {  pattern = "[filename],[distance],[group],metastats"; } 
70         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
71         
72         return pattern;
73     }
74     catch(exception& e) {
75         m->errorOut(e, "MetaStatsCommand", "getOutputPattern");
76         exit(1);
77     }
78 }
79 //**********************************************************************************************************************
80 MetaStatsCommand::MetaStatsCommand(){   
81         try {
82                 abort = true; calledHelp = true;
83                 setParameters();
84                 vector<string> tempOutNames;
85                 outputTypes["metastats"] = tempOutNames;
86         }
87         catch(exception& e) {
88                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
89                 exit(1);
90         }
91 }
92 //**********************************************************************************************************************
93
94 MetaStatsCommand::MetaStatsCommand(string option) {
95         try {
96                 abort = false; calledHelp = false;   
97                 allLines = 1;
98                 
99                 
100                 //allow user to run help
101                 if(option == "help") { help(); abort = true; calledHelp = true; }
102                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
103                 
104                 else {
105                         vector<string> myArray = setParameters();
106                         
107                         OptionParser parser(option);
108                         map<string,string> parameters = parser.getParameters();
109                         
110                         ValidParameters validParameter;
111                         
112                         //check to make sure all parameters are valid for command
113                         map<string,string>::iterator it;
114                         for (it = parameters.begin(); it != parameters.end(); it++) { 
115                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
116                         }
117                         
118                         //initialize outputTypes
119                         vector<string> tempOutNames;
120                         outputTypes["metastats"] = tempOutNames;
121                         
122                         
123                         //if the user changes the input directory command factory will send this info to us in the output parameter 
124                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
125                         if (inputDir == "not found"){   inputDir = "";          }
126                         else {
127                                 string path;
128                                 it = parameters.find("design");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
134                                 }
135                                 
136                                 it = parameters.find("shared");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
142                                 }
143                                 
144                         }
145                         
146                         //check for required parameters
147                         sharedfile = validParameter.validFile(parameters, "shared", true);
148                         if (sharedfile == "not open") { abort = true; }
149                         else if (sharedfile == "not found") {                           //if there is a current shared file, use it
150                                 sharedfile = m->getSharedFile(); 
151                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
152                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
153                         }else { m->setSharedFile(sharedfile); }
154                         
155                         //check for required parameters
156                         designfile = validParameter.validFile(parameters, "design", true);
157                         if (designfile == "not open") { abort = true; }
158                         else if (designfile == "not found") {                           
159                                 //if there is a current design file, use it
160                                 designfile = m->getDesignFile(); 
161                                 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
162                                 else {  m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
163                         }else { m->setDesignFile(designfile); }
164                         
165                         //if the user changes the output directory command factory will send this info to us in the output parameter 
166                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
167                                 outputDir = ""; 
168                                 outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it      
169                         }
170
171                         //check for optional parameter and set defaults
172                         // ...at some point should added some additional type checking...
173                         label = validParameter.validFile(parameters, "label", false);                   
174                         if (label == "not found") { label = ""; }
175                         else { 
176                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
177                                 else { allLines = 1;  }
178                         }
179                         
180                         groups = validParameter.validFile(parameters, "groups", false);                 
181                         if (groups == "not found") { groups = ""; pickedGroups = false; }
182                         else { 
183                                 pickedGroups = true;
184                                 m->splitAtDash(groups, Groups);
185                                 m->setGroups(Groups);
186                         }
187                         
188                         sets = validParameter.validFile(parameters, "sets", false);                     
189                         if (sets == "not found") { sets = ""; }
190                         else { 
191                                 m->splitAtDash(sets, Sets);
192                         }
193
194                         
195                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
196                         m->mothurConvert(temp, iters); 
197                         
198                         temp = validParameter.validFile(parameters, "threshold", false);                        if (temp == "not found") { temp = "0.05"; }
199                         m->mothurConvert(temp, threshold); 
200                         
201                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
202                         m->setProcessors(temp);
203                         m->mothurConvert(temp, processors);
204                 }
205
206         }
207         catch(exception& e) {
208                 m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand");
209                 exit(1);
210         }
211 }
212 //**********************************************************************************************************************
213
214 int MetaStatsCommand::execute(){
215         try {
216         
217         if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
218         
219         //just used to convert files to test metastats online
220         /****************************************************/
221         bool convertInputToShared = false;
222         convertSharedToInput = false;
223         if (convertInputToShared) { convertToShared(sharedfile); return 0; }
224         /****************************************************/
225                 
226                 designMap = new GroupMap(designfile);
227                 designMap->readDesignMap();
228         
229                 input = new InputData(sharedfile, "sharedfile");
230                 lookup = input->getSharedRAbundVectors();
231                 string lastLabel = lookup[0]->getLabel();
232                 
233                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
234                 set<string> processedLabels;
235                 set<string> userLabels = labels;
236                 
237                 //setup the pairwise comparions of sets for metastats
238                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
239                 //make sure sets are all in designMap
240                 SharedUtil* util = new SharedUtil(); 
241                 vector<string> dGroups = designMap->getNamesOfGroups();
242                 util->setGroups(Sets, dGroups);  
243                 delete util;
244                 
245                 int numGroups = Sets.size();
246                 for (int a=0; a<numGroups; a++) { 
247                         for (int l = 0; l < a; l++) {   
248                                 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
249                                 namesOfGroupCombos.push_back(groups);
250                         }
251                 }
252         
253                 
254                 //only 1 combo
255                 if (numGroups == 2) { processors = 1; }
256                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
257
258         if(processors != 1){
259             int remainingPairs = namesOfGroupCombos.size();
260             int startIndex = 0;
261             for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
262                 int numPairs = remainingPairs; //case for last processor
263                 if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); }
264                 lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
265                 startIndex = startIndex + numPairs;
266                 remainingPairs = remainingPairs - numPairs;
267             }
268         }
269                 
270                 //as long as you are not at the end of the file or done wih the lines you want
271                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
272                         
273                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->clearGroups(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
274         
275                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
276
277                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
278                                 process(lookup);
279                                 
280                                 processedLabels.insert(lookup[0]->getLabel());
281                                 userLabels.erase(lookup[0]->getLabel());
282                         }
283                         
284                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
285                                 string saveLabel = lookup[0]->getLabel();
286                         
287                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
288                                 lookup = input->getSharedRAbundVectors(lastLabel);
289                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
290                                 
291                                 process(lookup);
292                                 
293                                 processedLabels.insert(lookup[0]->getLabel());
294                                 userLabels.erase(lookup[0]->getLabel());
295                                 
296                                 //restore real lastlabel to save below
297                                 lookup[0]->setLabel(saveLabel);
298                         }
299                         
300                         lastLabel = lookup[0]->getLabel();
301                         //prevent memory leak
302                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
303                         
304                         if (m->control_pressed) {  outputTypes.clear(); m->clearGroups(); delete input;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]); } return 0; }
305
306                         //get next line to process
307                         lookup = input->getSharedRAbundVectors();                               
308                 }
309                 
310                 if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }
311
312                 //output error messages about any remaining user labels
313                 set<string>::iterator it;
314                 bool needToRun = false;
315                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
316                         m->mothurOut("Your file does not include the label " + *it); 
317                         if (processedLabels.count(lastLabel) != 1) {
318                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
319                                 needToRun = true;
320                         }else {
321                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
322                         }
323                 }
324         
325                 //run last label if you need to
326                 if (needToRun == true)  {
327                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
328                         lookup = input->getSharedRAbundVectors(lastLabel);
329                         
330                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
331                         
332                         process(lookup);
333                         
334                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
335                 }
336         
337                 //reset groups parameter
338                 m->clearGroups();  
339                 delete input; 
340                 delete designMap;
341                 
342                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); } return 0;}
343                 
344                 m->mothurOutEndLine();
345                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
346                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
347                 m->mothurOutEndLine();
348                 
349                 return 0;
350         }
351         catch(exception& e) {
352                 m->errorOut(e, "MetaStatsCommand", "execute");
353                 exit(1);
354         }
355 }
356 //**********************************************************************************************************************
357
358 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
359         try {
360                 
361                 
362                                 if(processors == 1){
363                                         driver(0, namesOfGroupCombos.size(), thisLookUp);
364                                 }else{
365                                         int process = 1;
366                                         vector<int> processIDS;
367                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
368                                         //loop through and create all the processes you want
369                                         while (process != processors) {
370                                                 int pid = fork();
371                         
372                                                 if (pid > 0) {
373                                                         processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
374                                                         process++;
375                                                 }else if (pid == 0){
376                                                         driver(lines[process].start, lines[process].num, thisLookUp);
377                                                         exit(0);
378                                                 }else { 
379                                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
380                                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
381                                                         exit(0);
382                                                 }
383                                         }
384                                         
385                                         //do my part
386                                         driver(lines[0].start, lines[0].num, thisLookUp);
387                 
388                                         //force parent to wait until all the processes are done
389                                         for (int i=0;i<(processors-1);i++) { 
390                                                 int temp = processIDS[i];
391                                                 wait(&temp);
392                                         }
393         #else
394                     
395                     //////////////////////////////////////////////////////////////////////////////////////////////////////
396                     //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
397                     //Above fork() will clone, so memory is separate, but that's not the case with windows, 
398                     //Taking advantage of shared memory to pass results vectors.
399                     //////////////////////////////////////////////////////////////////////////////////////////////////////
400                     
401                     vector<metastatsData*> pDataArray; 
402                     DWORD   dwThreadIdArray[processors-1];
403                     HANDLE  hThreadArray[processors-1]; 
404                     
405                     //Create processor worker threads.
406                     for( int i=1; i<processors; i++ ){
407                         
408                         //make copy of lookup so we don't get access violations
409                         vector<SharedRAbundVector*> newLookup;
410                         vector<string> designMapGroups;
411                         for (int k = 0; k < thisLookUp.size(); k++) {
412                             SharedRAbundVector* temp = new SharedRAbundVector();
413                             temp->setLabel(thisLookUp[k]->getLabel());
414                             temp->setGroup(thisLookUp[k]->getGroup());
415                             newLookup.push_back(temp);
416                             designMapGroups.push_back(designMap->getGroup(thisLookUp[k]->getGroup()));
417                         }
418                         
419                         //for each bin
420                         for (int k = 0; k < thisLookUp[0]->getNumBins(); k++) {
421                             if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
422                             for (int j = 0; j < thisLookUp.size(); j++) { newLookup[j]->push_back(thisLookUp[j]->getAbundance(k), thisLookUp[j]->getGroup()); }
423                         }
424                         
425                         // Allocate memory for thread data.
426                         metastatsData* tempSum = new metastatsData(sharedfile, outputDir, m, lines[i].start, lines[i].num, namesOfGroupCombos, newLookup, designMapGroups, iters, threshold);
427                         pDataArray.push_back(tempSum);
428                         processIDS.push_back(i);
429                         
430                         hThreadArray[i-1] = CreateThread(NULL, 0, MyMetastatsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
431                     }
432                     
433                     //do my part
434                                         driver(lines[0].start, lines[0].num, thisLookUp);
435                     
436                     //Wait until all threads have terminated.
437                     WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
438                     
439                     //Close all thread handles and free memory allocations.
440                     for(int i=0; i < pDataArray.size(); i++){
441                         if (pDataArray[i]->count != (pDataArray[i]->num)) {
442                             m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->num) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
443                         }
444                         for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) {  delete pDataArray[i]->thisLookUp[j];  } 
445                         for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {  
446                             outputNames.push_back(pDataArray[i]->outputNames[j]);
447                             outputTypes["metastats"].push_back(pDataArray[i]->outputNames[j]);
448                         }
449                                                 
450                         CloseHandle(hThreadArray[i]);
451                         delete pDataArray[i];
452                     }
453         #endif
454
455                                 }
456                 
457                 return 0;
458                 
459         }
460         catch(exception& e) {
461                 m->errorOut(e, "MetaStatsCommand", "process");
462                 exit(1);
463         }
464 }
465 //**********************************************************************************************************************
466 int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) { 
467         try {
468         
469                 //for each combo
470                 for (int c = start; c < (start+num); c++) {
471                         
472                         //get set names
473                         string setA = namesOfGroupCombos[c][0];
474                         string setB = namesOfGroupCombos[c][1];
475                 
476                         //get filename
477             map<string, string> variables; 
478             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
479             variables["[distance]"] = thisLookUp[0]->getLabel();
480             variables["[group]"] = setA + "-" + setB;
481                         string outputFileName = getOutputFileName("metastats",variables);
482                         outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
483                         //int nameLength = outputFileName.length();
484                         //char * output = new char[nameLength];
485                         //strcpy(output, outputFileName.c_str());
486         
487                         //build matrix from shared rabunds
488                         //double** data;
489                         //data = new double*[thisLookUp[0]->getNumBins()];
490                         
491                         vector< vector<double> > data2; data2.resize(thisLookUp[0]->getNumBins());
492                         
493                         vector<SharedRAbundVector*> subset;
494                         int setACount = 0;
495                         int setBCount = 0;
496                         for (int i = 0; i < thisLookUp.size(); i++) {
497                                 string thisGroup = thisLookUp[i]->getGroup();
498                                 
499                                 //is this group for a set we want to compare??
500                                 //sorting the sets by putting setB at the back and setA in the front
501                                 if ((designMap->getGroup(thisGroup) == setB)) {  
502                                         subset.push_back(thisLookUp[i]);
503                                         setBCount++;
504                                 }else if ((designMap->getGroup(thisGroup) == setA)) {
505                                         subset.insert(subset.begin()+setACount, thisLookUp[i]);
506                                         setACount++;
507                                 }
508                         }
509                                                 
510                         if ((setACount == 0) || (setBCount == 0))  { 
511                                 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
512                                 outputNames.pop_back();
513                         }else {
514                 
515                                 //fill data
516                                 for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
517                                         //data[j] = new double[subset.size()];
518                                         data2[j].resize(subset.size(), 0.0);
519                    
520                                         for (int i = 0; i < subset.size(); i++) {
521                                                 data2[j][i] = (subset[i]->getAbundance(j));
522                                         }
523                                 }
524                                 
525                                 m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); 
526                                 //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
527                 if (convertSharedToInput) { convertToInput(subset, outputFileName);  }
528                                 
529                                 m->mothurOutEndLine();
530                                 MothurMetastats mothurMeta(threshold, iters);
531                                 mothurMeta.runMetastats(outputFileName , data2, setACount);
532                                 m->mothurOutEndLine();
533                                 m->mothurOutEndLine(); 
534                         }
535                         
536                         //free memory
537                         //delete output;
538                         //for(int i = 0; i < thisLookUp[0]->getNumBins(); i++)  {  delete[] data[i];  }
539                         //delete[] data; 
540                 }
541                 
542                 return 0;
543
544         }
545         catch(exception& e) {
546                 m->errorOut(e, "MetaStatsCommand", "driver");
547                 exit(1);
548         }
549 }
550 //**********************************************************************************************************************
551 /*Metastats files look like:
552  13_0   14_0    13_52   14_52   70S     71S     72S     M1      M2      M3      C11     C12     C21     C15     C16     C19     C3      C4      C9
553  Alphaproteobacteria    0       0       0       0       0       0       5       0       0       0       0       0       0       0       0       0       0       0       0
554  Mollicutes     0       0       2       0       0       59      5       11      4       1       0       2       8       1       0       1       0       3       0
555  Verrucomicrobiae       0       0       0       0       0       1       6       0       0       0       0       0       0       0       0       0       0       0       0
556  Deltaproteobacteria    0       0       0       0       0       6       1       0       1       0       1       1       7       0       0       0       0       0       0
557  Cyanobacteria  0       0       1       0       0       0       1       0       0       0       0       0       0       0       0       0       0       0       0
558  Epsilonproteobacteria  0       0       0       0       0       0       0       0       6       0       0       3       1       0       0       0       0       0       0
559  Clostridia     75      65      207     226     801     280     267     210     162     197     81      120     106     148     120     94      84      98      121
560  Bacilli        3       2       16      8       21      52      31      70      46      65      4       28      5       23      62      26      20      30      25
561  Bacteroidetes (class)  21      25      22      64      226     193     296     172     98      55      19      149     201     85      50      76      113     92      82
562  Gammaproteobacteria    0       0       0       0       0       1       0       0       0       0       1       1       0       0       0       1       0       0       0
563  TM7_genera_incertae_sedis      0       0       0       0       0       0       0       0       1       0       1       2       0       2       0       0       0       0       0
564  Actinobacteria (class) 1       1       1       2       0       0       0       9       3       7       1       1       1       3       1       2       1       2       3
565  Betaproteobacteria     0       0       3       3       0       0       9       1       1       0       1       2       3       1       1       0       0       0       0
566 */
567 //this function is just used to convert files to test the differences between the metastats version and mothurs version
568 int MetaStatsCommand::convertToShared(string filename) {
569         try {
570         ifstream in;
571         m->openInputFile(filename, in);
572         
573         string header = m->getline(in); m->gobble(in);
574         
575         vector<string> groups = m->splitWhiteSpace(header);
576         vector<SharedRAbundFloatVector*> newLookup;
577         cout << groups.size() << endl;
578         for (int i = 0; i < groups.size(); i++) {
579             cout << "creating group " << groups[i] << endl;
580             SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
581             temp->setLabel("0.03");
582             temp->setGroup(groups[i]);
583             newLookup.push_back(temp);
584         }
585         
586         int otuCount = 0;
587         while (!in.eof()) {
588             if (m->control_pressed) { break; }
589             
590             string otuname;
591             in >> otuname; m->gobble(in);
592             otuCount++;
593             cout << otuname << endl;
594             for (int i = 0; i < groups.size(); i++) {
595                 double temp;
596                 in >> temp; m->gobble(in);
597                 newLookup[i]->push_back(temp, groups[i]);
598             }
599             m->gobble(in);
600         }
601         in.close();
602     
603         ofstream out;
604         m->openOutputFile(filename+".shared", out);
605         
606         out << "label\tgroup\tnumOTUs\t";
607         
608         string snumBins = toString(otuCount);
609         for (int i = 0; i < otuCount; i++) {
610             string binLabel = "Otu";
611             string sbinNumber = toString(i+1);
612             if (sbinNumber.length() < snumBins.length()) {
613                 int diff = snumBins.length() - sbinNumber.length();
614                 for (int h = 0; h < diff; h++) { binLabel += "0"; }
615             }
616             binLabel += sbinNumber;
617             out << binLabel << '\t';
618         }
619         out << endl;
620         
621         for (int i = 0; i < groups.size(); i++) {
622             out << "0.03" << '\t' << groups[i] << '\t';
623             newLookup[i]->print(out);
624         }
625         out.close();
626         
627         cout << filename+".shared" << endl;
628         
629         return 0;
630     }
631         catch(exception& e) {
632                 m->errorOut(e, "MetaStatsCommand", "convertToShared");
633                 exit(1);
634         }
635 }
636 //**********************************************************************************************************************
637
638 int MetaStatsCommand::convertToInput(vector<SharedRAbundVector*>& subset, string thisfilename) {
639         try {
640         ofstream out;
641         m->openOutputFile(thisfilename+".matrix", out);
642         
643         out << "\t";
644         for (int i = 0; i < subset.size()-1; i++) {
645             out << subset[i]->getGroup() << '\t';
646         }
647         out << subset[subset.size()-1]->getGroup() << endl;
648         
649         for (int i = 0; i < subset[0]->getNumBins(); i++) {
650             out << m->currentSharedBinLabels[i] << '\t';
651             for (int j = 0; j < subset.size()-1; j++) {
652                 out << subset[j]->getAbundance(i) << '\t';
653             }
654             out << subset[subset.size()-1]->getAbundance(i) << endl;
655         }
656         out.close();
657         
658         cout << thisfilename+".matrix" << endl;
659         
660         return 0;
661     }
662         catch(exception& e) {
663                 m->errorOut(e, "MetaStatsCommand", "convertToInput");
664                 exit(1);
665         }
666 }
667
668 //**********************************************************************************************************************