]> git.donarmstrong.com Git - mothur.git/blob - homovacommand.cpp
working on anosim command
[mothur.git] / homovacommand.cpp
1 /*
2  *  homovacommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 2/8/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "homovacommand.h"
11 #include "sharedutilities.h"
12 #include "sharedsobscollectsummary.h"
13 #include "sharedchao1.h"
14 #include "sharedace.h"
15 #include "sharednseqs.h"
16 #include "sharedjabund.h"
17 #include "sharedsorabund.h"
18 #include "sharedjclass.h"
19 #include "sharedsorclass.h"
20 #include "sharedjest.h"
21 #include "sharedsorest.h"
22 #include "sharedthetayc.h"
23 #include "sharedthetan.h"
24 #include "sharedkstest.h"
25 #include "whittaker.h"
26 #include "sharedochiai.h"
27 #include "sharedanderbergs.h"
28 #include "sharedkulczynski.h"
29 #include "sharedkulczynskicody.h"
30 #include "sharedlennon.h"
31 #include "sharedmorisitahorn.h"
32 #include "sharedbraycurtis.h"
33 #include "sharedjackknife.h"
34 #include "whittaker.h"
35 #include "odum.h"
36 #include "canberra.h"
37 #include "structeuclidean.h"
38 #include "structchord.h"
39 #include "hellinger.h"
40 #include "manhattan.h"
41 #include "structpearson.h"
42 #include "soergel.h"
43 #include "spearman.h"
44 #include "structkulczynski.h"
45 #include "structchi2.h"
46 #include "speciesprofile.h"
47 #include "hamming.h"
48 #include "gower.h"
49 #include "memchi2.h"
50 #include "memchord.h"
51 #include "memeuclidean.h"
52 #include "mempearson.h"
53
54 //**********************************************************************************************************************
55 vector<string> HomovaCommand::getValidParameters(){     
56         try {
57                 string Array[] =  {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
58                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
59                 return myArray;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "HomovaCommand", "getValidParameters");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 HomovaCommand::HomovaCommand(){ 
68         try {
69                 abort = true; calledHelp = true; 
70                 vector<string> tempOutNames;
71                 outputTypes["homova"] = tempOutNames;
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "HomovaCommand", "HomovaCommand");
75                 exit(1);
76         }
77 }
78 //**********************************************************************************************************************
79 vector<string> HomovaCommand::getRequiredParameters(){  
80         try {
81                 string Array[] =  {"design"};
82                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
83                 return myArray;
84         }
85         catch(exception& e) {
86                 m->errorOut(e, "HomovaCommand", "getRequiredParameters");
87                 exit(1);
88         }
89 }
90 //**********************************************************************************************************************
91 vector<string> HomovaCommand::getRequiredFiles(){       
92         try {
93                 string Array[] =  {};
94                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
95                 return myArray;
96         }
97         catch(exception& e) {
98                 m->errorOut(e, "HomovaCommand", "getRequiredFiles");
99                 exit(1);
100         }
101 }
102 //**********************************************************************************************************************
103
104 HomovaCommand::HomovaCommand(string option) {
105         try {
106                 globaldata = GlobalData::getInstance();
107                 abort = false; calledHelp = false;   
108                 allLines = 1;
109                 labels.clear();
110                 
111                 //allow user to run help
112                 if(option == "help") { help(); abort = true; calledHelp = true; }
113                 
114                 else {
115                         //valid paramters for this command
116                         string AlignArray[] =  {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
117                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
118                         
119                         OptionParser parser(option);
120                         map<string,string> parameters = parser.getParameters();
121                         
122                         ValidParameters validParameter;
123                         
124                         //check to make sure all parameters are valid for command
125                         map<string,string>::iterator it;
126                         for (it = parameters.begin(); it != parameters.end(); it++) { 
127                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
128                         }
129                         
130                         //initialize outputTypes
131                         vector<string> tempOutNames;
132                         outputTypes["homova"] = tempOutNames;
133                         
134                         //if the user changes the output directory command factory will send this info to us in the output parameter 
135                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
136                         
137                         //if the user changes the input directory command factory will send this info to us in the output parameter 
138                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
139                         if (inputDir == "not found"){   inputDir = "";          }
140                         else {
141                                 string path;
142                                 it = parameters.find("design");
143                                 //user has given a template file
144                                 if(it != parameters.end()){ 
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
148                                 }
149                                 
150                                 it = parameters.find("phylip");
151                                 //user has given a template file
152                                 if(it != parameters.end()){ 
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
156                                 }
157                         }
158                         
159                         phylipfile = validParameter.validFile(parameters, "phylip", true);
160                         if (phylipfile == "not open") { phylipfile = ""; abort = true; }
161                         else if (phylipfile == "not found") { phylipfile = ""; }        
162                         else {  globaldata->newRead(); format = "phylip";  globaldata->setPhylipFile(phylipfile);       }
163                         
164                         //check for required parameters
165                         designfile = validParameter.validFile(parameters, "design", true);
166                         if (designfile == "not open") { abort = true; }
167                         else if (designfile == "not found") {  designfile = "";  m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }       
168                         
169                         //make sure the user has already run the read.otu command
170                         if ((globaldata->getSharedFile() == "")) {
171                                 if ((phylipfile == "")) {
172                                         m->mothurOut("You must read a list and a group, a shared file or provide a distance matrix before you can use the homova command."); m->mothurOutEndLine(); abort = true; 
173                                 }
174                         }else { sharedfile = globaldata->getSharedFile(); } 
175                         
176                         //use distance matrix if one is provided
177                         if ((sharedfile != "") && (phylipfile != "")) { sharedfile = ""; }
178                         
179                         //check for optional parameter and set defaults
180                         // ...at some point should added some additional type checking...
181                         label = validParameter.validFile(parameters, "label", false);                   
182                         if (label == "not found") { label = ""; }
183                         else { 
184                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
185                                 else { allLines = 1;  }
186                         }
187                         
188                         //if the user has not specified any labels use the ones from read.otu
189                         if (label == "") {  
190                                 allLines = globaldata->allLines; 
191                                 labels = globaldata->labels; 
192                         }
193                         
194                         groups = validParameter.validFile(parameters, "groups", false);                 
195                         if (groups == "not found") { groups = "";  }
196                         else { 
197                                 m->splitAtDash(groups, Groups);
198                                 globaldata->Groups = Groups;
199                         }
200                         
201                         sets = validParameter.validFile(parameters, "sets", false);                     
202                         if (sets == "not found") { sets = ""; pickedGroups = false; }
203                         else { 
204                                 pickedGroups = true;
205                                 m->splitAtDash(sets, Sets);
206                         }
207                         
208                         
209                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
210                         convert(temp, iters); 
211                         
212                         temp = validParameter.validFile(parameters, "processors", false);                       if (temp == "not found"){       temp = "1";     }
213                         convert(temp, processors); 
214                         
215                         vector<string> Estimators;
216                         calc = validParameter.validFile(parameters, "calc", false);                     
217                         if (calc == "not found") { calc = "morisitahorn";  }
218                         m->splitAtDash(calc, Estimators);
219                         
220                         if (abort == false) {
221                                 
222                                 ValidCalculators* validCalculator = new ValidCalculators();
223                                 
224                                 for (int i=0; i<Estimators.size(); i++) {
225                                         if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) { 
226                                                 if (Estimators[i] == "sharedsobs") { 
227                                                         calculators.push_back(new SharedSobsCS());
228                                                 }else if (Estimators[i] == "sharedchao") { 
229                                                         calculators.push_back(new SharedChao1());
230                                                 }else if (Estimators[i] == "sharedace") { 
231                                                         calculators.push_back(new SharedAce());
232                                                 }else if (Estimators[i] == "jabund") {  
233                                                         calculators.push_back(new JAbund());
234                                                 }else if (Estimators[i] == "sorabund") { 
235                                                         calculators.push_back(new SorAbund());
236                                                 }else if (Estimators[i] == "jclass") { 
237                                                         calculators.push_back(new Jclass());
238                                                 }else if (Estimators[i] == "sorclass") { 
239                                                         calculators.push_back(new SorClass());
240                                                 }else if (Estimators[i] == "jest") { 
241                                                         calculators.push_back(new Jest());
242                                                 }else if (Estimators[i] == "sorest") { 
243                                                         calculators.push_back(new SorEst());
244                                                 }else if (Estimators[i] == "thetayc") { 
245                                                         calculators.push_back(new ThetaYC());
246                                                 }else if (Estimators[i] == "thetan") { 
247                                                         calculators.push_back(new ThetaN());
248                                                 }else if (Estimators[i] == "kstest") { 
249                                                         calculators.push_back(new KSTest());
250                                                 }else if (Estimators[i] == "sharednseqs") { 
251                                                         calculators.push_back(new SharedNSeqs());
252                                                 }else if (Estimators[i] == "ochiai") { 
253                                                         calculators.push_back(new Ochiai());
254                                                 }else if (Estimators[i] == "anderberg") { 
255                                                         calculators.push_back(new Anderberg());
256                                                 }else if (Estimators[i] == "kulczynski") { 
257                                                         calculators.push_back(new Kulczynski());
258                                                 }else if (Estimators[i] == "kulczynskicody") { 
259                                                         calculators.push_back(new KulczynskiCody());
260                                                 }else if (Estimators[i] == "lennon") { 
261                                                         calculators.push_back(new Lennon());
262                                                 }else if (Estimators[i] == "morisitahorn") { 
263                                                         calculators.push_back(new MorHorn());
264                                                 }else if (Estimators[i] == "braycurtis") { 
265                                                         calculators.push_back(new BrayCurtis());
266                                                 }else if (Estimators[i] == "whittaker") { 
267                                                         calculators.push_back(new Whittaker());
268                                                 }else if (Estimators[i] == "odum") { 
269                                                         calculators.push_back(new Odum());
270                                                 }else if (Estimators[i] == "canberra") { 
271                                                         calculators.push_back(new Canberra());
272                                                 }else if (Estimators[i] == "structeuclidean") { 
273                                                         calculators.push_back(new StructEuclidean());
274                                                 }else if (Estimators[i] == "structchord") { 
275                                                         calculators.push_back(new StructChord());
276                                                 }else if (Estimators[i] == "hellinger") { 
277                                                         calculators.push_back(new Hellinger());
278                                                 }else if (Estimators[i] == "manhattan") { 
279                                                         calculators.push_back(new Manhattan());
280                                                 }else if (Estimators[i] == "structpearson") { 
281                                                         calculators.push_back(new StructPearson());
282                                                 }else if (Estimators[i] == "soergel") { 
283                                                         calculators.push_back(new Soergel());
284                                                 }else if (Estimators[i] == "spearman") { 
285                                                         calculators.push_back(new Spearman());
286                                                 }else if (Estimators[i] == "structkulczynski") { 
287                                                         calculators.push_back(new StructKulczynski());
288                                                 }else if (Estimators[i] == "speciesprofile") { 
289                                                         calculators.push_back(new SpeciesProfile());
290                                                 }else if (Estimators[i] == "hamming") { 
291                                                         calculators.push_back(new Hamming());
292                                                 }else if (Estimators[i] == "structchi2") { 
293                                                         calculators.push_back(new StructChi2());
294                                                 }else if (Estimators[i] == "gower") { 
295                                                         calculators.push_back(new Gower());
296                                                 }else if (Estimators[i] == "memchi2") { 
297                                                         calculators.push_back(new MemChi2());
298                                                 }else if (Estimators[i] == "memchord") { 
299                                                         calculators.push_back(new MemChord());
300                                                 }else if (Estimators[i] == "memeuclidean") { 
301                                                         calculators.push_back(new MemEuclidean());
302                                                 }else if (Estimators[i] == "mempearson") { 
303                                                         calculators.push_back(new MemPearson());
304                                                 }
305                                         }
306                                 }
307                         }
308                 }
309                 
310         }
311         catch(exception& e) {
312                 m->errorOut(e, "HomovaCommand", "HomovaCommand");
313                 exit(1);
314         }
315 }
316
317 //**********************************************************************************************************************
318
319 void HomovaCommand::help(){
320         try {
321                 m->mothurOut("Referenced: Stewart CN, Excoffier L (1996). Assessing population genetic structure and variability with RAPD data: Application to Vaccinium macrocarpon (American Cranberry). J Evol Biol 9: 153-71.\n");
322                 m->mothurOut("The homova command can only be executed after a successful read.otu command of a list and group or shared file, or by providing a phylip formatted distance matrix.\n");
323                 m->mothurOut("The homova command outputs a .homova file. \n");
324                 m->mothurOut("The homova command parameters are phylip, iters, groups, label, design, sets and processors.  The design parameter is required.\n");
325                 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running homova. It is required. \n");
326                 m->mothurOut("The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
327                 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile. To run the pairwise comparisons of all sets use sets=all.\n");
328                 m->mothurOut("The iters parameter allows you to set number of randomization for the P value.  The default is 1000. \n");
329                 m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes. groups=all will find all pairwise comparisons. \n");
330                 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
331                 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
332                 m->mothurOut("The homova command should be in the following format: homova(design=yourDesignFile).\n");
333                 m->mothurOut("Example amova(design=temp.design, groups=A-B-C).\n");
334                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
335                 
336         }
337         catch(exception& e) {
338                 m->errorOut(e, "HomovaCommand", "help");
339                 exit(1);
340         }
341 }
342
343 //**********************************************************************************************************************
344
345 HomovaCommand::~HomovaCommand(){}
346
347 //**********************************************************************************************************************
348
349 int HomovaCommand::execute(){
350         try {
351                 
352                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
353                 
354                 //read design file
355                 designMap = new GroupMap(designfile);
356                 designMap->readDesignMap();
357                 
358                 //make sure sets are all in designMap
359                 SharedUtil* util = new SharedUtil();  
360                 util->setGroups(Sets, designMap->namesOfGroups);  
361                 delete util;
362                 
363                 //if the user pickedGroups or set sets=all, then we want to figure out how to split the pairwise comparison between processors
364                 int numGroups = Sets.size();
365                 if (pickedGroups) {
366                         for (int a=0; a<numGroups; a++) { 
367                                 for (int l = 0; l < a; l++) {   
368                                         vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
369                                         namesOfGroupCombos.push_back(groups);
370                                 }
371                         }
372                 }else { //one giant compare
373                         vector<string> groups;
374                         for (int a=0; a<numGroups; a++) { groups.push_back(Sets[a]); }
375                         namesOfGroupCombos.push_back(groups);
376                 }
377                 
378                 //only 1 combo
379                 if (numGroups == 2) { processors = 1; }
380                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
381                 
382 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
383                 if(processors != 1){
384                         int numPairs = namesOfGroupCombos.size();
385                         int numPairsPerProcessor = numPairs / processors;
386                         
387                         for (int i = 0; i < processors; i++) {
388                                 int startPos = i * numPairsPerProcessor;
389                                 if(i == processors - 1){
390                                         numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
391                                 }
392                                 lines.push_back(linePair(startPos, numPairsPerProcessor));
393                         }
394                 }
395 #endif
396                 
397                 if (sharedfile != "") { //create distance matrix for each label
398                         
399                         if (outputDir == "") { outputDir = m->hasPath(sharedfile); }
400                         
401                         //for each calculator
402                         for(int i = 0 ; i < calculators.size(); i++) {
403                                 
404                                 //create a new filename
405                                 ofstream out;
406                                 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".homova";                           
407                                 m->openOutputFile(outputFile, out);
408                                 outputNames.push_back(outputFile); outputTypes["homova"].push_back(outputFile);
409                                 
410                                 //print headers
411                                 out << "label\tgroupsCompared\tBValue\tpValue" << endl;  
412                                 out.close();
413                         }
414                         
415                         InputData input(sharedfile, "sharedfile");
416                         vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
417                         string lastLabel = lookup[0]->getLabel();
418                         
419                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
420                         set<string> processedLabels;
421                         set<string> userLabels = labels;
422                         
423                         //sanity check between shared file groups and design file groups
424                         for (int i = 0; i < lookup.size(); i++) { 
425                                 string group = designMap->getGroup(lookup[i]->getGroup());
426                                 
427                                 if (group == "not found") { m->control_pressed = true;  m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct."); m->mothurOutEndLine();  }
428                         }
429                         
430                         //as long as you are not at the end of the file or done wih the lines you want
431                         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
432                                 
433                                 if (m->control_pressed) {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } globaldata->Groups.clear();  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); } return 0; }
434                                 
435                                 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
436                                         
437                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
438                                         process(lookup);
439                                         
440                                         processedLabels.insert(lookup[0]->getLabel());
441                                         userLabels.erase(lookup[0]->getLabel());
442                                 }
443                                 
444                                 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
445                                         string saveLabel = lookup[0]->getLabel();
446                                         
447                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
448                                         lookup = input.getSharedRAbundVectors(lastLabel);
449                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
450                                         
451                                         process(lookup);
452                                         
453                                         processedLabels.insert(lookup[0]->getLabel());
454                                         userLabels.erase(lookup[0]->getLabel());
455                                         
456                                         //restore real lastlabel to save below
457                                         lookup[0]->setLabel(saveLabel);
458                                 }
459                                 
460                                 lastLabel = lookup[0]->getLabel();
461                                 //prevent memory leak
462                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
463                                 
464                                 if (m->control_pressed) {  globaldata->Groups.clear();   delete designMap;  for (int i = 0; i < outputNames.size(); i++) {      remove(outputNames[i].c_str()); } return 0; }
465                                 
466                                 //get next line to process
467                                 lookup = input.getSharedRAbundVectors();                                
468                         }
469                         
470                         if (m->control_pressed) { globaldata->Groups.clear();  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); }  return 0; }
471                         
472                         //output error messages about any remaining user labels
473                         set<string>::iterator it;
474                         bool needToRun = false;
475                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
476                                 m->mothurOut("Your file does not include the label " + *it); 
477                                 if (processedLabels.count(lastLabel) != 1) {
478                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
479                                         needToRun = true;
480                                 }else {
481                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
482                                 }
483                         }
484                         
485                         //run last label if you need to
486                         if (needToRun == true)  {
487                                 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
488                                 lookup = input.getSharedRAbundVectors(lastLabel);
489                                 
490                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
491                                 
492                                 process(lookup);
493                                 
494                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
495                         }
496                         
497                         //reset groups parameter
498                         globaldata->Groups.clear();  
499                         
500                         
501                 }else { //user provided distance matrix
502                         
503                         if (outputDir == "") { outputDir = m->hasPath(phylipfile); }
504                         
505                         //create a new filename
506                         ofstream out;
507                         string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile))  + "homova";                               
508                         m->openOutputFile(outputFile, out);
509                         outputNames.push_back(outputFile); outputTypes["homova"].push_back(outputFile);
510                         
511                         //print headers
512                         out << "groupsCompared\tBValue\tpValue" << endl;  
513                         out.close();
514                         
515                         ReadPhylipVector readMatrix(phylipfile);
516                         vector< vector<double> > completeMatrix;
517                         vector<string> names = readMatrix.read(completeMatrix);
518                         
519 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
520                         if(processors == 1){
521                                 driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
522                         }else{
523                                 int process = 1;
524                                 vector<int> processIDS;
525                                 
526                                 //loop through and create all the processes you want
527                                 while (process != processors) {
528                                         int pid = fork();
529                                         
530                                         if (pid > 0) {
531                                                 processIDS.push_back(pid);  
532                                                 process++;
533                                         }else if (pid == 0){
534                                                 driver(lines[process].start, lines[process].num, names, toString(getpid()), completeMatrix);
535                                                 exit(0);
536                                         }else { 
537                                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
538                                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
539                                                 exit(0);
540                                         }
541                                 }
542                                 
543                                 //do my part
544                                 driver(lines[0].start, lines[0].num, names, "", completeMatrix);
545                                 
546                                 //force parent to wait until all the processes are done
547                                 for (int i=0;i<(processors-1);i++) { 
548                                         int temp = processIDS[i];
549                                         wait(&temp);
550                                 }
551                                 
552                                 //append files
553                                 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "homova";                                
554                                 for (int j = 0; j < processIDS.size(); j++) {
555                                         m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
556                                         remove((outputFile + toString(processIDS[j])).c_str());
557                                 }
558                                 
559                         }
560 #else
561                         driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
562 #endif
563                         
564                 }
565                 
566                 delete designMap;
567                 
568                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
569                 
570                 m->mothurOutEndLine();
571                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
572                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
573                 m->mothurOutEndLine();
574                 
575                 return 0;
576         }
577         catch(exception& e) {
578                 m->errorOut(e, "HomovaCommand", "execute");
579                 exit(1);
580         }
581 }
582 //**********************************************************************************************************************
583
584 int HomovaCommand::process(vector<SharedRAbundVector*> thisLookup) {
585         try{
586                 
587 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
588                 if(processors == 1){
589                         driver(0, namesOfGroupCombos.size(), thisLookup, "");
590                 }else{
591                         int process = 1;
592                         vector<int> processIDS;
593                         
594                         //loop through and create all the processes you want
595                         while (process != processors) {
596                                 int pid = fork();
597                                 
598                                 if (pid > 0) {
599                                         processIDS.push_back(pid);  
600                                         process++;
601                                 }else if (pid == 0){
602                                         driver(lines[process].start, lines[process].num, thisLookup, toString(getpid()));
603                                         exit(0);
604                                 }else { 
605                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
606                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
607                                         exit(0);
608                                 }
609                         }
610                         
611                         //do my part
612                         driver(lines[0].start, lines[0].num, thisLookup, "");
613                         
614                         //force parent to wait until all the processes are done
615                         for (int i=0;i<(processors-1);i++) { 
616                                 int temp = processIDS[i];
617                                 wait(&temp);
618                         }
619                         
620                         //append files
621                         for(int i = 0 ; i < calculators.size(); i++) {
622                                 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".homova";                           
623                                 
624                                 for (int j = 0; j < processIDS.size(); j++) {
625                                         m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
626                                         remove((outputFile + toString(processIDS[j])).c_str());
627                                 }
628                         }
629                 }
630 #else
631                 driver(0, namesOfGroupCombos.size(), thisLookUp, "");
632 #endif
633                 
634                 return 0;
635         }
636         catch(exception& e) {
637                 m->errorOut(e, "HomovaCommand", "process");
638                 exit(1);
639         }
640 }
641 //**********************************************************************************************************************
642
643 int HomovaCommand::driver(int start, int num, vector<SharedRAbundVector*> thisLookup, string pidValue) {
644         try {
645                 vector<SharedRAbundVector*> subset;
646                 EstOutput data;
647                 
648                 //for each calculator
649                 for(int i = 0 ; i < calculators.size(); i++) {
650                         
651                         //create a new filename
652                         ofstream out;
653                         string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".homova" + pidValue;                                
654                         m->openOutputFileAppend(outputFile, out);
655                         out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
656                         
657                         //for each combo
658                         for (int c = start; c < (start+num); c++) {
659                                 
660                                 if (m->control_pressed) { out.close(); return 0; }
661                                 
662                                 //get set names
663                                 vector<string> setNames;
664                                 for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
665                                 
666                                 vector<SharedRAbundVector*> thisCombosLookup;
667                                 vector<string> thisCombosLookupSets; //what set each sharedRabund is from to be used when calculating SSWithin
668                                 for (int k = 0; k < thisLookup.size(); k++) {
669                                         string thisGroup = thisLookup[k]->getGroup();
670                                         
671                                         //is this group for a set we want to compare??
672                                         if (m->inUsersGroups(designMap->getGroup(thisGroup), setNames)) {  
673                                                 thisCombosLookup.push_back(thisLookup[k]);
674                                                 thisCombosLookupSets.push_back(designMap->getGroup(thisGroup));
675                                         }
676                                         
677                                 }
678                                 
679                                 int numGroups = thisCombosLookup.size();
680                                 
681                                 //calc the distance matrix
682                                 matrix.clear();
683                                 matrix.resize(numGroups);
684                                 for (int k = 0; k < matrix.size(); k++) {       for (int j = 0; j < matrix.size(); j++) {       matrix[k].push_back(1.0);       }       }
685                                 
686                                 if (thisCombosLookup.size() == 0)  { 
687                                         m->mothurOut("[ERROR]: Missing shared info for sets. Skipping comparison."); m->mothurOutEndLine(); 
688                                 }else{
689                                         
690                                         out << thisLookup[0]->getLabel() << '\t';
691                                         if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; }
692                                         else { out << "all" << '\t'; }
693                                         
694                                         for (int k = 0; k < thisCombosLookup.size(); k++) { 
695                                                 for (int l = k; l < thisCombosLookup.size(); l++) {
696                                                         
697                                                         if (m->control_pressed) { out.close(); return 0; }
698                                                         
699                                                         if (k != l) { //we dont need to similiarity of a groups to itself
700                                                                 //get estimated similarity between 2 groups
701                                                                 subset.clear(); //clear out old pair of sharedrabunds
702                                                                 //add new pair of sharedrabunds
703                                                                 subset.push_back(thisCombosLookup[k]); subset.push_back(thisCombosLookup[l]); 
704                                                                 
705                                                                 //if this calc needs all groups to calculate the pair load all groups
706                                                                 if (calculators[i]->getNeedsAll()) { 
707                                                                         //load subset with rest of lookup for those calcs that need everyone to calc for a pair
708                                                                         for (int w = 0; w < thisCombosLookup.size(); w++) {
709                                                                                 if ((w != k) && (w != l)) { subset.push_back(thisCombosLookup[w]); }
710                                                                         }
711                                                                 }
712                                                                 
713                                                                 data = calculators[i]->getValues(subset); //saves the calculator outputs
714                                                                 
715                                                                 //save values in similarity matrix
716                                                                 matrix[k][l] = 1.0 - data[0];
717                                                                 matrix[l][k] = 1.0 - data[0];
718                                                         }
719                                                 }
720                                         }
721                                         
722                                         //calc homova
723                                         calcHomova(out, setNames.size(), thisCombosLookupSets);
724                                 }
725                         }
726                         
727                         out.close();
728                 }               
729                 
730                 return 0;
731                 
732         }
733         catch(exception& e) {
734                 m->errorOut(e, "HomovaCommand", "driver");
735                 exit(1);
736         }
737 }
738 //**********************************************************************************************************************
739
740 int HomovaCommand::driver(int start, int num, vector<string> names, string pidValue, vector< vector<double> >& completeMatrix) {
741         try {
742                 
743                 //create a new filename
744                 ofstream out;
745                 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "homova" + pidValue;                             
746                 m->openOutputFileAppend(outputFile, out);
747                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
748                 
749                 //for each combo
750                 for (int c = start; c < (start+num); c++) {
751                         
752                         if (m->control_pressed) { out.close(); return 0; }
753                         
754                         //get set names
755                         vector<string> setNames;
756                         for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
757                         
758                         vector<string> thisCombosSets; //what set each line in the distance matrix is from to be used when calculating SSWithin
759                         set<int> indexes;
760                         for (int k = 0; k < names.size(); k++) {
761                                 //is this group for a set we want to compare??
762                                 if (m->inUsersGroups(designMap->getGroup(names[k]), setNames)) {  
763                                         thisCombosSets.push_back(designMap->getGroup(names[k]));        
764                                         indexes.insert(k); //save indexes of valid rows in matrix for submatrix
765                                 }
766                         }
767                         
768                         int numGroups = thisCombosSets.size();
769                         
770                         //calc the distance matrix
771                         matrix.clear();
772                         matrix.resize(numGroups);
773                         
774                         for (int k = 0; k < matrix.size(); k++) {       for (int j = 0; j < matrix.size(); j++) {       matrix[k].push_back(1.0);       }       }
775                         
776                         if (thisCombosSets.size() == 0)  { 
777                                 m->mothurOut("[ERROR]: Missing distance info for sets. Skipping comparison."); m->mothurOutEndLine(); 
778                         }else{
779                                 
780                                 if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; }
781                                 else { out << "all" << '\t'; }
782                                 
783                                 //fill submatrix
784                                 int rowCount = 0;
785                                 for (int j = 0; j < completeMatrix.size(); j++) {
786                                         
787                                         if (indexes.count(j) != 0) { //we want at least part of this row
788                                                 int columnCount = 0;
789                                                 for (int k = 0; k < completeMatrix[j].size(); k++) {
790                                                         
791                                                         if (indexes.count(k) != 0) { //we want this distance
792                                                                 matrix[rowCount][columnCount] = completeMatrix[j][k];
793                                                                 matrix[columnCount][rowCount] = completeMatrix[j][k];
794                                                                 columnCount++;
795                                                         }
796                                                 }
797                                                 rowCount++;
798                                         }
799                                 }
800                                 
801                                 //calc homova
802                                 calcHomova(out, setNames.size(), thisCombosSets);
803                         }
804                 }
805                 
806                 out.close();
807                 
808                 
809                 return 0;
810                 
811         }
812         catch(exception& e) {
813                 m->errorOut(e, "HomovaCommand", "driver");
814                 exit(1);
815         }
816 }
817 //**********************************************************************************************************************
818 int HomovaCommand::calcHomova(ofstream& out, int numTreatments, vector<string> thisCombosLookupSets) {
819         try {
820                 
821                 double SSTotal, BValue, pValue;
822                 map<string, double> SSWithin;
823                 map<string, double>::iterator it;
824                 
825                 SSTotal = calcWithin(matrix, numTreatments, thisCombosLookupSets);
826                 int numSamples = matrix.size();
827                 
828                 //calc BValue
829                 map<string, int> counts;
830                 SSWithin = calcWithinEach(matrix, numTreatments, thisCombosLookupSets, counts);
831                                 
832                 double sum = 0.0;
833                 double sumDenom = 0.0;
834                 for (it = SSWithin.begin(); it != SSWithin.end(); it++) {
835                         double temp2 = (it->second) / (double) (counts[it->first] - 1);
836                         sum += ((counts[it->first] - 1) * log(temp2));
837                         sumDenom += ((1 / (double) (counts[it->first] - 1)) - ( 1 / (double) (numSamples - numTreatments) ));
838                 }
839                 
840                 double temp = SSTotal / (double) (numSamples - numTreatments);
841                 double numeratorTerm1 = (numSamples - numTreatments) * log(temp);
842                 double numerator = numeratorTerm1 - sum;
843                 double denom = 1 + ((1 / (double) (3 * (numTreatments - 1))) * sumDenom);
844                 
845                 BValue = numerator / denom;
846                 
847                 //calc Pvalue
848                 int count = 0;
849                 for (int i = 0; i < iters; i++) {
850                         if (m->control_pressed) { break; }
851                         
852                         //randomly shuffle names to randomize the matrix
853                         vector<string> copyNames = thisCombosLookupSets;
854                         random_shuffle(copyNames.begin(), copyNames.end());
855                         
856                         SSTotal = calcWithin(matrix, numTreatments, copyNames);
857                         
858                         counts.clear();
859                         map<string, double> randomSSWithin = calcWithinEach(matrix, numTreatments, copyNames, counts);
860                         
861                         sum = 0.0;
862                         sumDenom = 0.0;
863                         for (it = randomSSWithin.begin(); it != randomSSWithin.end(); it++) {
864                                 double temp2 = (it->second) / (double) (counts[it->first] - numTreatments);
865                                 sum += ((counts[it->first] - 1) * log(temp2));
866                                 sumDenom += ((1 / (double) (counts[it->first] - 1)) - ( 1 / (double) (numSamples - numTreatments) ));
867                         }
868                         
869                         temp = SSTotal / (double) (numSamples - numTreatments);
870                         numeratorTerm1 = (numSamples - numTreatments) * log(temp);
871                         numerator = numeratorTerm1 - sum;
872                         denom = 1 + ((1 / (double) (3 * (numTreatments - 1))) * sumDenom);
873                         
874                         double randomBValue = numerator / denom;
875                         
876                         if (randomBValue <= BValue) { count++; }
877                 }
878                                 
879                 pValue = count / (float) iters;
880                 
881                 out << BValue << '\t' << pValue << endl;
882
883                 return 0;
884         }
885         catch(exception& e) {
886                 m->errorOut(e, "HomovaCommand", "calcHomova");
887                 exit(1);
888         }
889 }
890 //**********************************************************************************************************************
891 map<string, double> HomovaCommand::calcWithinEach(vector< vector<double> >& thisMatrix, int numTreatments, vector<string> thisCombosLookupSets, map<string, int>& count) {
892         try {
893                 map<string, double> within; //maps treatment to within value
894                 map<string, double>::iterator it;
895                 map<string, int>::iterator itCount;
896                 
897                 for (int i = 0; i < thisCombosLookupSets.size(); i++) {
898                         itCount = count.find(thisCombosLookupSets[i]);
899                         
900                         if (itCount == count.end()) { //first time we have seen this treatment
901                                 count[thisCombosLookupSets[i]] = 1; 
902                         }else {
903                                 count[thisCombosLookupSets[i]]++;
904                         }
905                         
906                         //initialize within
907                         within[thisCombosLookupSets[i]] = 0.0;
908                 }
909                         
910                 
911                 //traverse lower triangle
912                 for (int k = 0; k < thisMatrix.size(); k++) { 
913                         for (int l = k; l < thisMatrix[k].size(); l++) {
914                                 
915                                 //if you are from the same treatment then eij is 1 so add, else eij = 0
916                                 if (thisCombosLookupSets[k] == thisCombosLookupSets[l]) { 
917                                         within[thisCombosLookupSets[k]] += (thisMatrix[k][l] * thisMatrix[k][l]); //dij^2
918                                 }
919                         }
920                 }
921                 
922                 //1 / (numSamples in this treatment)
923                 for (it = within.begin(); it != within.end(); it++) {
924                         (it->second) *= (1.0 / (float) count[it->first]);
925                 }
926                 
927                 return within;
928         }
929         catch(exception& e) {
930                 m->errorOut(e, "HomovaCommand", "calcWithinEach");
931                 exit(1);
932         }
933 }
934 //**********************************************************************************************************************
935 double HomovaCommand::calcWithin(vector< vector<double> >& thisMatrix, int numTreatments, vector<string> thisCombosLookupSets) {
936         try {
937                 double total = 0.0;
938                 
939                 //traverse lower triangle
940                 for (int k = 0; k < thisMatrix.size(); k++) { 
941                         for (int l = k; l < thisMatrix[k].size(); l++) {
942                                 
943                                 //if you are from the same treatment then eij is 1 so add, else eij = 0
944                                 if (thisCombosLookupSets[k] == thisCombosLookupSets[l]) { 
945                                         total += (thisMatrix[k][l] * thisMatrix[k][l]); //dij^2
946                                 }
947                         }
948                 }
949                 
950                 //1 / (numSamples / numTreatments)
951                 total *= (1.0 / (float) (thisMatrix.size() / (float) numTreatments));
952                 
953                 return total;
954         }
955         catch(exception& e) {
956                 m->errorOut(e, "HomovaCommand", "calcWithin");
957                 exit(1);
958         }
959 }
960 //**********************************************************************************************************************
961
962