]> git.donarmstrong.com Git - mothur.git/blob - amovacommand.cpp
added amova command
[mothur.git] / amovacommand.cpp
1 /*
2  *  amovacommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 2/7/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "amovacommand.h"
11 #include "sharedutilities.h"
12 #include "sharedsobscollectsummary.h"
13 #include "sharedchao1.h"
14 #include "sharedace.h"
15 #include "sharednseqs.h"
16 #include "sharedjabund.h"
17 #include "sharedsorabund.h"
18 #include "sharedjclass.h"
19 #include "sharedsorclass.h"
20 #include "sharedjest.h"
21 #include "sharedsorest.h"
22 #include "sharedthetayc.h"
23 #include "sharedthetan.h"
24 #include "sharedkstest.h"
25 #include "whittaker.h"
26 #include "sharedochiai.h"
27 #include "sharedanderbergs.h"
28 #include "sharedkulczynski.h"
29 #include "sharedkulczynskicody.h"
30 #include "sharedlennon.h"
31 #include "sharedmorisitahorn.h"
32 #include "sharedbraycurtis.h"
33 #include "sharedjackknife.h"
34 #include "whittaker.h"
35 #include "odum.h"
36 #include "canberra.h"
37 #include "structeuclidean.h"
38 #include "structchord.h"
39 #include "hellinger.h"
40 #include "manhattan.h"
41 #include "structpearson.h"
42 #include "soergel.h"
43 #include "spearman.h"
44 #include "structkulczynski.h"
45 #include "structchi2.h"
46 #include "speciesprofile.h"
47 #include "hamming.h"
48 #include "gower.h"
49 #include "memchi2.h"
50 #include "memchord.h"
51 #include "memeuclidean.h"
52 #include "mempearson.h"
53
54 //**********************************************************************************************************************
55 vector<string> AmovaCommand::getValidParameters(){      
56         try {
57                 string Array[] =  {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
58                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
59                 return myArray;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "AmovaCommand", "getValidParameters");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 AmovaCommand::AmovaCommand(){   
68         try {
69                 abort = true;
70                 //initialize outputTypes
71                 vector<string> tempOutNames;
72                 outputTypes["amova"] = tempOutNames;
73         }
74         catch(exception& e) {
75                 m->errorOut(e, "AmovaCommand", "AmovaCommand");
76                 exit(1);
77         }
78 }
79 //**********************************************************************************************************************
80 vector<string> AmovaCommand::getRequiredParameters(){   
81         try {
82                 string Array[] =  {"design"};
83                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
84                 return myArray;
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "AmovaCommand", "getRequiredParameters");
88                 exit(1);
89         }
90 }
91 //**********************************************************************************************************************
92 vector<string> AmovaCommand::getRequiredFiles(){        
93         try {
94                 string Array[] =  {};
95                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
96                 return myArray;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "AmovaCommand", "getRequiredFiles");
100                 exit(1);
101         }
102 }
103 //**********************************************************************************************************************
104
105 AmovaCommand::AmovaCommand(string option) {
106         try {
107                 globaldata = GlobalData::getInstance();
108                 abort = false;
109                 allLines = 1;
110                 labels.clear();
111                 
112                 //allow user to run help
113                 if(option == "help") { help(); abort = true; }
114                 
115                 else {
116                         //valid paramters for this command
117                         string AlignArray[] =  {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
118                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
119                         
120                         OptionParser parser(option);
121                         map<string,string> parameters = parser.getParameters();
122                         
123                         ValidParameters validParameter;
124                         
125                         //check to make sure all parameters are valid for command
126                         map<string,string>::iterator it;
127                         for (it = parameters.begin(); it != parameters.end(); it++) { 
128                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
129                         }
130                         
131                         //initialize outputTypes
132                         vector<string> tempOutNames;
133                         outputTypes["amova"] = tempOutNames;
134                         
135                         //if the user changes the output directory command factory will send this info to us in the output parameter 
136                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
137                         
138                         //if the user changes the input directory command factory will send this info to us in the output parameter 
139                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
140                         if (inputDir == "not found"){   inputDir = "";          }
141                         else {
142                                 string path;
143                                 it = parameters.find("design");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
149                                 }
150                                 
151                                 it = parameters.find("phylip");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
157                                 }
158                         }
159                         
160                         phylipfile = validParameter.validFile(parameters, "phylip", true);
161                         if (phylipfile == "not open") { phylipfile = ""; abort = true; }
162                         else if (phylipfile == "not found") { phylipfile = ""; }        
163                         else {  globaldata->newRead(); format = "phylip";  globaldata->setPhylipFile(phylipfile);       }
164                         
165                         //check for required parameters
166                         designfile = validParameter.validFile(parameters, "design", true);
167                         if (designfile == "not open") { abort = true; }
168                         else if (designfile == "not found") {  designfile = "";  m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }       
169                         
170                         //make sure the user has already run the read.otu command
171                         if ((globaldata->getSharedFile() == "")) {
172                                 if ((phylipfile == "")) {
173                                         m->mothurOut("You must read a list and a group, a shared file or provide a distance matrix before you can use the amova command."); m->mothurOutEndLine(); abort = true; 
174                                 }
175                         }else { sharedfile = globaldata->getSharedFile(); } 
176                         
177                         //use distance matrix if one is provided
178                         if ((sharedfile != "") && (phylipfile != "")) { sharedfile = ""; }
179                         
180                         //check for optional parameter and set defaults
181                         // ...at some point should added some additional type checking...
182                         label = validParameter.validFile(parameters, "label", false);                   
183                         if (label == "not found") { label = ""; }
184                         else { 
185                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
186                                 else { allLines = 1;  }
187                         }
188                         
189                         //if the user has not specified any labels use the ones from read.otu
190                         if (label == "") {  
191                                 allLines = globaldata->allLines; 
192                                 labels = globaldata->labels; 
193                         }
194                         
195                         groups = validParameter.validFile(parameters, "groups", false);                 
196                         if (groups == "not found") { groups = "";  }
197                         else { 
198                                 m->splitAtDash(groups, Groups);
199                                 globaldata->Groups = Groups;
200                         }
201                         
202                         sets = validParameter.validFile(parameters, "sets", false);                     
203                         if (sets == "not found") { sets = ""; pickedGroups = false; }
204                         else { 
205                                 pickedGroups = true;
206                                 m->splitAtDash(sets, Sets);
207                         }
208                         
209                         
210                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
211                         convert(temp, iters); 
212                         
213                         temp = validParameter.validFile(parameters, "processors", false);                       if (temp == "not found"){       temp = "1";     }
214                         convert(temp, processors); 
215                         
216                         vector<string> Estimators;
217                         calc = validParameter.validFile(parameters, "calc", false);                     
218                         if (calc == "not found") { calc = "morisitahorn";  }
219                         m->splitAtDash(calc, Estimators);
220                                 
221                         if (abort == false) {
222                                 
223                                 ValidCalculators* validCalculator = new ValidCalculators();
224                                 
225                                 for (int i=0; i<Estimators.size(); i++) {
226                                         if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) { 
227                                                 if (Estimators[i] == "sharedsobs") { 
228                                                         calculators.push_back(new SharedSobsCS());
229                                                 }else if (Estimators[i] == "sharedchao") { 
230                                                         calculators.push_back(new SharedChao1());
231                                                 }else if (Estimators[i] == "sharedace") { 
232                                                         calculators.push_back(new SharedAce());
233                                                 }else if (Estimators[i] == "jabund") {  
234                                                         calculators.push_back(new JAbund());
235                                                 }else if (Estimators[i] == "sorabund") { 
236                                                         calculators.push_back(new SorAbund());
237                                                 }else if (Estimators[i] == "jclass") { 
238                                                         calculators.push_back(new Jclass());
239                                                 }else if (Estimators[i] == "sorclass") { 
240                                                         calculators.push_back(new SorClass());
241                                                 }else if (Estimators[i] == "jest") { 
242                                                         calculators.push_back(new Jest());
243                                                 }else if (Estimators[i] == "sorest") { 
244                                                         calculators.push_back(new SorEst());
245                                                 }else if (Estimators[i] == "thetayc") { 
246                                                         calculators.push_back(new ThetaYC());
247                                                 }else if (Estimators[i] == "thetan") { 
248                                                         calculators.push_back(new ThetaN());
249                                                 }else if (Estimators[i] == "kstest") { 
250                                                         calculators.push_back(new KSTest());
251                                                 }else if (Estimators[i] == "sharednseqs") { 
252                                                         calculators.push_back(new SharedNSeqs());
253                                                 }else if (Estimators[i] == "ochiai") { 
254                                                         calculators.push_back(new Ochiai());
255                                                 }else if (Estimators[i] == "anderberg") { 
256                                                         calculators.push_back(new Anderberg());
257                                                 }else if (Estimators[i] == "kulczynski") { 
258                                                         calculators.push_back(new Kulczynski());
259                                                 }else if (Estimators[i] == "kulczynskicody") { 
260                                                         calculators.push_back(new KulczynskiCody());
261                                                 }else if (Estimators[i] == "lennon") { 
262                                                         calculators.push_back(new Lennon());
263                                                 }else if (Estimators[i] == "morisitahorn") { 
264                                                         calculators.push_back(new MorHorn());
265                                                 }else if (Estimators[i] == "braycurtis") { 
266                                                         calculators.push_back(new BrayCurtis());
267                                                 }else if (Estimators[i] == "whittaker") { 
268                                                         calculators.push_back(new Whittaker());
269                                                 }else if (Estimators[i] == "odum") { 
270                                                         calculators.push_back(new Odum());
271                                                 }else if (Estimators[i] == "canberra") { 
272                                                         calculators.push_back(new Canberra());
273                                                 }else if (Estimators[i] == "structeuclidean") { 
274                                                         calculators.push_back(new StructEuclidean());
275                                                 }else if (Estimators[i] == "structchord") { 
276                                                         calculators.push_back(new StructChord());
277                                                 }else if (Estimators[i] == "hellinger") { 
278                                                         calculators.push_back(new Hellinger());
279                                                 }else if (Estimators[i] == "manhattan") { 
280                                                         calculators.push_back(new Manhattan());
281                                                 }else if (Estimators[i] == "structpearson") { 
282                                                         calculators.push_back(new StructPearson());
283                                                 }else if (Estimators[i] == "soergel") { 
284                                                         calculators.push_back(new Soergel());
285                                                 }else if (Estimators[i] == "spearman") { 
286                                                         calculators.push_back(new Spearman());
287                                                 }else if (Estimators[i] == "structkulczynski") { 
288                                                         calculators.push_back(new StructKulczynski());
289                                                 }else if (Estimators[i] == "speciesprofile") { 
290                                                         calculators.push_back(new SpeciesProfile());
291                                                 }else if (Estimators[i] == "hamming") { 
292                                                         calculators.push_back(new Hamming());
293                                                 }else if (Estimators[i] == "structchi2") { 
294                                                         calculators.push_back(new StructChi2());
295                                                 }else if (Estimators[i] == "gower") { 
296                                                         calculators.push_back(new Gower());
297                                                 }else if (Estimators[i] == "memchi2") { 
298                                                         calculators.push_back(new MemChi2());
299                                                 }else if (Estimators[i] == "memchord") { 
300                                                         calculators.push_back(new MemChord());
301                                                 }else if (Estimators[i] == "memeuclidean") { 
302                                                         calculators.push_back(new MemEuclidean());
303                                                 }else if (Estimators[i] == "mempearson") { 
304                                                         calculators.push_back(new MemPearson());
305                                                 }
306                                         }
307                                 }
308                         }
309                 }
310                 
311         }
312         catch(exception& e) {
313                 m->errorOut(e, "AmovaCommand", "AmovaCommand");
314                 exit(1);
315         }
316 }
317
318 //**********************************************************************************************************************
319
320 void AmovaCommand::help(){
321         try {
322                 m->mothurOut("The amova command can only be executed after a successful read.otu command of a list and group or shared file, or by providing a phylip formatted distance matrix.\n");
323                 m->mothurOut("The amova command outputs a .amova file. \n");
324                 m->mothurOut("The amova command parameters are phylip, iters, groups, label, design, sets and processors.  The design parameter is required.\n");
325                 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running amova. It is required. \n");
326                 m->mothurOut("The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
327                 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n");
328                 m->mothurOut("The iters parameter allows you to set number of randomization for the P value.  The default is 1000. \n");
329                 m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes. groups=all will find all pairwise comparisons. \n");
330                 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
331                 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
332                 m->mothurOut("The amova command should be in the following format: amova(design=yourDesignFile).\n");
333                 m->mothurOut("Example amova(design=temp.design, groups=A-B-C).\n");
334                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
335                 
336         }
337         catch(exception& e) {
338                 m->errorOut(e, "AmovaCommand", "help");
339                 exit(1);
340         }
341 }
342
343 //**********************************************************************************************************************
344
345 AmovaCommand::~AmovaCommand(){}
346
347 //**********************************************************************************************************************
348
349 int AmovaCommand::execute(){
350         try {
351                 
352                 if (abort == true) { return 0; }
353                 
354                 //read design file
355                 designMap = new GroupMap(designfile);
356                 designMap->readDesignMap();
357                 
358                 //make sure sets are all in designMap
359                 SharedUtil* util = new SharedUtil();  
360                 util->setGroups(Sets, designMap->namesOfGroups);  
361                 delete util;
362                 
363                 //if the user pickedGroups or set sets=all, then we want to figure out how to split the pairwise comparison between processors
364                 int numGroups = Sets.size();
365                 if (pickedGroups) {
366                         for (int a=0; a<numGroups; a++) { 
367                                 for (int l = 0; l < a; l++) {   
368                                         vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
369                                         namesOfGroupCombos.push_back(groups);
370                                 }
371                         }
372                 }else { //one giant compare
373                         vector<string> groups;
374                         for (int a=0; a<numGroups; a++) { groups.push_back(Sets[a]); }
375                         namesOfGroupCombos.push_back(groups);
376                 }
377                 
378                 //only 1 combo
379                 if (numGroups == 2) { processors = 1; }
380                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
381                 
382                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
383                         if(processors != 1){
384                                 int numPairs = namesOfGroupCombos.size();
385                                 int numPairsPerProcessor = numPairs / processors;
386                         
387                                 for (int i = 0; i < processors; i++) {
388                                         int startPos = i * numPairsPerProcessor;
389                                         if(i == processors - 1){
390                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
391                                         }
392                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
393                                 }
394                         }
395                 #endif
396                 
397                 if (sharedfile != "") { //create distance matrix for each label
398                         
399                         if (outputDir == "") { outputDir = m->hasPath(sharedfile); }
400                         
401                         //for each calculator
402                         for(int i = 0 ; i < calculators.size(); i++) {
403                                 
404                                 //create a new filename
405                                 ofstream out;
406                                 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova";                            
407                                 m->openOutputFile(outputFile, out);
408                                 outputNames.push_back(outputFile); outputTypes["amova"].push_back(outputFile);
409                                 
410                                 //print headers
411                                 out << "label\tgroupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue" << endl;  
412                                 out.close();
413                         }
414                         
415                         InputData input("sharedfile", sharedfile);
416                         vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
417                         string lastLabel = lookup[0]->getLabel();
418                         
419                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
420                         set<string> processedLabels;
421                         set<string> userLabels = labels;
422                         
423                         //sanity check between shared file groups and design file groups
424                         for (int i = 0; i < lookup.size(); i++) { 
425                                 string group = designMap->getGroup(lookup[i]->getGroup());
426                                 
427                                 if (group == "not found") { m->control_pressed = true;  m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct."); m->mothurOutEndLine();  }
428                         }
429                         
430                         //as long as you are not at the end of the file or done wih the lines you want
431                         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
432                                 
433                                 if (m->control_pressed) {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } globaldata->Groups.clear();  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); } return 0; }
434                                 
435                                 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
436                                         
437                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
438                                         process(lookup);
439                                         
440                                         processedLabels.insert(lookup[0]->getLabel());
441                                         userLabels.erase(lookup[0]->getLabel());
442                                 }
443                                 
444                                 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
445                                         string saveLabel = lookup[0]->getLabel();
446                                         
447                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
448                                         lookup = input.getSharedRAbundVectors(lastLabel);
449                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
450                                         
451                                         process(lookup);
452                                         
453                                         processedLabels.insert(lookup[0]->getLabel());
454                                         userLabels.erase(lookup[0]->getLabel());
455                                         
456                                         //restore real lastlabel to save below
457                                         lookup[0]->setLabel(saveLabel);
458                                 }
459                                 
460                                 lastLabel = lookup[0]->getLabel();
461                                 //prevent memory leak
462                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
463                                 
464                                 if (m->control_pressed) {  globaldata->Groups.clear();   delete designMap;  for (int i = 0; i < outputNames.size(); i++) {      remove(outputNames[i].c_str()); } return 0; }
465                                 
466                                 //get next line to process
467                                 lookup = input.getSharedRAbundVectors();                                
468                         }
469                         
470                         if (m->control_pressed) { globaldata->Groups.clear();  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); }  return 0; }
471                         
472                         //output error messages about any remaining user labels
473                         set<string>::iterator it;
474                         bool needToRun = false;
475                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
476                                 m->mothurOut("Your file does not include the label " + *it); 
477                                 if (processedLabels.count(lastLabel) != 1) {
478                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
479                                         needToRun = true;
480                                 }else {
481                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
482                                 }
483                         }
484                         
485                         //run last label if you need to
486                         if (needToRun == true)  {
487                                 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
488                                 lookup = input.getSharedRAbundVectors(lastLabel);
489                                 
490                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
491                                 
492                                 process(lookup);
493                                 
494                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
495                         }
496                         
497                         //reset groups parameter
498                         globaldata->Groups.clear();  
499                         
500                         
501                 }else { //user provided distance matrix
502                         
503                         if (outputDir == "") { outputDir = m->hasPath(phylipfile); }
504                                         
505                         //create a new filename
506                         ofstream out;
507                         string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile))  + "amova";                                
508                         m->openOutputFile(outputFile, out);
509                         outputNames.push_back(outputFile); outputTypes["amova"].push_back(outputFile);
510                         
511                         //print headers
512                         out << "groupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue" << endl;  
513                         out.close();
514                         
515                         ReadPhylipVector readMatrix(phylipfile);
516                         vector< vector<double> > completeMatrix;
517                         vector<string> names = readMatrix.read(completeMatrix);
518                         
519                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
520                                 if(processors == 1){
521                                         driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
522                                 }else{
523                                         int process = 1;
524                                         vector<int> processIDS;
525                                         
526                                         //loop through and create all the processes you want
527                                         while (process != processors) {
528                                                 int pid = fork();
529                                                 
530                                                 if (pid > 0) {
531                                                         processIDS.push_back(pid);  
532                                                         process++;
533                                                 }else if (pid == 0){
534                                                         driver(lines[process].start, lines[process].num, names, toString(getpid()), completeMatrix);
535                                                         exit(0);
536                                                 }else { 
537                                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
538                                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
539                                                         exit(0);
540                                                 }
541                                         }
542                                         
543                                         //do my part
544                                         driver(lines[0].start, lines[0].num, names, "", completeMatrix);
545                                         
546                                         //force parent to wait until all the processes are done
547                                         for (int i=0;i<(processors-1);i++) { 
548                                                 int temp = processIDS[i];
549                                                 wait(&temp);
550                                         }
551                                         
552                                         //append files
553                                         string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "amova";                         
554                                         for (int j = 0; j < processIDS.size(); j++) {
555                                                 m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
556                                                 remove((outputFile + toString(processIDS[j])).c_str());
557                                         }
558                                         
559                                 }
560                         #else
561                                 driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
562                         #endif
563                         
564                 }
565                 
566                 delete designMap;
567          
568                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
569
570                 m->mothurOutEndLine();
571                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
572                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
573                 m->mothurOutEndLine();
574                 
575                 return 0;
576         }
577         catch(exception& e) {
578                 m->errorOut(e, "AmovaCommand", "execute");
579                 exit(1);
580         }
581 }
582 //**********************************************************************************************************************
583
584 int AmovaCommand::process(vector<SharedRAbundVector*> thisLookup) {
585         try{
586                 
587 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
588                 if(processors == 1){
589                         driver(0, namesOfGroupCombos.size(), thisLookup, "");
590                 }else{
591                         int process = 1;
592                         vector<int> processIDS;
593                         
594                         //loop through and create all the processes you want
595                         while (process != processors) {
596                                 int pid = fork();
597                                 
598                                 if (pid > 0) {
599                                         processIDS.push_back(pid);  
600                                         process++;
601                                 }else if (pid == 0){
602                                         driver(lines[process].start, lines[process].num, thisLookup, toString(getpid()));
603                                         exit(0);
604                                 }else { 
605                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
606                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
607                                         exit(0);
608                                 }
609                         }
610                         
611                         //do my part
612                         driver(lines[0].start, lines[0].num, thisLookup, "");
613                         
614                         //force parent to wait until all the processes are done
615                         for (int i=0;i<(processors-1);i++) { 
616                                 int temp = processIDS[i];
617                                 wait(&temp);
618                         }
619                         
620                         //append files
621                         for(int i = 0 ; i < calculators.size(); i++) {
622                                 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova";                            
623
624                                 for (int j = 0; j < processIDS.size(); j++) {
625                                         m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
626                                         remove((outputFile + toString(processIDS[j])).c_str());
627                                 }
628                         }
629                 }
630 #else
631                 driver(0, namesOfGroupCombos.size(), thisLookUp, "");
632 #endif
633                         
634                 return 0;
635         }
636         catch(exception& e) {
637                 m->errorOut(e, "AmovaCommand", "process");
638                 exit(1);
639         }
640 }
641 //**********************************************************************************************************************
642
643 int AmovaCommand::driver(int start, int num, vector<SharedRAbundVector*> thisLookup, string pidValue) {
644         try {
645                 vector<SharedRAbundVector*> subset;
646                 EstOutput data;
647                 
648                 //for each calculator
649                 for(int i = 0 ; i < calculators.size(); i++) {
650                         
651                         //create a new filename
652                         ofstream out;
653                         string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova" + pidValue;                         
654                         m->openOutputFileAppend(outputFile, out);
655                         out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
656                         
657                         //for each combo
658                         for (int c = start; c < (start+num); c++) {
659                                 
660                                 if (m->control_pressed) { out.close(); return 0; }
661                                 
662                                 //get set names
663                                 vector<string> setNames;
664                                 for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
665                                 
666                                 vector<SharedRAbundVector*> thisCombosLookup;
667                                 vector<string> thisCombosLookupSets; //what set each sharedRabund is from to be used when calculating SSWithin
668                                 for (int k = 0; k < thisLookup.size(); k++) {
669                                         string thisGroup = thisLookup[k]->getGroup();
670                                         
671                                         //is this group for a set we want to compare??
672                                         if (m->inUsersGroups(designMap->getGroup(thisGroup), setNames)) {  
673                                                 thisCombosLookup.push_back(thisLookup[k]);
674                                                 thisCombosLookupSets.push_back(designMap->getGroup(thisGroup));
675                                         }
676                                         
677                                 }
678                                 
679                                 int numGroups = thisCombosLookup.size();
680                         
681                                 //calc the distance matrix
682                                 matrix.clear();
683                                 matrix.resize(numGroups);
684                                 for (int k = 0; k < matrix.size(); k++) {       for (int j = 0; j < matrix.size(); j++) {       matrix[k].push_back(1.0);       }       }
685                                 
686                                 if (thisCombosLookup.size() == 0)  { 
687                                         m->mothurOut("[ERROR]: Missing shared info for sets. Skipping comparison."); m->mothurOutEndLine(); 
688                                 }else{
689                                         
690                                         out << thisLookup[0]->getLabel() << '\t';
691                                         if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; }
692                                         else { out << "all" << '\t'; }
693                                         
694                                         for (int k = 0; k < thisCombosLookup.size(); k++) { 
695                                                 for (int l = k; l < thisCombosLookup.size(); l++) {
696                                                         
697                                                         if (m->control_pressed) { out.close(); return 0; }
698                                                         
699                                                         if (k != l) { //we dont need to similiarity of a groups to itself
700                                                                 //get estimated similarity between 2 groups
701                                                                 subset.clear(); //clear out old pair of sharedrabunds
702                                                                 //add new pair of sharedrabunds
703                                                                 subset.push_back(thisCombosLookup[k]); subset.push_back(thisCombosLookup[l]); 
704                                                                 
705                                                                 //if this calc needs all groups to calculate the pair load all groups
706                                                                 if (calculators[i]->getNeedsAll()) { 
707                                                                         //load subset with rest of lookup for those calcs that need everyone to calc for a pair
708                                                                         for (int w = 0; w < thisCombosLookup.size(); w++) {
709                                                                                 if ((w != k) && (w != l)) { subset.push_back(thisCombosLookup[w]); }
710                                                                         }
711                                                                 }
712                                                                 
713                                                                 data = calculators[i]->getValues(subset); //saves the calculator outputs
714                                                                 
715                                                                 //save values in similarity matrix
716                                                                 matrix[k][l] = 1.0 - data[0];
717                                                                 matrix[l][k] = 1.0 - data[0];
718                                                         }
719                                                 }
720                                         }
721                                         
722                                         //calc amova
723                                         calcAmova(out, setNames.size(), thisCombosLookupSets);
724                                 }
725                         }
726                         
727                         out.close();
728                 }               
729                 
730                 return 0;
731                 
732         }
733         catch(exception& e) {
734                 m->errorOut(e, "AmovaCommand", "driver");
735                 exit(1);
736         }
737 }
738 //**********************************************************************************************************************
739
740 int AmovaCommand::driver(int start, int num, vector<string> names, string pidValue, vector< vector<double> >& completeMatrix) {
741         try {
742                 
743                 //create a new filename
744                 ofstream out;
745                 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "amova" + pidValue;                              
746                 m->openOutputFileAppend(outputFile, out);
747                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
748                 
749                 //for each combo
750                 for (int c = start; c < (start+num); c++) {
751                         
752                         if (m->control_pressed) { out.close(); return 0; }
753                         
754                         //get set names
755                         vector<string> setNames;
756                         for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
757                         
758                         vector<string> thisCombosSets; //what set each line in the distance matrix is from to be used when calculating SSWithin
759                         set<int> indexes;
760                         for (int k = 0; k < names.size(); k++) {
761                                 //is this group for a set we want to compare??
762                                 if (m->inUsersGroups(designMap->getGroup(names[k]), setNames)) {  
763                                         thisCombosSets.push_back(designMap->getGroup(names[k]));        
764                                         indexes.insert(k); //save indexes of valid rows in matrix for submatrix
765                                 }
766                         }
767                         
768                         int numGroups = thisCombosSets.size();
769                         
770                         //calc the distance matrix
771                         matrix.clear();
772                         matrix.resize(numGroups);
773                         
774                         for (int k = 0; k < matrix.size(); k++) {       for (int j = 0; j < matrix.size(); j++) {       matrix[k].push_back(1.0);       }       }
775                         
776                         if (thisCombosSets.size() == 0)  { 
777                                 m->mothurOut("[ERROR]: Missing distance info for sets. Skipping comparison."); m->mothurOutEndLine(); 
778                         }else{
779                                 
780                                 if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; }
781                                 else { out << "all" << '\t'; }
782                                 
783                                 //fill submatrix
784                                 int rowCount = 0;
785                                 for (int j = 0; j < completeMatrix.size(); j++) {
786                                         
787                                         if (indexes.count(j) != 0) { //we want at least part of this row
788                                                 int columnCount = 0;
789                                                 for (int k = 0; k < completeMatrix[j].size(); k++) {
790                                                         
791                                                         if (indexes.count(k) != 0) { //we want this distance
792                                                                 matrix[rowCount][columnCount] = completeMatrix[j][k];
793                                                                 matrix[columnCount][rowCount] = completeMatrix[j][k];
794                                                                 columnCount++;
795                                                         }
796                                                 }
797                                                 rowCount++;
798                                         }
799                                 }
800                                 
801                                 //calc amova
802                                 calcAmova(out, setNames.size(), thisCombosSets);
803                         }
804                 }
805                 
806                 out.close();
807                 
808         
809                 return 0;
810                 
811         }
812         catch(exception& e) {
813                 m->errorOut(e, "AmovaCommand", "driver");
814                 exit(1);
815         }
816 }
817 //**********************************************************************************************************************
818 int AmovaCommand::calcAmova(ofstream& out, int numTreatments, vector<string> thisCombosLookupSets) {
819         try {
820                 
821                 double SSWithin, SSTotal, SSAmoung, MSWithin, MSAmoung, FStatistic, pValue;
822                 
823                 SSWithin = calcWithin(matrix, numTreatments, thisCombosLookupSets);
824                 SSTotal = calcTotal(numTreatments);
825                 
826                 int count = 0;
827                 for (int i = 0; i < iters; i++) {
828                         if (m->control_pressed) { break; }
829                         
830                         //randomly shuffle names to randomize the matrix
831                         vector<string> copyNames = thisCombosLookupSets;
832                         random_shuffle(copyNames.begin(), copyNames.end());
833                         
834                         double randomSSWithin = calcWithin(matrix, numTreatments, copyNames);
835                         
836                         if (randomSSWithin <= SSWithin) { count++; }
837                 }
838                 
839                 SSAmoung = SSTotal - SSWithin;
840                 MSWithin = SSWithin / (double) (matrix.size() - numTreatments);
841                 MSAmoung = SSAmoung / (double) (numTreatments - 1);
842                 FStatistic = MSAmoung / MSWithin;
843                 
844                 pValue = count / (float) iters;
845                 
846                 out << MSWithin << '\t' << MSAmoung << '\t' << FStatistic << '\t' << pValue << endl;
847                 //cout << "ssAmong = " << SSAmoung << '\t' << "sswithin = " << SSWithin << '\t' << "ssTotal = " << SSTotal << '\t' << "pvalue = " << pValue << endl;    
848                 return 0;
849         }
850         catch(exception& e) {
851                 m->errorOut(e, "AmovaCommand", "calcAmova");
852                 exit(1);
853         }
854 }
855 //**********************************************************************************************************************
856 double AmovaCommand::calcWithin(vector< vector<double> >& thisMatrix, int numTreatments, vector<string> thisCombosLookupSets) {
857         try {
858                 double within = 0.0;
859                 
860                 //traverse lower triangle
861                 for (int k = 0; k < thisMatrix.size(); k++) { 
862                         for (int l = k; l < thisMatrix[k].size(); l++) {
863                                 
864                                 //if you are from the same treatment then eij is 1 so add, else eij = 0
865                                 if (thisCombosLookupSets[k] == thisCombosLookupSets[l]) { 
866                                         within += (thisMatrix[k][l] * thisMatrix[k][l]); //dij^2
867                                 }
868                         }
869                 }
870                 
871                 //1 / (numSamples / numTreatments)
872                 within *= (1.0 / (float) (thisMatrix.size() / (float) numTreatments));
873                 
874                 return within;
875         }
876         catch(exception& e) {
877                 m->errorOut(e, "AmovaCommand", "calcWithin");
878                 exit(1);
879         }
880 }
881 //**********************************************************************************************************************
882 double AmovaCommand::calcTotal(int numTreatments) {
883         try {
884                 double total = 0.0;
885                 
886                 //traverse lower triangle
887                 for (int k = 0; k < matrix.size(); k++) { 
888                         for (int l = k; l < matrix[k].size(); l++) {
889                                 total += (matrix[k][l] * matrix[k][l]); //dij^2
890                         }
891                 }
892                 
893                 //1 / numSamples
894                 total *= (1.0 / (float) matrix.size());
895                 
896                 return total;
897         }
898         catch(exception& e) {
899                 m->errorOut(e, "AmovaCommand", "calcTotal");
900                 exit(1);
901         }
902 }
903 //**********************************************************************************************************************
904