]> git.donarmstrong.com Git - mothur.git/blob - amovacommand.cpp
added amova command
[mothur.git] / amovacommand.cpp
1 /*
2  *  amovacommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 2/7/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "amovacommand.h"
11 #include "sharedutilities.h"
12 #include "sharedsobscollectsummary.h"
13 #include "sharedchao1.h"
14 #include "sharedace.h"
15 #include "sharednseqs.h"
16 #include "sharedjabund.h"
17 #include "sharedsorabund.h"
18 #include "sharedjclass.h"
19 #include "sharedsorclass.h"
20 #include "sharedjest.h"
21 #include "sharedsorest.h"
22 #include "sharedthetayc.h"
23 #include "sharedthetan.h"
24 #include "sharedkstest.h"
25 #include "whittaker.h"
26 #include "sharedochiai.h"
27 #include "sharedanderbergs.h"
28 #include "sharedkulczynski.h"
29 #include "sharedkulczynskicody.h"
30 #include "sharedlennon.h"
31 #include "sharedmorisitahorn.h"
32 #include "sharedbraycurtis.h"
33 #include "sharedjackknife.h"
34 #include "whittaker.h"
35 #include "odum.h"
36 #include "canberra.h"
37 #include "structeuclidean.h"
38 #include "structchord.h"
39 #include "hellinger.h"
40 #include "manhattan.h"
41 #include "structpearson.h"
42 #include "soergel.h"
43 #include "spearman.h"
44 #include "structkulczynski.h"
45 #include "structchi2.h"
46 #include "speciesprofile.h"
47 #include "hamming.h"
48 #include "gower.h"
49 #include "memchi2.h"
50 #include "memchord.h"
51 #include "memeuclidean.h"
52 #include "mempearson.h"
53
54 //**********************************************************************************************************************
55 vector<string> AmovaCommand::getValidParameters(){      
56         try {
57                 string Array[] =  {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
58                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
59                 return myArray;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "AmovaCommand", "getValidParameters");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 AmovaCommand::AmovaCommand(){   
68         try {
69                 abort = true;
70                 //initialize outputTypes
71                 vector<string> tempOutNames;
72                 outputTypes["amova"] = tempOutNames;
73         }
74         catch(exception& e) {
75                 m->errorOut(e, "AmovaCommand", "AmovaCommand");
76                 exit(1);
77         }
78 }
79 //**********************************************************************************************************************
80 vector<string> AmovaCommand::getRequiredParameters(){   
81         try {
82                 string Array[] =  {"design"};
83                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
84                 return myArray;
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "AmovaCommand", "getRequiredParameters");
88                 exit(1);
89         }
90 }
91 //**********************************************************************************************************************
92 vector<string> AmovaCommand::getRequiredFiles(){        
93         try {
94                 string Array[] =  {};
95                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
96                 return myArray;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "AmovaCommand", "getRequiredFiles");
100                 exit(1);
101         }
102 }
103 //**********************************************************************************************************************
104
105 AmovaCommand::AmovaCommand(string option) {
106         try {
107                 globaldata = GlobalData::getInstance();
108                 abort = false;
109                 allLines = 1;
110                 labels.clear();
111                 
112                 //allow user to run help
113                 if(option == "help") { help(); abort = true; }
114                 
115                 else {
116                         //valid paramters for this command
117                         string AlignArray[] =  {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
118                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
119                         
120                         OptionParser parser(option);
121                         map<string,string> parameters = parser.getParameters();
122                         
123                         ValidParameters validParameter;
124                         
125                         //check to make sure all parameters are valid for command
126                         map<string,string>::iterator it;
127                         for (it = parameters.begin(); it != parameters.end(); it++) { 
128                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
129                         }
130                         
131                         //initialize outputTypes
132                         vector<string> tempOutNames;
133                         outputTypes["amova"] = tempOutNames;
134                         
135                         //if the user changes the output directory command factory will send this info to us in the output parameter 
136                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
137                         
138                         //if the user changes the input directory command factory will send this info to us in the output parameter 
139                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
140                         if (inputDir == "not found"){   inputDir = "";          }
141                         else {
142                                 string path;
143                                 it = parameters.find("design");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
149                                 }
150                                 
151                                 it = parameters.find("phylip");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
157                                 }
158                         }
159                         
160                         phylipfile = validParameter.validFile(parameters, "phylip", true);
161                         if (phylipfile == "not open") { phylipfile = ""; abort = true; }
162                         else if (phylipfile == "not found") { phylipfile = ""; }        
163                         else {  globaldata->newRead(); format = "phylip";  globaldata->setPhylipFile(phylipfile);       }
164                         
165                         //check for required parameters
166                         designfile = validParameter.validFile(parameters, "design", true);
167                         if (designfile == "not open") { abort = true; }
168                         else if (designfile == "not found") {  designfile = "";  m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }       
169                         
170                         //make sure the user has already run the read.otu command
171                         if ((globaldata->getSharedFile() == "")) {
172                                 if ((phylipfile == "")) {
173                                         m->mothurOut("You must read a list and a group, a shared file or provide a distance matrix before you can use the amova command."); m->mothurOutEndLine(); abort = true; 
174                                 }
175                         }else { sharedfile = globaldata->getSharedFile(); } 
176                         
177                         //use distance matrix if one is provided
178                         if ((sharedfile != "") && (phylipfile != "")) { sharedfile = ""; }
179                         
180                         //check for optional parameter and set defaults
181                         // ...at some point should added some additional type checking...
182                         label = validParameter.validFile(parameters, "label", false);                   
183                         if (label == "not found") { label = ""; }
184                         else { 
185                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
186                                 else { allLines = 1;  }
187                         }
188                         
189                         //if the user has not specified any labels use the ones from read.otu
190                         if (label == "") {  
191                                 allLines = globaldata->allLines; 
192                                 labels = globaldata->labels; 
193                         }
194                         
195                         groups = validParameter.validFile(parameters, "groups", false);                 
196                         if (groups == "not found") { groups = "";  }
197                         else { 
198                                 m->splitAtDash(groups, Groups);
199                                 globaldata->Groups = Groups;
200                         }
201                         
202                         sets = validParameter.validFile(parameters, "sets", false);                     
203                         if (sets == "not found") { sets = ""; pickedGroups = false; }
204                         else { 
205                                 pickedGroups = true;
206                                 m->splitAtDash(sets, Sets);
207                         }
208                         
209                         
210                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
211                         convert(temp, iters); 
212                         
213                         temp = validParameter.validFile(parameters, "processors", false);                       if (temp == "not found"){       temp = "1";     }
214                         convert(temp, processors); 
215                         
216                         vector<string> Estimators;
217                         calc = validParameter.validFile(parameters, "calc", false);                     
218                         if (calc == "not found") { calc = "morisitahorn";  }
219                         m->splitAtDash(calc, Estimators);
220                                 
221                         if (abort == false) {
222                                 
223                                 ValidCalculators* validCalculator = new ValidCalculators();
224                                 
225                                 if (sharedfile != "") {
226                                         
227                                         for (int i=0; i<Estimators.size(); i++) {
228                                                 if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) { 
229                                                         if (Estimators[i] == "sharedsobs") { 
230                                                                 calculators.push_back(new SharedSobsCS());
231                                                         }else if (Estimators[i] == "sharedchao") { 
232                                                                 calculators.push_back(new SharedChao1());
233                                                         }else if (Estimators[i] == "sharedace") { 
234                                                                 calculators.push_back(new SharedAce());
235                                                         }else if (Estimators[i] == "jabund") {  
236                                                                 calculators.push_back(new JAbund());
237                                                         }else if (Estimators[i] == "sorabund") { 
238                                                                 calculators.push_back(new SorAbund());
239                                                         }else if (Estimators[i] == "jclass") { 
240                                                                 calculators.push_back(new Jclass());
241                                                         }else if (Estimators[i] == "sorclass") { 
242                                                                 calculators.push_back(new SorClass());
243                                                         }else if (Estimators[i] == "jest") { 
244                                                                 calculators.push_back(new Jest());
245                                                         }else if (Estimators[i] == "sorest") { 
246                                                                 calculators.push_back(new SorEst());
247                                                         }else if (Estimators[i] == "thetayc") { 
248                                                                 calculators.push_back(new ThetaYC());
249                                                         }else if (Estimators[i] == "thetan") { 
250                                                                 calculators.push_back(new ThetaN());
251                                                         }else if (Estimators[i] == "kstest") { 
252                                                                 calculators.push_back(new KSTest());
253                                                         }else if (Estimators[i] == "sharednseqs") { 
254                                                                 calculators.push_back(new SharedNSeqs());
255                                                         }else if (Estimators[i] == "ochiai") { 
256                                                                 calculators.push_back(new Ochiai());
257                                                         }else if (Estimators[i] == "anderberg") { 
258                                                                 calculators.push_back(new Anderberg());
259                                                         }else if (Estimators[i] == "kulczynski") { 
260                                                                 calculators.push_back(new Kulczynski());
261                                                         }else if (Estimators[i] == "kulczynskicody") { 
262                                                                 calculators.push_back(new KulczynskiCody());
263                                                         }else if (Estimators[i] == "lennon") { 
264                                                                 calculators.push_back(new Lennon());
265                                                         }else if (Estimators[i] == "morisitahorn") { 
266                                                                 calculators.push_back(new MorHorn());
267                                                         }else if (Estimators[i] == "braycurtis") { 
268                                                                 calculators.push_back(new BrayCurtis());
269                                                         }else if (Estimators[i] == "whittaker") { 
270                                                                 calculators.push_back(new Whittaker());
271                                                         }else if (Estimators[i] == "odum") { 
272                                                                 calculators.push_back(new Odum());
273                                                         }else if (Estimators[i] == "canberra") { 
274                                                                 calculators.push_back(new Canberra());
275                                                         }else if (Estimators[i] == "structeuclidean") { 
276                                                                 calculators.push_back(new StructEuclidean());
277                                                         }else if (Estimators[i] == "structchord") { 
278                                                                 calculators.push_back(new StructChord());
279                                                         }else if (Estimators[i] == "hellinger") { 
280                                                                 calculators.push_back(new Hellinger());
281                                                         }else if (Estimators[i] == "manhattan") { 
282                                                                 calculators.push_back(new Manhattan());
283                                                         }else if (Estimators[i] == "structpearson") { 
284                                                                 calculators.push_back(new StructPearson());
285                                                         }else if (Estimators[i] == "soergel") { 
286                                                                 calculators.push_back(new Soergel());
287                                                         }else if (Estimators[i] == "spearman") { 
288                                                                 calculators.push_back(new Spearman());
289                                                         }else if (Estimators[i] == "structkulczynski") { 
290                                                                 calculators.push_back(new StructKulczynski());
291                                                         }else if (Estimators[i] == "speciesprofile") { 
292                                                                 calculators.push_back(new SpeciesProfile());
293                                                         }else if (Estimators[i] == "hamming") { 
294                                                                 calculators.push_back(new Hamming());
295                                                         }else if (Estimators[i] == "structchi2") { 
296                                                                 calculators.push_back(new StructChi2());
297                                                         }else if (Estimators[i] == "gower") { 
298                                                                 calculators.push_back(new Gower());
299                                                         }else if (Estimators[i] == "memchi2") { 
300                                                                 calculators.push_back(new MemChi2());
301                                                         }else if (Estimators[i] == "memchord") { 
302                                                                 calculators.push_back(new MemChord());
303                                                         }else if (Estimators[i] == "memeuclidean") { 
304                                                                 calculators.push_back(new MemEuclidean());
305                                                         }else if (Estimators[i] == "mempearson") { 
306                                                                 calculators.push_back(new MemPearson());
307                                                         }
308                                                 }
309                                         }
310                                 }
311                         }
312                 }
313                 
314         }
315         catch(exception& e) {
316                 m->errorOut(e, "AmovaCommand", "AmovaCommand");
317                 exit(1);
318         }
319 }
320
321 //**********************************************************************************************************************
322
323 void AmovaCommand::help(){
324         try {
325                 m->mothurOut("The amova command can only be executed after a successful read.otu command of a list and group or shared file, or by providing a phylip formatted distance matrix.\n");
326                 m->mothurOut("The amova command outputs a .amova file. \n");
327                 m->mothurOut("The amova command parameters are phylip, iters, groups, label, design, sets and processors.  The design parameter is required.\n");
328                 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running amova. It is required. \n");
329                 m->mothurOut("The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
330                 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n");
331                 m->mothurOut("The iters parameter allows you to set number of randomization for the P value.  The default is 1000. \n");
332                 m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes. groups=all will find all pairwise comparisons. \n");
333                 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
334                 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
335                 m->mothurOut("The amova command should be in the following format: amova(design=yourDesignFile).\n");
336                 m->mothurOut("Example amova(design=temp.design, groups=A-B-C).\n");
337                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
338                 
339         }
340         catch(exception& e) {
341                 m->errorOut(e, "AmovaCommand", "help");
342                 exit(1);
343         }
344 }
345
346 //**********************************************************************************************************************
347
348 AmovaCommand::~AmovaCommand(){}
349
350 //**********************************************************************************************************************
351
352 int AmovaCommand::execute(){
353         try {
354                 
355                 if (abort == true) { return 0; }
356                 
357                 //read design file
358                 designMap = new GroupMap(designfile);
359                 designMap->readDesignMap();
360                 
361                 //make sure sets are all in designMap
362                 SharedUtil* util = new SharedUtil();  
363                 util->setGroups(Sets, designMap->namesOfGroups);  
364                 delete util;
365                 
366                 //if the user pickedGroups or set sets=all, then we want to figure out how to split the pairwise comparison between processors
367                 int numGroups = Sets.size();
368                 if (pickedGroups) {
369                         for (int a=0; a<numGroups; a++) { 
370                                 for (int l = 0; l < a; l++) {   
371                                         vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
372                                         namesOfGroupCombos.push_back(groups);
373                                 }
374                         }
375                 }else { //one giant compare
376                         vector<string> groups;
377                         for (int a=0; a<numGroups; a++) { groups.push_back(Sets[a]); }
378                         namesOfGroupCombos.push_back(groups);
379                 }
380                 
381                 //only 1 combo
382                 if (numGroups == 2) { processors = 1; }
383                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
384                 
385                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
386                         if(processors != 1){
387                                 int numPairs = namesOfGroupCombos.size();
388                                 int numPairsPerProcessor = numPairs / processors;
389                         
390                                 for (int i = 0; i < processors; i++) {
391                                         int startPos = i * numPairsPerProcessor;
392                                         if(i == processors - 1){
393                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
394                                         }
395                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
396                                 }
397                         }
398                 #endif
399                 
400                 if (sharedfile != "") { //create distance matrix for each label
401                         
402                         if (outputDir == "") { outputDir = m->hasPath(sharedfile); }
403                         
404                         //for each calculator
405                         for(int i = 0 ; i < calculators.size(); i++) {
406                                 
407                                 //create a new filename
408                                 ofstream out;
409                                 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova";                            
410                                 m->openOutputFile(outputFile, out);
411                                 outputNames.push_back(outputFile); outputTypes["amova"].push_back(outputFile);
412                                 
413                                 //print headers
414                                 out << "label\tgroupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue" << endl;  
415                                 out.close();
416                         }
417                         
418                         InputData input("sharedfile", sharedfile);
419                         vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
420                         string lastLabel = lookup[0]->getLabel();
421                         
422                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
423                         set<string> processedLabels;
424                         set<string> userLabels = labels;
425                         
426                         //sanity check between shared file groups and design file groups
427                         for (int i = 0; i < lookup.size(); i++) { 
428                                 string group = designMap->getGroup(lookup[i]->getGroup());
429                                 
430                                 if (group == "not found") { m->control_pressed = true;  m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct."); m->mothurOutEndLine();  }
431                         }
432                         
433                         //as long as you are not at the end of the file or done wih the lines you want
434                         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
435                                 
436                                 if (m->control_pressed) {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } globaldata->Groups.clear();  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); } return 0; }
437                                 
438                                 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
439                                         
440                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
441                                         process(lookup);
442                                         
443                                         processedLabels.insert(lookup[0]->getLabel());
444                                         userLabels.erase(lookup[0]->getLabel());
445                                 }
446                                 
447                                 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
448                                         string saveLabel = lookup[0]->getLabel();
449                                         
450                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
451                                         lookup = input.getSharedRAbundVectors(lastLabel);
452                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
453                                         
454                                         process(lookup);
455                                         
456                                         processedLabels.insert(lookup[0]->getLabel());
457                                         userLabels.erase(lookup[0]->getLabel());
458                                         
459                                         //restore real lastlabel to save below
460                                         lookup[0]->setLabel(saveLabel);
461                                 }
462                                 
463                                 lastLabel = lookup[0]->getLabel();
464                                 //prevent memory leak
465                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
466                                 
467                                 if (m->control_pressed) {  globaldata->Groups.clear();   delete designMap;  for (int i = 0; i < outputNames.size(); i++) {      remove(outputNames[i].c_str()); } return 0; }
468                                 
469                                 //get next line to process
470                                 lookup = input.getSharedRAbundVectors();                                
471                         }
472                         
473                         if (m->control_pressed) { globaldata->Groups.clear();  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); }  return 0; }
474                         
475                         //output error messages about any remaining user labels
476                         set<string>::iterator it;
477                         bool needToRun = false;
478                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
479                                 m->mothurOut("Your file does not include the label " + *it); 
480                                 if (processedLabels.count(lastLabel) != 1) {
481                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
482                                         needToRun = true;
483                                 }else {
484                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
485                                 }
486                         }
487                         
488                         //run last label if you need to
489                         if (needToRun == true)  {
490                                 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
491                                 lookup = input.getSharedRAbundVectors(lastLabel);
492                                 
493                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
494                                 
495                                 process(lookup);
496                                 
497                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
498                         }
499                         
500                         //reset groups parameter
501                         globaldata->Groups.clear();  
502                         
503                         
504                 }else { //user provided distance matrix
505                                 
506                         //for each calculator
507                         for(int i = 0 ; i < calculators.size(); i++) {
508                                 //create a new filename
509                                 ofstream out;
510                                 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + calculators[i]->getName() + ".amova";                            
511                                 m->openOutputFile(outputFile, out);
512                                 outputNames.push_back(outputFile); outputTypes["amova"].push_back(outputFile);
513                                 
514                                 //print headers
515                                 out << "groupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue" << endl;  
516                                 out.close();
517                         }
518                         
519                         ReadPhylipVector readMatrix(phylipfile);
520                         vector< vector<double> > completeMatrix;
521                         vector<string> names = readMatrix.read(completeMatrix);
522                         
523                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
524                                 if(processors == 1){
525                                         driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
526                                 }else{
527                                         int process = 1;
528                                         vector<int> processIDS;
529                                         
530                                         //loop through and create all the processes you want
531                                         while (process != processors) {
532                                                 int pid = fork();
533                                                 
534                                                 if (pid > 0) {
535                                                         processIDS.push_back(pid);  
536                                                         process++;
537                                                 }else if (pid == 0){
538                                                         driver(lines[process].start, lines[process].num, names, toString(getpid()), completeMatrix);
539                                                         exit(0);
540                                                 }else { 
541                                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
542                                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
543                                                         exit(0);
544                                                 }
545                                         }
546                                         
547                                         //do my part
548                                         driver(lines[0].start, lines[0].num, names, "", completeMatrix);
549                                         
550                                         //force parent to wait until all the processes are done
551                                         for (int i=0;i<(processors-1);i++) { 
552                                                 int temp = processIDS[i];
553                                                 wait(&temp);
554                                         }
555                                         
556                                         //append files
557                                         for(int i = 0 ; i < calculators.size(); i++) {
558                                                 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + calculators[i]->getName() + ".amova";                            
559                                                 
560                                                 for (int j = 0; j < processIDS.size(); j++) {
561                                                         m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
562                                                         remove((outputFile + toString(processIDS[j])).c_str());
563                                                 }
564                                         }
565                                 }
566                         #else
567                                 driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
568                         #endif
569                         
570                 }
571                 
572                 delete designMap;
573          
574                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
575
576                 m->mothurOutEndLine();
577                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
578                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
579                 m->mothurOutEndLine();
580                 
581                 return 0;
582         }
583         catch(exception& e) {
584                 m->errorOut(e, "AmovaCommand", "execute");
585                 exit(1);
586         }
587 }
588 //**********************************************************************************************************************
589
590 int AmovaCommand::process(vector<SharedRAbundVector*> thisLookup) {
591         try{
592                 
593 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
594                 if(processors == 1){
595                         driver(0, namesOfGroupCombos.size(), thisLookup, "");
596                 }else{
597                         int process = 1;
598                         vector<int> processIDS;
599                         
600                         //loop through and create all the processes you want
601                         while (process != processors) {
602                                 int pid = fork();
603                                 
604                                 if (pid > 0) {
605                                         processIDS.push_back(pid);  
606                                         process++;
607                                 }else if (pid == 0){
608                                         driver(lines[process].start, lines[process].num, thisLookup, toString(getpid()));
609                                         exit(0);
610                                 }else { 
611                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
612                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
613                                         exit(0);
614                                 }
615                         }
616                         
617                         //do my part
618                         driver(lines[0].start, lines[0].num, thisLookup, "");
619                         
620                         //force parent to wait until all the processes are done
621                         for (int i=0;i<(processors-1);i++) { 
622                                 int temp = processIDS[i];
623                                 wait(&temp);
624                         }
625                         
626                         //append files
627                         for(int i = 0 ; i < calculators.size(); i++) {
628                                 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova";                            
629
630                                 for (int j = 0; j < processIDS.size(); j++) {
631                                         m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
632                                         remove((outputFile + toString(processIDS[j])).c_str());
633                                 }
634                         }
635                 }
636 #else
637                 driver(0, namesOfGroupCombos.size(), thisLookUp, "");
638 #endif
639                         
640                 return 0;
641         }
642         catch(exception& e) {
643                 m->errorOut(e, "AmovaCommand", "process");
644                 exit(1);
645         }
646 }
647 //**********************************************************************************************************************
648
649 int AmovaCommand::driver(int start, int num, vector<SharedRAbundVector*> thisLookup, string pidValue) {
650         try {
651                 vector<SharedRAbundVector*> subset;
652                 EstOutput data;
653                 
654                 //for each calculator
655                 for(int i = 0 ; i < calculators.size(); i++) {
656                         
657                         //create a new filename
658                         ofstream out;
659                         string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova" + pidValue;                         
660                         m->openOutputFileAppend(outputFile, out);
661                         out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
662                         
663                         //for each combo
664                         for (int c = start; c < (start+num); c++) {
665                                 
666                                 if (m->control_pressed) { out.close(); return 0; }
667                                 
668                                 //get set names
669                                 vector<string> setNames;
670                                 for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
671                                 
672                                 vector<SharedRAbundVector*> thisCombosLookup;
673                                 vector<string> thisCombosLookupSets; //what set each sharedRabund is from to be used when calculating SSWithin
674                                 for (int k = 0; k < thisLookup.size(); k++) {
675                                         string thisGroup = thisLookup[k]->getGroup();
676                                         
677                                         //is this group for a set we want to compare??
678                                         if (m->inUsersGroups(designMap->getGroup(thisGroup), setNames)) {  
679                                                 thisCombosLookup.push_back(thisLookup[k]);
680                                                 thisCombosLookupSets.push_back(designMap->getGroup(thisGroup));
681                                         }
682                                         
683                                 }
684                                 
685                                 int numGroups = thisCombosLookup.size();
686                         
687                                 //calc the distance matrix
688                                 matrix.clear();
689                                 matrix.resize(numGroups);
690                                 for (int k = 0; k < matrix.size(); k++) {       for (int j = 0; j < matrix.size(); j++) {       matrix[k].push_back(1.0);       }       }
691                                 
692                                 if (thisCombosLookup.size() != 0)  { 
693                                         m->mothurOut("[ERROR]: Missing shared info for sets. Skipping comparison."); m->mothurOutEndLine(); 
694                                 }else{
695                                         
696                                         out << thisLookup[0]->getLabel() << '\t';
697                                         if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; }
698                                         else { out << "all" << '\t'; }
699                                         
700                                         for (int k = 0; k < thisCombosLookup.size(); k++) { 
701                                                 for (int l = k; l < thisCombosLookup.size(); l++) {
702                                                         
703                                                         if (m->control_pressed) { out.close(); return 0; }
704                                                         
705                                                         if (k != l) { //we dont need to similiarity of a groups to itself
706                                                                 //get estimated similarity between 2 groups
707                                                                 subset.clear(); //clear out old pair of sharedrabunds
708                                                                 //add new pair of sharedrabunds
709                                                                 subset.push_back(thisCombosLookup[k]); subset.push_back(thisCombosLookup[l]); 
710                                                                 
711                                                                 //if this calc needs all groups to calculate the pair load all groups
712                                                                 if (calculators[i]->getNeedsAll()) { 
713                                                                         //load subset with rest of lookup for those calcs that need everyone to calc for a pair
714                                                                         for (int w = 0; w < thisCombosLookup.size(); w++) {
715                                                                                 if ((w != k) && (w != l)) { subset.push_back(thisCombosLookup[w]); }
716                                                                         }
717                                                                 }
718                                                                 
719                                                                 data = calculators[i]->getValues(subset); //saves the calculator outputs
720                                                                 
721                                                                 //save values in similarity matrix
722                                                                 matrix[k][l] = 1.0 - data[0];
723                                                                 matrix[l][k] = 1.0 - data[0];
724                                                         }
725                                                 }
726                                         }
727                                         
728                                         //calc amova
729                                         calcAmova(out, setNames.size(), thisCombosLookupSets);
730                                 }
731                         }
732                         
733                         out.close();
734                 }               
735                 
736                 return 0;
737                 
738         }
739         catch(exception& e) {
740                 m->errorOut(e, "AmovaCommand", "driver");
741                 exit(1);
742         }
743 }
744 //**********************************************************************************************************************
745
746 int AmovaCommand::driver(int start, int num, vector<string> names, string pidValue, vector< vector<double> >& completeMatrix) {
747         try {
748                 
749                 //for each calculator
750                 for(int i = 0 ; i < calculators.size(); i++) {
751                         
752                         //create a new filename
753                         ofstream out;
754                         string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova" + pidValue;                         
755                         m->openOutputFileAppend(outputFile, out);
756                         out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
757                         
758                         //for each combo
759                         for (int c = start; c < (start+num); c++) {
760                                 
761                                 if (m->control_pressed) { out.close(); return 0; }
762                                 
763                                 //get set names
764                                 vector<string> setNames;
765                                 for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
766                                 
767                                 vector<string> thisCombosSets; //what set each line in the distance matrix is from to be used when calculating SSWithin
768                                 set<int> indexes;
769                                 for (int k = 0; k < names.size(); k++) {
770                                         //is this group for a set we want to compare??
771                                         if (m->inUsersGroups(designMap->getGroup(names[k]), setNames)) {  
772                                                 thisCombosSets.push_back(designMap->getGroup(names[k]));        
773                                                 indexes.insert(k); //save indexes of valid rows in matrix for submatrix
774                                         }
775                                 }
776                                 
777                                 int numGroups = thisCombosSets.size();
778                                 
779                                 //calc the distance matrix
780                                 matrix.clear();
781                                 matrix.resize(numGroups);
782                                 for (int k = 0; k < matrix.size(); k++) {       for (int j = 0; j < matrix.size(); j++) {       matrix[k].push_back(1.0);       }       }
783                                 
784                                 if (thisCombosSets.size() != 0)  { 
785                                         m->mothurOut("[ERROR]: Missing distance info for sets. Skipping comparison."); m->mothurOutEndLine(); 
786                                 }else{
787                                         
788                                         if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; }
789                                         else { out << "all" << '\t'; }
790                                         
791                                         //fill submatrix
792                                         int rowCount = 0;
793                                         int columnCount = 0;
794                                         for (int j = 0; j < completeMatrix.size(); j++) {
795                                                 
796                                                 if (indexes.count(j) != 0) { //we want at least part of this row
797                                                         for (int k = 0; k < completeMatrix[j].size(); k++) {
798                                                                 
799                                                                 if (indexes.count(k) != 0) { //we want this distance
800                                                                         matrix[rowCount][columnCount] = completeMatrix[j][k];
801                                                                         matrix[columnCount][rowCount] = completeMatrix[j][k];
802                                                                         columnCount++;
803                                                                 }
804                                                         }
805                                                         rowCount++;
806                                                 }
807                                         }
808                                         
809                                         //calc amova
810                                         calcAmova(out, setNames.size(), thisCombosSets);
811                                 }
812                         }
813                         
814                         out.close();
815                 }               
816                 
817                 return 0;
818                 
819         }
820         catch(exception& e) {
821                 m->errorOut(e, "AmovaCommand", "driver");
822                 exit(1);
823         }
824 }
825 //**********************************************************************************************************************
826 int AmovaCommand::calcAmova(ofstream& out, int numTreatments, vector<string> thisCombosLookupSets) {
827         try {
828                 
829                 double SSWithin, SSTotal, SSAmoung, MSWithin, MSAmoung, FStatistic, pValue;
830                 
831                 SSWithin = calcWithin(matrix, numTreatments, thisCombosLookupSets);
832                 SSTotal = calcTotal(numTreatments);
833                 
834                 int count = 0;
835                 for (int i = 0; i < iters; i++) {
836                         if (m->control_pressed) { break; }
837                         
838                         //randomly shuffle names to randomize the matrix
839                         vector<string> copyNames = thisCombosLookupSets;
840                         random_shuffle(copyNames.begin(), copyNames.end());
841                         
842                         double randomSSWithin = calcWithin(matrix, numTreatments, copyNames);
843                         
844                         if (randomSSWithin <= SSWithin) { count++; }
845                 }
846                 
847                 SSAmoung = SSTotal - SSWithin;
848                 MSWithin = SSWithin / (double) (matrix.size() - numTreatments);
849                 MSAmoung = SSAmoung / (double) (numTreatments - 1);
850                 FStatistic = MSAmoung / MSWithin;
851                 
852                 pValue = count / (float) iters;
853                 
854                 out << MSWithin << '\t' << MSAmoung << '\t' << FStatistic << '\t' << pValue << endl;
855                 
856                 return 0;
857         }
858         catch(exception& e) {
859                 m->errorOut(e, "AmovaCommand", "calcAmova");
860                 exit(1);
861         }
862 }
863 //**********************************************************************************************************************
864 double AmovaCommand::calcWithin(vector< vector<double> >& thisMatrix, int numTreatments, vector<string> thisCombosLookupSets) {
865         try {
866                 double within = 0.0;
867                 
868                 //traverse lower triangle
869                 for (int k = 0; k < thisMatrix.size(); k++) { 
870                         for (int l = k; l < thisMatrix[k].size(); l++) {
871                                 
872                                 //if you are from the same treatment then eij is 1 so add, else eij = 0
873                                 if (thisCombosLookupSets[k] == thisCombosLookupSets[l]) { 
874                                         within += (thisMatrix[k][l] * thisMatrix[k][l]); //dij^2
875                                 }
876                         }
877                 }
878                 
879                 //1 / (numSamples / numTreatments)
880                 within *= (1.0 / (float) (thisMatrix.size() / (float) numTreatments));
881                 
882                 return within;
883         }
884         catch(exception& e) {
885                 m->errorOut(e, "AmovaCommand", "calcWithin");
886                 exit(1);
887         }
888 }
889 //**********************************************************************************************************************
890 double AmovaCommand::calcTotal(int numTreatments) {
891         try {
892                 double total = 0.0;
893                 
894                 //traverse lower triangle
895                 for (int k = 0; k < matrix.size(); k++) { 
896                         for (int l = k; l < matrix[k].size(); l++) {
897                                 total += (matrix[k][l] * matrix[k][l]); //dij^2
898                         }
899                 }
900                 
901                 //1 / numSamples
902                 total *= (1.0 / (float) matrix.size());
903                 
904                 return total;
905         }
906         catch(exception& e) {
907                 m->errorOut(e, "AmovaCommand", "calcTotal");
908                 exit(1);
909         }
910 }
911 //**********************************************************************************************************************
912