]> git.donarmstrong.com Git - mothur.git/blob - amovacommand.cpp
mods to amova command
[mothur.git] / amovacommand.cpp
1 /*
2  *  amovacommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 2/7/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "amovacommand.h"
11 #include "sharedutilities.h"
12 //#include "sharedsobscollectsummary.h"
13 //#include "sharedchao1.h"
14 //#include "sharedace.h"
15 //#include "sharednseqs.h"
16 //#include "sharedjabund.h"
17 //#include "sharedsorabund.h"
18 //#include "sharedjclass.h"
19 //#include "sharedsorclass.h"
20 //#include "sharedjest.h"
21 //#include "sharedsorest.h"
22 //#include "sharedthetayc.h"
23 //#include "sharedthetan.h"
24 //#include "sharedkstest.h"
25 //#include "whittaker.h"
26 //#include "sharedochiai.h"
27 //#include "sharedanderbergs.h"
28 //#include "sharedkulczynski.h"
29 //#include "sharedkulczynskicody.h"
30 //#include "sharedlennon.h"
31 //#include "sharedmorisitahorn.h"
32 //#include "sharedbraycurtis.h"
33 //#include "sharedjackknife.h"
34 //#include "whittaker.h"
35 //#include "odum.h"
36 //#include "canberra.h"
37 //#include "structeuclidean.h"
38 //#include "structchord.h"
39 //#include "hellinger.h"
40 //#include "manhattan.h"
41 //#include "structpearson.h"
42 //#include "soergel.h"
43 //#include "spearman.h"
44 //#include "structkulczynski.h"
45 //#include "structchi2.h"
46 //#include "speciesprofile.h"
47 //#include "hamming.h"
48 //#include "gower.h"
49 //#include "memchi2.h"
50 //#include "memchord.h"
51 //#include "memeuclidean.h"
52 //#include "mempearson.h"
53
54 //**********************************************************************************************************************
55 vector<string> AmovaCommand::getValidParameters(){      
56         try {
57                 string Array[] =  {"groups","label","outputdir","iters","phylip","design","alpha", 
58                         //"sets","processors"
59                         "inputdir"};
60                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
61                 return myArray;
62         }
63         catch(exception& e) {
64                 m->errorOut(e, "AmovaCommand", "getValidParameters");
65                 exit(1);
66         }
67 }
68 //**********************************************************************************************************************
69 AmovaCommand::AmovaCommand(){   
70         try {
71                 abort = true; calledHelp = true; 
72                 vector<string> tempOutNames;
73                 outputTypes["amova"] = tempOutNames;
74         }
75         catch(exception& e) {
76                 m->errorOut(e, "AmovaCommand", "AmovaCommand");
77                 exit(1);
78         }
79 }
80 //**********************************************************************************************************************
81 vector<string> AmovaCommand::getRequiredParameters(){   
82         try {
83                 string Array[] =  {"design"};
84                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
85                 return myArray;
86         }
87         catch(exception& e) {
88                 m->errorOut(e, "AmovaCommand", "getRequiredParameters");
89                 exit(1);
90         }
91 }
92 //**********************************************************************************************************************
93 vector<string> AmovaCommand::getRequiredFiles(){        
94         try {
95                 string Array[] =  {};
96                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
97                 return myArray;
98         }
99         catch(exception& e) {
100                 m->errorOut(e, "AmovaCommand", "getRequiredFiles");
101                 exit(1);
102         }
103 }
104 //**********************************************************************************************************************
105
106 AmovaCommand::AmovaCommand(string option) {
107         try {
108                 globaldata = GlobalData::getInstance();
109                 abort = false; calledHelp = false;   
110 //              allLines = 1;
111 //              labels.clear();
112                 
113                 //allow user to run help
114                 if(option == "help") { help(); abort = true; calledHelp = true; }
115                 
116                 else {
117                         //valid paramters for this command
118                         string AlignArray[] =  {"groups","label","outputdir","iters","phylip","alpha",
119 //                              "design","sets","processors",
120                                 "inputdir"};
121                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
122                         
123                         OptionParser parser(option);
124                         map<string,string> parameters = parser.getParameters();
125                         
126                         ValidParameters validParameter;
127                         
128                         //check to make sure all parameters are valid for command
129                         map<string,string>::iterator it;
130                         for (it = parameters.begin(); it != parameters.end(); it++) { 
131                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
132                         }
133                         
134                         //initialize outputTypes
135                         vector<string> tempOutNames;
136                         outputTypes["amova"] = tempOutNames;
137                         
138                         //if the user changes the output directory command factory will send this info to us in the output parameter 
139                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
140                         
141                         //if the user changes the input directory command factory will send this info to us in the output parameter 
142                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
143                         if (inputDir == "not found"){   inputDir = "";          }
144                         else {
145                                 string path;
146                                 it = parameters.find("design");
147                                 //user has given a template file
148                                 if(it != parameters.end()){ 
149                                         path = m->hasPath(it->second);
150                                         //if the user has not given a path then, add inputdir. else leave path alone.
151                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
152                                 }
153                                 
154                                 it = parameters.find("phylip");
155                                 //user has given a template file
156                                 if(it != parameters.end()){ 
157                                         path = m->hasPath(it->second);
158                                         //if the user has not given a path then, add inputdir. else leave path alone.
159                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
160                                 }
161                         }
162                         
163                         phylipFileName = validParameter.validFile(parameters, "phylip", true);
164                         if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
165                         else if (phylipFileName == "not found") { phylipFileName = ""; }        
166                         else {
167                                 globaldata->newRead();
168                                 globaldata->setPhylipFile(phylipFileName);
169                         }
170                         
171                         //check for required parameters
172                         designFileName = validParameter.validFile(parameters, "design", true);
173                         if (designFileName == "not open") { abort = true; }
174                         else if (designFileName == "not found") {
175                                 designFileName = "";
176                                 m->mothurOut("You must provide an design file.");
177                                 m->mothurOutEndLine();
178                                 abort = true;
179                         }       
180
181                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
182                         convert(temp, iters); 
183                         
184                         temp = validParameter.validFile(parameters, "alpha", false);                    if (temp == "not found") { temp = "0.05"; }
185                         convert(temp, experimentwiseAlpha); 
186                         
187 //                      make sure the user has already run the read.otu command
188 //                      if ((globaldata->getSharedFile() == "")) {
189 //                              if ((phylipfile == "")) {
190 //                                      m->mothurOut("You must read a list and a group, a shared file or provide a distance matrix before you can use the amova command.");
191 //                                      m->mothurOutEndLine();
192 //                                      abort = true; 
193 //                              }
194 //                      }else { sharedfile = globaldata->getSharedFile(); } 
195 //                              
196 //                      //use distance matrix if one is provided
197 //                      if ((sharedfile != "") && (phylipfile != "")) { sharedfile = ""; }
198 //                      
199 //                      //check for optional parameter and set defaults
200 //                      // ...at some point should added some additional type checking...
201 //                      label = validParameter.validFile(parameters, "label", false);                   
202 //                      if (label == "not found") { label = ""; }
203 //                      else { 
204 //                              if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
205 //                              else { allLines = 1;  }
206 //                      }
207 //                      
208 //                      //if the user has not specified any labels use the ones from read.otu
209 //                      if (label == "") {  
210 //                              allLines = globaldata->allLines; 
211 //                              labels = globaldata->labels; 
212 //                      }
213 //                      
214 //                      groups = validParameter.validFile(parameters, "groups", false);                 
215 //                      if (groups == "not found") { groups = "";  }
216 //                      else { 
217 //                              m->splitAtDash(groups, Groups);
218 //                              globaldata->Groups = Groups;
219 //                      }
220 //                      
221 //                      sets = validParameter.validFile(parameters, "sets", false);                     
222 //                      if (sets == "not found") { sets = ""; pickedGroups = false; }
223 //                      else { 
224 //                              pickedGroups = true;
225 //                              m->splitAtDash(sets, Sets);
226 //                      }
227 //              
228 //                      temp = validParameter.validFile(parameters, "processors", false);                       if (temp == "not found"){       temp = "1";     }
229 //                      convert(temp, processors); 
230 //                      
231 //                      vector<string> Estimators;
232 //                      calc = validParameter.validFile(parameters, "calc", false);                     
233 //                      if (calc == "not found") { calc = "morisitahorn";  }
234 //                      m->splitAtDash(calc, Estimators);
235 //                              
236 //                      if (abort == false) {
237 //                              
238 //                              ValidCalculators* validCalculator = new ValidCalculators();
239 //                              
240 //                              for (int i=0; i<Estimators.size(); i++) {
241 //                                      if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) { 
242 //                                              if (Estimators[i] == "sharedsobs") { 
243 //                                                      calculators.push_back(new SharedSobsCS());
244 //                                              }else if (Estimators[i] == "sharedchao") { 
245 //                                                      calculators.push_back(new SharedChao1());
246 //                                              }else if (Estimators[i] == "sharedace") { 
247 //                                                      calculators.push_back(new SharedAce());
248 //                                              }else if (Estimators[i] == "jabund") {  
249 //                                                      calculators.push_back(new JAbund());
250 //                                              }else if (Estimators[i] == "sorabund") { 
251 //                                                      calculators.push_back(new SorAbund());
252 //                                              }else if (Estimators[i] == "jclass") { 
253 //                                                      calculators.push_back(new Jclass());
254 //                                              }else if (Estimators[i] == "sorclass") { 
255 //                                                      calculators.push_back(new SorClass());
256 //                                              }else if (Estimators[i] == "jest") { 
257 //                                                      calculators.push_back(new Jest());
258 //                                              }else if (Estimators[i] == "sorest") { 
259 //                                                      calculators.push_back(new SorEst());
260 //                                              }else if (Estimators[i] == "thetayc") { 
261 //                                                      calculators.push_back(new ThetaYC());
262 //                                              }else if (Estimators[i] == "thetan") { 
263 //                                                      calculators.push_back(new ThetaN());
264 //                                              }else if (Estimators[i] == "kstest") { 
265 //                                                      calculators.push_back(new KSTest());
266 //                                              }else if (Estimators[i] == "sharednseqs") { 
267 //                                                      calculators.push_back(new SharedNSeqs());
268 //                                              }else if (Estimators[i] == "ochiai") { 
269 //                                                      calculators.push_back(new Ochiai());
270 //                                              }else if (Estimators[i] == "anderberg") { 
271 //                                                      calculators.push_back(new Anderberg());
272 //                                              }else if (Estimators[i] == "kulczynski") { 
273 //                                                      calculators.push_back(new Kulczynski());
274 //                                              }else if (Estimators[i] == "kulczynskicody") { 
275 //                                                      calculators.push_back(new KulczynskiCody());
276 //                                              }else if (Estimators[i] == "lennon") { 
277 //                                                      calculators.push_back(new Lennon());
278 //                                              }else if (Estimators[i] == "morisitahorn") { 
279 //                                                      calculators.push_back(new MorHorn());
280 //                                              }else if (Estimators[i] == "braycurtis") { 
281 //                                                      calculators.push_back(new BrayCurtis());
282 //                                              }else if (Estimators[i] == "whittaker") { 
283 //                                                      calculators.push_back(new Whittaker());
284 //                                              }else if (Estimators[i] == "odum") { 
285 //                                                      calculators.push_back(new Odum());
286 //                                              }else if (Estimators[i] == "canberra") { 
287 //                                                      calculators.push_back(new Canberra());
288 //                                              }else if (Estimators[i] == "structeuclidean") { 
289 //                                                      calculators.push_back(new StructEuclidean());
290 //                                              }else if (Estimators[i] == "structchord") { 
291 //                                                      calculators.push_back(new StructChord());
292 //                                              }else if (Estimators[i] == "hellinger") { 
293 //                                                      calculators.push_back(new Hellinger());
294 //                                              }else if (Estimators[i] == "manhattan") { 
295 //                                                      calculators.push_back(new Manhattan());
296 //                                              }else if (Estimators[i] == "structpearson") { 
297 //                                                      calculators.push_back(new StructPearson());
298 //                                              }else if (Estimators[i] == "soergel") { 
299 //                                                      calculators.push_back(new Soergel());
300 //                                              }else if (Estimators[i] == "spearman") { 
301 //                                                      calculators.push_back(new Spearman());
302 //                                              }else if (Estimators[i] == "structkulczynski") { 
303 //                                                      calculators.push_back(new StructKulczynski());
304 //                                              }else if (Estimators[i] == "speciesprofile") { 
305 //                                                      calculators.push_back(new SpeciesProfile());
306 //                                              }else if (Estimators[i] == "hamming") { 
307 //                                                      calculators.push_back(new Hamming());
308 //                                              }else if (Estimators[i] == "structchi2") { 
309 //                                                      calculators.push_back(new StructChi2());
310 //                                              }else if (Estimators[i] == "gower") { 
311 //                                                      calculators.push_back(new Gower());
312 //                                              }else if (Estimators[i] == "memchi2") { 
313 //                                                      calculators.push_back(new MemChi2());
314 //                                              }else if (Estimators[i] == "memchord") { 
315 //                                                      calculators.push_back(new MemChord());
316 //                                              }else if (Estimators[i] == "memeuclidean") { 
317 //                                                      calculators.push_back(new MemEuclidean());
318 //                                              }else if (Estimators[i] == "mempearson") { 
319 //                                                      calculators.push_back(new MemPearson());
320 //                                              }
321 //                                      }
322 //                              }
323 //                      }
324                 }
325                 
326         }
327         catch(exception& e) {
328                 m->errorOut(e, "AmovaCommand", "AmovaCommand");
329                 exit(1);
330         }
331 }
332
333 //**********************************************************************************************************************
334
335 void AmovaCommand::help(){
336         try {
337                 m->mothurOut("Referenced: Anderson MJ (2001). A new method for non-parametric multivariate analysis of variance. Austral Ecol 26: 32-46.\n");
338 //              m->mothurOut("The amova command can only be executed after a successful read.otu command of a list and group or shared file, or by providing a phylip formatted distance matrix.\n");
339                 m->mothurOut("The amova command outputs a .amova file. \n");
340                 m->mothurOut("The amova command parameters are phylip, iters, and alpha.  The phylip and design parameters are required.\n");
341                 m->mothurOut("The design parameter allows you to assign your samples to groups when you are running amova. It is required. \n");
342                 m->mothurOut("The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n");
343 //              m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile. To run the pairwise comparisons of all sets use sets=all.\n");
344                 m->mothurOut("The iters parameter allows you to set number of randomization for the P value.  The default is 1000. \n");
345 //              m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes. groups=all will find all pairwise comparisons. \n");
346 //              m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
347 //              m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
348                 m->mothurOut("The amova command should be in the following format: amova(phylip=file.dist, design=file.design).\n");
349                 m->mothurOut("Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n\n");
350                 
351         }
352         catch(exception& e) {
353                 m->errorOut(e, "AmovaCommand", "help");
354                 exit(1);
355         }
356 }
357
358 //**********************************************************************************************************************
359
360 AmovaCommand::~AmovaCommand(){}
361
362 //**********************************************************************************************************************
363
364 int AmovaCommand::execute(){
365         try {
366                 
367                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
368                 
369                 //read design file
370                 designMap = new GroupMap(designFileName);
371                 designMap->readDesignMap();
372
373                 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
374                                                 
375                 //read in distance matrix and square it
376                 ReadPhylipVector readMatrix(phylipFileName);
377                 vector<string> sampleNames = readMatrix.read(distanceMatrix);
378                 int numSamples = sampleNames.size();
379                 
380                 for(int i=0;i<distanceMatrix.size();i++){
381                         for(int j=0;j<i;j++){
382                                 distanceMatrix[i][j] *= distanceMatrix[i][j];   
383                         }
384                 }
385                 
386                 //link designMap to rows/columns in distance matrix
387                 map<string, vector<int> > origGroupSampleMap;
388                 for(int i=0;i<numSamples;i++){
389                         origGroupSampleMap[designMap->getGroup(sampleNames[i])].push_back(i);
390                 }
391                 int numGroups = origGroupSampleMap.size();
392                 
393                 //create a new filename
394                 ofstream AMOVAFile;
395                 string AMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName))  + "amova";                         
396                 m->openOutputFile(AMOVAFileName, AMOVAFile);
397                 outputNames.push_back(AMOVAFileName); outputTypes["amova"].push_back(AMOVAFileName);
398
399                 experimentwiseAlpha = 0.05;
400                 
401                 double fullANOVAPValue = runAMOVA(AMOVAFile, origGroupSampleMap, experimentwiseAlpha);
402                 if(fullANOVAPValue <= experimentwiseAlpha && numGroups > 2){
403                         
404                         int numCombos = numGroups * (numGroups-1) / 2;
405                         double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
406                         
407                         map<string, vector<int> >::iterator itA;
408                         map<string, vector<int> >::iterator itB;
409                         
410                         for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
411                                 itB = itA;itB++;
412                                 for(itB;itB!=origGroupSampleMap.end();itB++){
413                                         
414                                         map<string, vector<int> > pairwiseGroupSampleMap;
415                                         pairwiseGroupSampleMap[itA->first] = itA->second;
416                                         pairwiseGroupSampleMap[itB->first] = itB->second;
417                                         
418                                         runAMOVA(AMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
419                                 }                       
420                         }
421                         cout << "Experiment-wise error rate: " << experimentwiseAlpha << endl;
422                         cout << "Pair-wise error rate (Bonferroni): " << pairwiseAlpha << endl;
423                 }
424                 else{
425                         cout << "Experiment-wise error rate: " << experimentwiseAlpha << endl;
426                 }
427
428                 cout << "If you have borderline P-values, you should try increasing the number of iterations" << endl;
429
430                 
431 //              //make sure sets are all in designMap
432 //              SharedUtil* util = new SharedUtil();  
433 //              util->setGroups(Sets, designMap->namesOfGroups);  
434 //              delete util;
435 //              
436 //              //if the user pickedGroups or set sets=all, then we want to figure out how to split the pairwise comparison between processors
437 //              if (pickedGroups) {
438 //                      for (int a=0; a<numGroups; a++) { 
439 //                              for (int l = 0; l < a; l++) {   
440 //                                      vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
441 //                                      namesOfGroupCombos.push_back(groups);
442 //                              }
443 //                      }
444 //              }else { //one giant compare
445 //                      vector<string> groups;
446 //                      for (int a=0; a<numGroups; a++) { groups.push_back(Sets[a]); }
447 //                      namesOfGroupCombos.push_back(groups);
448 //              }
449 //              
450 //              //only 1 combo
451 //              if (numGroups == 2) { processors = 1; }
452 //              else if (numGroups < 2) {
453 //                      m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command.");
454 //                      m->mothurOutEndLine(); m->control_pressed = true;
455 //              }
456 //              
457 //              #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
458 //                      if(processors != 1){
459 //                              int numPairs = namesOfGroupCombos.size();
460 //                              int numPairsPerProcessor = numPairs / processors;
461 //                      
462 //                              for (int i = 0; i < processors; i++) {
463 //                                      int startPos = i * numPairsPerProcessor;
464 //                                      if(i == processors - 1){
465 //                                              numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
466 //                                      }
467 //                                      lines.push_back(linePair(startPos, numPairsPerProcessor));
468 //                              }
469 //                      }
470 //              #endif
471 //              
472 //              if (sharedfile != "") { //create distance matrix for each label
473 //                      
474 //                      if (outputDir == "") { outputDir = m->hasPath(sharedfile); }
475 //                      
476 //                      //for each calculator
477 //                      for(int i = 0 ; i < calculators.size(); i++) {
478 //                              
479 //                              //create a new filename
480 //                              ofstream out;
481 //                              string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova";
482 //                              m->openOutputFile(outputFile, out);
483 //                              outputNames.push_back(outputFile); outputTypes["amova"].push_back(outputFile);
484 //                              
485 //                              //print headers
486 //                              out << "label\tgroupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue" << endl;  
487 //                              out.close();
488 //                              m->mothurOut("label\tgroupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue\n");  
489 //                      }
490 //                      
491 //                      InputData input(sharedfile, "sharedfile");
492 //                      vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
493 //                      string lastLabel = lookup[0]->getLabel();
494 //                      
495 //                      //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
496 //                      set<string> processedLabels;
497 //                      set<string> userLabels = labels;
498 //                      
499 //                      //sanity check between shared file groups and design file groups
500 //                      for (int i = 0; i < lookup.size(); i++) { 
501 //                              string group = designMap->getGroup(lookup[i]->getGroup());
502 //                              
503 //                              if (group == "not found") { m->control_pressed = true;  m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct."); m->mothurOutEndLine();  }
504 //                      }
505 //                      
506 //                      //as long as you are not at the end of the file or done wih the lines you want
507 //                      while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
508 //                              
509 //                              if (m->control_pressed) {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } globaldata->Groups.clear();  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); } return 0; }
510 //                              
511 //                              if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
512 //                                      
513 //                                      m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
514 //                                      process(lookup);
515 //                                      
516 //                                      processedLabels.insert(lookup[0]->getLabel());
517 //                                      userLabels.erase(lookup[0]->getLabel());
518 //                              }
519 //                              
520 //                              if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
521 //                                      string saveLabel = lookup[0]->getLabel();
522 //                                      
523 //                                      for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
524 //                                      lookup = input.getSharedRAbundVectors(lastLabel);
525 //                                      m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
526 //                                      
527 //                                      process(lookup);
528 //                                      
529 //                                      processedLabels.insert(lookup[0]->getLabel());
530 //                                      userLabels.erase(lookup[0]->getLabel());
531 //                                      
532 //                                      //restore real lastlabel to save below
533 //                                      lookup[0]->setLabel(saveLabel);
534 //                              }
535 //                              
536 //                              lastLabel = lookup[0]->getLabel();
537 //                              //prevent memory leak
538 //                              for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
539 //                              
540 //                              if (m->control_pressed) {  globaldata->Groups.clear();   delete designMap;  for (int i = 0; i < outputNames.size(); i++) {      remove(outputNames[i].c_str()); } return 0; }
541 //                              
542 //                              //get next line to process
543 //                              lookup = input.getSharedRAbundVectors();                                
544 //                      }
545 //                      
546 //                      if (m->control_pressed) { globaldata->Groups.clear();  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); }  return 0; }
547 //                      
548 //                      //output error messages about any remaining user labels
549 //                      set<string>::iterator it;
550 //                      bool needToRun = false;
551 //                      for (it = userLabels.begin(); it != userLabels.end(); it++) {  
552 //                              m->mothurOut("Your file does not include the label " + *it); 
553 //                              if (processedLabels.count(lastLabel) != 1) {
554 //                                      m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
555 //                                      needToRun = true;
556 //                              }else {
557 //                                      m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
558 //                              }
559 //                      }
560 //                      
561 //                      //run last label if you need to
562 //                      if (needToRun == true)  {
563 //                              for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
564 //                              lookup = input.getSharedRAbundVectors(lastLabel);
565 //                              
566 //                              m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
567 //                              
568 //                              process(lookup);
569 //                              
570 //                              for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
571 //                      }
572 //                      
573 //                      //reset groups parameter
574 //                      globaldata->Groups.clear();  
575 //                      
576 //                      
577 //              }else { //user provided distance matrix
578 //                      
579 //                      if (outputDir == "") { outputDir = m->hasPath(phylipFile); }
580 //                                      
581 //                      //create a new filename
582 //                      ofstream out;
583 //                      string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipFile))  + "amova";                                
584 //                      m->openOutputFile(outputFile, out);
585 //                      outputNames.push_back(outputFile); outputTypes["amova"].push_back(outputFile);
586 //                      
587 //                      //print headers
588 //                      out << "groupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue" << endl;  
589 //                      out.close();
590 //                      m->mothurOut("groupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue\n");  
591 //                      
592 //                      ReadPhylipVector readMatrix(phylipFile);
593 //                      vector< vector<double> > completeMatrix;
594 //                      vector<string> names = readMatrix.read(completeMatrix);
595 //                      
596 //                      #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
597 //                              if(processors == 1){
598 //                                      driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
599 //                              }else{
600 //                                      int process = 1;
601 //                                      vector<int> processIDS;
602 //                                      
603 //                                      //loop through and create all the processes you want
604 //                                      while (process != processors) {
605 //                                              int pid = fork();
606 //                                              
607 //                                              if (pid > 0) {
608 //                                                      processIDS.push_back(pid);  
609 //                                                      process++;
610 //                                              }else if (pid == 0){
611 //                                                      driver(lines[process].start, lines[process].num, names, toString(getpid()), completeMatrix);
612 //                                                      exit(0);
613 //                                              }else { 
614 //                                                      m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
615 //                                                      for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
616 //                                                      exit(0);
617 //                                              }
618 //                                      }
619 //                                      
620 //                                      //do my part
621 //                                      driver(lines[0].start, lines[0].num, names, "", completeMatrix);
622 //                                      
623 //                                      //force parent to wait until all the processes are done
624 //                                      for (int i=0;i<(processors-1);i++) { 
625 //                                              int temp = processIDS[i];
626 //                                              wait(&temp);
627 //                                      }
628 //                                      
629 //                                      //append files
630 //                                      string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipFile)) + "amova";                         
631 //                                      for (int j = 0; j < processIDS.size(); j++) {
632 //                                              m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
633 //                                              remove((outputFile + toString(processIDS[j])).c_str());
634 //                                      }
635 //                                      
636 //                              }
637 //                      #else
638 //                              driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
639 //                      #endif
640 //                      
641 //              }
642                 
643                 delete designMap;
644          
645                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
646
647                 m->mothurOutEndLine();
648                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
649                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
650                 m->mothurOutEndLine();
651                 
652                 return 0;
653         }
654         catch(exception& e) {
655                 m->errorOut(e, "AmovaCommand", "execute");
656                 exit(1);
657         }
658 }
659
660 //**********************************************************************************************************************
661
662 double AmovaCommand::runAMOVA(ofstream& AMOVAFile, map<string, vector<int> > groupSampleMap, double alpha) {
663         try {
664                 map<string, vector<int> >::iterator it;
665
666                 int numGroups = groupSampleMap.size();
667                 int numSamples = 0;
668
669                 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
670                         numSamples += it->second.size();                        
671                 }
672
673                 double ssTotalOrig = calcSSTotal(groupSampleMap);
674                 double ssWithinOrig = calcSSWithin(groupSampleMap);
675                 double ssAmongOrig = ssTotalOrig - ssWithinOrig;
676                 
677                 double counter = 0;
678                 for(int i=0;i<iters;i++){
679                         map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
680                         double ssWithinRand = calcSSWithin(randomizedGroup);
681                         if(ssWithinRand < ssWithinOrig){        counter++;      }
682                 }
683                 
684                 double pValue = (double)counter / (double) iters;
685                 string pString = "";
686                 if(pValue < 1/(double)iters){   pString = '<' + toString(1/(double)iters);      }
687                 else                                            {       pString = toString(pValue);                                     }
688                 
689                 
690                 //print anova table
691                 it = groupSampleMap.begin();
692                 cout << it->first;
693                 it++;
694                 for(it;it!=groupSampleMap.end();it++){
695                         cout << " vs. " << it->first;
696                 }cout << endl;
697                 cout << "ANOVA\tAmong\tWithin\tTotal" << endl;
698                 cout << "SS\t" << ssAmongOrig << '\t' << ssWithinOrig << '\t' << ssTotalOrig << endl;
699                 
700                 int dfAmong = numGroups - 1;                    double MSAmong = ssAmongOrig / (double) dfAmong;
701                 int dfWithin = numSamples - numGroups;  double MSWithin = ssWithinOrig / (double) dfWithin;
702                 int dfTotal = numSamples - 1;                   double Fs = MSAmong / MSWithin;
703                 
704                 cout << "df\t" << dfAmong << '\t' << dfWithin << '\t' << dfTotal << endl;
705                 cout << "MS\t" << MSAmong << '\t' << MSWithin << endl << endl;
706                 cout << "Fs:\t" << Fs << endl;
707                 cout << "p-value: " << pString;
708                 if(pValue < alpha){     cout << " ***"; }
709                 cout << endl << endl;
710
711                 return pValue;
712         }
713         catch(exception& e) {
714                 m->errorOut(e, "AmovaCommand", "calcAmova");
715                 exit(1);
716         }
717 }
718
719 //**********************************************************************************************************************
720
721 map<string, vector<int> > AmovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
722         try{
723                 vector<int> sampleIndices;
724                 vector<int> samplesPerGroup;
725                 
726                 map<string, vector<int> >::iterator it;
727                 for(it=origMapping.begin();it!=origMapping.end();it++){
728                         vector<int> indices = it->second;
729                         samplesPerGroup.push_back(indices.size());
730                         sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
731                 }
732                 
733                 random_shuffle(sampleIndices.begin(), sampleIndices.end());
734                 
735                 int index = 0;
736                 map<string, vector<int> > randomizedGroups = origMapping;
737                 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
738                         for(int i=0;i<it->second.size();i++){
739                                 it->second[i] = sampleIndices[index++];                         
740                         }
741                 }
742
743                 return randomizedGroups;                
744         }
745         catch (exception& e) {
746                 m->errorOut(e, "AmovaCommand", "randomizeGroups");
747                 exit(1);
748         }
749 }
750
751 //**********************************************************************************************************************
752
753 double AmovaCommand::calcSSTotal(map<string, vector<int> >& groupSampleMap) {
754         try {
755                 
756                 vector<int> indices;
757                 map<string, vector<int> >::iterator it;
758                 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
759                         indices.insert(indices.end(), it->second.begin(), it->second.end());                    
760                 }
761                 sort(indices.begin(), indices.end());
762                         
763                 int numIndices =indices.size();
764                 double ssTotal = 0.0;
765                 
766                 for(int i=1;i<numIndices;i++){
767                         int row = indices[i];
768                         
769                         for(int j=0;j<i;j++){
770                                 ssTotal += distanceMatrix[row][indices[j]];
771                         }
772                 }
773                 ssTotal /= numIndices;
774                         
775                 return ssTotal;
776         }
777         catch(exception& e) {
778                 m->errorOut(e, "AmovaCommand", "calcSSTotal");
779                 exit(1);
780         }
781 }
782
783 //**********************************************************************************************************************
784
785 double AmovaCommand::calcSSWithin(map<string, vector<int> >& groupSampleMap) {
786         try {
787
788                 double ssWithin = 0.0;
789                 
790                 map<string, vector<int> >::iterator it;
791                 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
792                         
793                         double withinGroup = 0;
794                         
795                         vector<int> samples = it->second;
796                         
797                         for(int i=0;i<samples.size();i++){
798                                 int row = samples[i];
799
800                                 for(int j=0;j<samples.size();j++){
801                                         int col = samples[j];
802
803                                         if(col < row){
804                                                 withinGroup += distanceMatrix[row][col];
805                                         }
806                                         
807                                 }
808                         }
809
810                         ssWithin += withinGroup / samples.size();
811                 }
812
813                 return ssWithin;
814         }
815         catch(exception& e) {
816                 m->errorOut(e, "AmovaCommand", "calcWithin");
817                 exit(1);
818         }
819 }
820
821 //**********************************************************************************************************************
822
823 //int AmovaCommand::process(vector<SharedRAbundVector*> thisLookup) {
824 //      try{
825 //              
826 //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
827 //              if(processors == 1){
828 //                      driver(0, namesOfGroupCombos.size(), thisLookup, "");
829 //              }else{
830 //                      int process = 1;
831 //                      vector<int> processIDS;
832 //                      
833 //                      //loop through and create all the processes you want
834 //                      while (process != processors) {
835 //                              int pid = fork();
836 //                              
837 //                              if (pid > 0) {
838 //                                      processIDS.push_back(pid);  
839 //                                      process++;
840 //                              }else if (pid == 0){
841 //                                      driver(lines[process].start, lines[process].num, thisLookup, toString(getpid()));
842 //                                      exit(0);
843 //                              }else { 
844 //                                      m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
845 //                                      for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
846 //                                      exit(0);
847 //                              }
848 //                      }
849 //                      
850 //                      //do my part
851 //                      driver(lines[0].start, lines[0].num, thisLookup, "");
852 //                      
853 //                      //force parent to wait until all the processes are done
854 //                      for (int i=0;i<(processors-1);i++) { 
855 //                              int temp = processIDS[i];
856 //                              wait(&temp);
857 //                      }
858 //                      
859 //                      //append files
860 //                      for(int i = 0 ; i < calculators.size(); i++) {
861 //                              string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova";                            
862 //
863 //                              for (int j = 0; j < processIDS.size(); j++) {
864 //                                      m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
865 //                                      remove((outputFile + toString(processIDS[j])).c_str());
866 //                              }
867 //                      }
868 //              }
869 //#else
870 //              driver(0, namesOfGroupCombos.size(), thisLookUp, "");
871 //#endif
872 //                      
873 //              return 0;
874 //      }
875 //      catch(exception& e) {
876 //              m->errorOut(e, "AmovaCommand", "process");
877 //              exit(1);
878 //      }
879 //}
880
881 //**********************************************************************************************************************
882
883 //int AmovaCommand::driver(int start, int num, vector<SharedRAbundVector*> thisLookup, string pidValue) {
884 //      try {
885 //              vector<SharedRAbundVector*> subset;
886 //              EstOutput data;
887 //              
888 //              //for each calculator
889 //              for(int i = 0 ; i < calculators.size(); i++) {
890 //                      
891 //                      //create a new filename
892 //                      ofstream out;
893 //                      string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova" + pidValue;                         
894 //                      m->openOutputFileAppend(outputFile, out);
895 //                      out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
896 //                      
897 //                      //for each combo
898 //                      for (int c = start; c < (start+num); c++) {
899 //                              
900 //                              if (m->control_pressed) { out.close(); return 0; }
901 //                              
902 //                              //get set names
903 //                              vector<string> setNames;
904 //                              for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
905 //                              
906 //                              vector<SharedRAbundVector*> thisCombosLookup;
907 //                              vector<string> thisCombosLookupSets; //what set each sharedRabund is from to be used when calculating SSWithin
908 //                              for (int k = 0; k < thisLookup.size(); k++) {
909 //                                      string thisGroup = thisLookup[k]->getGroup();
910 //                                      
911 //                                      //is this group for a set we want to compare??
912 //                                      if (m->inUsersGroups(designMap->getGroup(thisGroup), setNames)) {  
913 //                                              thisCombosLookup.push_back(thisLookup[k]);
914 //                                              thisCombosLookupSets.push_back(designMap->getGroup(thisGroup));
915 //                                      }
916 //                                      
917 //                              }
918 //                              
919 //                              int numGroups = thisCombosLookup.size();
920 //                      
921 //                              //calc the distance matrix
922 //                              matrix.clear();
923 //                              matrix.resize(numGroups);
924 //                              for (int k = 0; k < matrix.size(); k++) {       for (int j = 0; j < matrix.size(); j++) {       matrix[k].push_back(1.0);       }       }
925 //                              
926 //                              if (thisCombosLookup.size() == 0)  { 
927 //                                      m->mothurOut("[ERROR]: Missing shared info for sets. Skipping comparison."); m->mothurOutEndLine(); 
928 //                              }else{
929 //                                      
930 //                                      m->mothurOut(thisLookup[0]->getLabel() + '\t');
931 //                                      out << thisLookup[0]->getLabel() << '\t';
932 //                                      if (setNames.size() == 2) {
933 //                                              m->mothurOut(setNames[0] + '-' + setNames[1] + '\t');
934 //                                              out << setNames[0] << "-" << setNames[1] << '\t'; 
935 //                                      }
936 //                                      else {
937 //                                              out << "all" << '\t'; 
938 //                                              m->mothurOut("all\t");
939 //                                      }
940 //                                      
941 //                                      for (int k = 0; k < thisCombosLookup.size(); k++) { 
942 //                                              for (int l = k; l < thisCombosLookup.size(); l++) {
943 //                                                      
944 //                                                      if (m->control_pressed) { out.close(); return 0; }
945 //                                                      
946 //                                                      if (k != l) { //we dont need to similiarity of a groups to itself
947 //                                                              //get estimated similarity between 2 groups
948 //                                                              subset.clear(); //clear out old pair of sharedrabunds
949 //                                                              //add new pair of sharedrabunds
950 //                                                              subset.push_back(thisCombosLookup[k]); subset.push_back(thisCombosLookup[l]); 
951 //                                                              
952 //                                                              //if this calc needs all groups to calculate the pair load all groups
953 //                                                              if (calculators[i]->getNeedsAll()) { 
954 //                                                                      //load subset with rest of lookup for those calcs that need everyone to calc for a pair
955 //                                                                      for (int w = 0; w < thisCombosLookup.size(); w++) {
956 //                                                                              if ((w != k) && (w != l)) { subset.push_back(thisCombosLookup[w]); }
957 //                                                                      }
958 //                                                              }
959 //                                                              
960 //                                                              data = calculators[i]->getValues(subset); //saves the calculator outputs
961 //                                                              
962 //                                                              //save values in similarity matrix
963 //                                                              matrix[k][l] = 1.0 - data[0];
964 //                                                              matrix[l][k] = 1.0 - data[0];
965 //                                                      }
966 //                                              }
967 //                                      }
968 //                                      
969 //                                      //calc amova
970 //                                      calcAmova(out, setNames.size(), thisCombosLookupSets);
971 //                              }
972 //                      }
973 //                      
974 //                      out.close();
975 //              }               
976 //              
977 //              return 0;
978 //              
979 //      }
980 //      catch(exception& e) {
981 //              m->errorOut(e, "AmovaCommand", "driver");
982 //              exit(1);
983 //      }
984 //}
985 //
986 //**********************************************************************************************************************
987 //
988 //int AmovaCommand::driver(int start, int num, vector<string> names, string pidValue, vector< vector<double> >& completeMatrix) {
989 //      try {
990 //              
991 //              //create a new filename
992 //              ofstream out;
993 //              string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipFile)) + "amova" + pidValue;                              
994 //              m->openOutputFileAppend(outputFile, out);
995 //              out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
996 //              
997 //              //for each combo
998 //              for (int c = start; c < (start+num); c++) {
999 //                      
1000 //                      if (m->control_pressed) { out.close(); return 0; }
1001 //                      
1002 //                      //get set names
1003 //                      vector<string> setNames;
1004 //                      for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
1005 //                      
1006 //                      vector<string> thisCombosSets; //what set each line in the distance matrix is from to be used when calculating SSWithin
1007 //                      set<int> indexes;
1008 //                      for (int k = 0; k < names.size(); k++) {
1009 //                              //is this group for a set we want to compare??
1010 //                              if (m->inUsersGroups(designMap->getGroup(names[k]), setNames)) {  
1011 //                                      thisCombosSets.push_back(designMap->getGroup(names[k]));        
1012 //                                      indexes.insert(k); //save indexes of valid rows in matrix for submatrix
1013 //                              }
1014 //                      }
1015 //                      
1016 //                      int numGroups = thisCombosSets.size();
1017 //                      
1018 //                      //calc the distance matrix
1019 //                      matrix.clear();
1020 //                      matrix.resize(numGroups);
1021 //                      
1022 //                      for (int k = 0; k < matrix.size(); k++) {       for (int j = 0; j < matrix.size(); j++) {       matrix[k].push_back(1.0);       }       }
1023 //                      
1024 //                      if (thisCombosSets.size() == 0)  { 
1025 //                              m->mothurOut("[ERROR]: Missing distance info for sets. Skipping comparison."); m->mothurOutEndLine(); 
1026 //                      }else{
1027 //                              
1028 //                              if (setNames.size() == 2) { 
1029 //                                      out << setNames[0] << "-" << setNames[1] << '\t'; 
1030 //                                      m->mothurOut(setNames[0] + '-' + setNames[1] + '\t');
1031 //                              }
1032 //                              else {
1033 //                                      out << "all" << '\t'; 
1034 //                                      m->mothurOut("all\t");
1035 //                              }
1036 //                              
1037 //                              //fill submatrix
1038 //                              int rowCount = 0;
1039 //                              for (int j = 0; j < completeMatrix.size(); j++) {
1040 //                                      
1041 //                                      if (indexes.count(j) != 0) { //we want at least part of this row
1042 //                                              int columnCount = 0;
1043 //                                              for (int k = 0; k < completeMatrix[j].size(); k++) {
1044 //                                                      
1045 //                                                      if (indexes.count(k) != 0) { //we want this distance
1046 //                                                              matrix[rowCount][columnCount] = completeMatrix[j][k];
1047 //                                                              matrix[columnCount][rowCount] = completeMatrix[j][k];
1048 //                                                              columnCount++;
1049 //                                                      }
1050 //                                              }
1051 //                                              rowCount++;
1052 //                                      }
1053 //                              }
1054 //                              
1055 //                              //calc amova
1056 //                              calcAmova(out, setNames.size(), thisCombosSets);
1057 //                      }
1058 //              }
1059 //              
1060 //              out.close();
1061 //              
1062 //      
1063 //              return 0;
1064 //              
1065 //      }
1066 //      catch(exception& e) {
1067 //              m->errorOut(e, "AmovaCommand", "driver");
1068 //              exit(1);
1069 //      }
1070 //}
1071 //
1072 //**********************************************************************************************************************
1073