5 * Created by westcott on 2/7/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "amovacommand.h"
11 #include "sharedutilities.h"
12 //#include "sharedsobscollectsummary.h"
13 //#include "sharedchao1.h"
14 //#include "sharedace.h"
15 //#include "sharednseqs.h"
16 //#include "sharedjabund.h"
17 //#include "sharedsorabund.h"
18 //#include "sharedjclass.h"
19 //#include "sharedsorclass.h"
20 //#include "sharedjest.h"
21 //#include "sharedsorest.h"
22 //#include "sharedthetayc.h"
23 //#include "sharedthetan.h"
24 //#include "sharedkstest.h"
25 //#include "whittaker.h"
26 //#include "sharedochiai.h"
27 //#include "sharedanderbergs.h"
28 //#include "sharedkulczynski.h"
29 //#include "sharedkulczynskicody.h"
30 //#include "sharedlennon.h"
31 //#include "sharedmorisitahorn.h"
32 //#include "sharedbraycurtis.h"
33 //#include "sharedjackknife.h"
34 //#include "whittaker.h"
36 //#include "canberra.h"
37 //#include "structeuclidean.h"
38 //#include "structchord.h"
39 //#include "hellinger.h"
40 //#include "manhattan.h"
41 //#include "structpearson.h"
42 //#include "soergel.h"
43 //#include "spearman.h"
44 //#include "structkulczynski.h"
45 //#include "structchi2.h"
46 //#include "speciesprofile.h"
47 //#include "hamming.h"
49 //#include "memchi2.h"
50 //#include "memchord.h"
51 //#include "memeuclidean.h"
52 //#include "mempearson.h"
54 //**********************************************************************************************************************
55 vector<string> AmovaCommand::getValidParameters(){
57 string Array[] = {"groups","label","outputdir","iters","phylip","design","alpha",
60 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
64 m->errorOut(e, "AmovaCommand", "getValidParameters");
68 //**********************************************************************************************************************
69 AmovaCommand::AmovaCommand(){
71 abort = true; calledHelp = true;
72 vector<string> tempOutNames;
73 outputTypes["amova"] = tempOutNames;
76 m->errorOut(e, "AmovaCommand", "AmovaCommand");
80 //**********************************************************************************************************************
81 vector<string> AmovaCommand::getRequiredParameters(){
83 string Array[] = {"design"};
84 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
88 m->errorOut(e, "AmovaCommand", "getRequiredParameters");
92 //**********************************************************************************************************************
93 vector<string> AmovaCommand::getRequiredFiles(){
96 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
100 m->errorOut(e, "AmovaCommand", "getRequiredFiles");
104 //**********************************************************************************************************************
106 AmovaCommand::AmovaCommand(string option) {
108 globaldata = GlobalData::getInstance();
109 abort = false; calledHelp = false;
113 //allow user to run help
114 if(option == "help") { help(); abort = true; calledHelp = true; }
117 //valid paramters for this command
118 string AlignArray[] = {"groups","label","outputdir","iters","phylip","alpha",
119 // "design","sets","processors",
121 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
123 OptionParser parser(option);
124 map<string,string> parameters = parser.getParameters();
126 ValidParameters validParameter;
128 //check to make sure all parameters are valid for command
129 map<string,string>::iterator it;
130 for (it = parameters.begin(); it != parameters.end(); it++) {
131 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
134 //initialize outputTypes
135 vector<string> tempOutNames;
136 outputTypes["amova"] = tempOutNames;
138 //if the user changes the output directory command factory will send this info to us in the output parameter
139 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
141 //if the user changes the input directory command factory will send this info to us in the output parameter
142 string inputDir = validParameter.validFile(parameters, "inputdir", false);
143 if (inputDir == "not found"){ inputDir = ""; }
146 it = parameters.find("design");
147 //user has given a template file
148 if(it != parameters.end()){
149 path = m->hasPath(it->second);
150 //if the user has not given a path then, add inputdir. else leave path alone.
151 if (path == "") { parameters["design"] = inputDir + it->second; }
154 it = parameters.find("phylip");
155 //user has given a template file
156 if(it != parameters.end()){
157 path = m->hasPath(it->second);
158 //if the user has not given a path then, add inputdir. else leave path alone.
159 if (path == "") { parameters["phylip"] = inputDir + it->second; }
163 phylipFileName = validParameter.validFile(parameters, "phylip", true);
164 if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
165 else if (phylipFileName == "not found") { phylipFileName = ""; }
167 globaldata->newRead();
168 globaldata->setPhylipFile(phylipFileName);
171 //check for required parameters
172 designFileName = validParameter.validFile(parameters, "design", true);
173 if (designFileName == "not open") { abort = true; }
174 else if (designFileName == "not found") {
176 m->mothurOut("You must provide an design file.");
177 m->mothurOutEndLine();
181 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
182 convert(temp, iters);
184 temp = validParameter.validFile(parameters, "alpha", false); if (temp == "not found") { temp = "0.05"; }
185 convert(temp, experimentwiseAlpha);
187 // make sure the user has already run the read.otu command
188 // if ((globaldata->getSharedFile() == "")) {
189 // if ((phylipfile == "")) {
190 // m->mothurOut("You must read a list and a group, a shared file or provide a distance matrix before you can use the amova command.");
191 // m->mothurOutEndLine();
194 // }else { sharedfile = globaldata->getSharedFile(); }
196 // //use distance matrix if one is provided
197 // if ((sharedfile != "") && (phylipfile != "")) { sharedfile = ""; }
199 // //check for optional parameter and set defaults
200 // // ...at some point should added some additional type checking...
201 // label = validParameter.validFile(parameters, "label", false);
202 // if (label == "not found") { label = ""; }
204 // if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
205 // else { allLines = 1; }
208 // //if the user has not specified any labels use the ones from read.otu
209 // if (label == "") {
210 // allLines = globaldata->allLines;
211 // labels = globaldata->labels;
214 // groups = validParameter.validFile(parameters, "groups", false);
215 // if (groups == "not found") { groups = ""; }
217 // m->splitAtDash(groups, Groups);
218 // globaldata->Groups = Groups;
221 // sets = validParameter.validFile(parameters, "sets", false);
222 // if (sets == "not found") { sets = ""; pickedGroups = false; }
224 // pickedGroups = true;
225 // m->splitAtDash(sets, Sets);
228 // temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
229 // convert(temp, processors);
231 // vector<string> Estimators;
232 // calc = validParameter.validFile(parameters, "calc", false);
233 // if (calc == "not found") { calc = "morisitahorn"; }
234 // m->splitAtDash(calc, Estimators);
236 // if (abort == false) {
238 // ValidCalculators* validCalculator = new ValidCalculators();
240 // for (int i=0; i<Estimators.size(); i++) {
241 // if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) {
242 // if (Estimators[i] == "sharedsobs") {
243 // calculators.push_back(new SharedSobsCS());
244 // }else if (Estimators[i] == "sharedchao") {
245 // calculators.push_back(new SharedChao1());
246 // }else if (Estimators[i] == "sharedace") {
247 // calculators.push_back(new SharedAce());
248 // }else if (Estimators[i] == "jabund") {
249 // calculators.push_back(new JAbund());
250 // }else if (Estimators[i] == "sorabund") {
251 // calculators.push_back(new SorAbund());
252 // }else if (Estimators[i] == "jclass") {
253 // calculators.push_back(new Jclass());
254 // }else if (Estimators[i] == "sorclass") {
255 // calculators.push_back(new SorClass());
256 // }else if (Estimators[i] == "jest") {
257 // calculators.push_back(new Jest());
258 // }else if (Estimators[i] == "sorest") {
259 // calculators.push_back(new SorEst());
260 // }else if (Estimators[i] == "thetayc") {
261 // calculators.push_back(new ThetaYC());
262 // }else if (Estimators[i] == "thetan") {
263 // calculators.push_back(new ThetaN());
264 // }else if (Estimators[i] == "kstest") {
265 // calculators.push_back(new KSTest());
266 // }else if (Estimators[i] == "sharednseqs") {
267 // calculators.push_back(new SharedNSeqs());
268 // }else if (Estimators[i] == "ochiai") {
269 // calculators.push_back(new Ochiai());
270 // }else if (Estimators[i] == "anderberg") {
271 // calculators.push_back(new Anderberg());
272 // }else if (Estimators[i] == "kulczynski") {
273 // calculators.push_back(new Kulczynski());
274 // }else if (Estimators[i] == "kulczynskicody") {
275 // calculators.push_back(new KulczynskiCody());
276 // }else if (Estimators[i] == "lennon") {
277 // calculators.push_back(new Lennon());
278 // }else if (Estimators[i] == "morisitahorn") {
279 // calculators.push_back(new MorHorn());
280 // }else if (Estimators[i] == "braycurtis") {
281 // calculators.push_back(new BrayCurtis());
282 // }else if (Estimators[i] == "whittaker") {
283 // calculators.push_back(new Whittaker());
284 // }else if (Estimators[i] == "odum") {
285 // calculators.push_back(new Odum());
286 // }else if (Estimators[i] == "canberra") {
287 // calculators.push_back(new Canberra());
288 // }else if (Estimators[i] == "structeuclidean") {
289 // calculators.push_back(new StructEuclidean());
290 // }else if (Estimators[i] == "structchord") {
291 // calculators.push_back(new StructChord());
292 // }else if (Estimators[i] == "hellinger") {
293 // calculators.push_back(new Hellinger());
294 // }else if (Estimators[i] == "manhattan") {
295 // calculators.push_back(new Manhattan());
296 // }else if (Estimators[i] == "structpearson") {
297 // calculators.push_back(new StructPearson());
298 // }else if (Estimators[i] == "soergel") {
299 // calculators.push_back(new Soergel());
300 // }else if (Estimators[i] == "spearman") {
301 // calculators.push_back(new Spearman());
302 // }else if (Estimators[i] == "structkulczynski") {
303 // calculators.push_back(new StructKulczynski());
304 // }else if (Estimators[i] == "speciesprofile") {
305 // calculators.push_back(new SpeciesProfile());
306 // }else if (Estimators[i] == "hamming") {
307 // calculators.push_back(new Hamming());
308 // }else if (Estimators[i] == "structchi2") {
309 // calculators.push_back(new StructChi2());
310 // }else if (Estimators[i] == "gower") {
311 // calculators.push_back(new Gower());
312 // }else if (Estimators[i] == "memchi2") {
313 // calculators.push_back(new MemChi2());
314 // }else if (Estimators[i] == "memchord") {
315 // calculators.push_back(new MemChord());
316 // }else if (Estimators[i] == "memeuclidean") {
317 // calculators.push_back(new MemEuclidean());
318 // }else if (Estimators[i] == "mempearson") {
319 // calculators.push_back(new MemPearson());
327 catch(exception& e) {
328 m->errorOut(e, "AmovaCommand", "AmovaCommand");
333 //**********************************************************************************************************************
335 void AmovaCommand::help(){
337 m->mothurOut("Referenced: Anderson MJ (2001). A new method for non-parametric multivariate analysis of variance. Austral Ecol 26: 32-46.\n");
338 // m->mothurOut("The amova command can only be executed after a successful read.otu command of a list and group or shared file, or by providing a phylip formatted distance matrix.\n");
339 m->mothurOut("The amova command outputs a .amova file. \n");
340 m->mothurOut("The amova command parameters are phylip, iters, and alpha. The phylip and design parameters are required.\n");
341 m->mothurOut("The design parameter allows you to assign your samples to groups when you are running amova. It is required. \n");
342 m->mothurOut("The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n");
343 // m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile. To run the pairwise comparisons of all sets use sets=all.\n");
344 m->mothurOut("The iters parameter allows you to set number of randomization for the P value. The default is 1000. \n");
345 // m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes. groups=all will find all pairwise comparisons. \n");
346 // m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
347 // m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
348 m->mothurOut("The amova command should be in the following format: amova(phylip=file.dist, design=file.design).\n");
349 m->mothurOut("Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n\n");
352 catch(exception& e) {
353 m->errorOut(e, "AmovaCommand", "help");
358 //**********************************************************************************************************************
360 AmovaCommand::~AmovaCommand(){}
362 //**********************************************************************************************************************
364 int AmovaCommand::execute(){
367 if (abort == true) { if (calledHelp) { return 0; } return 2; }
370 designMap = new GroupMap(designFileName);
371 designMap->readDesignMap();
373 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
375 //read in distance matrix and square it
376 ReadPhylipVector readMatrix(phylipFileName);
377 vector<string> sampleNames = readMatrix.read(distanceMatrix);
378 int numSamples = sampleNames.size();
380 for(int i=0;i<distanceMatrix.size();i++){
381 for(int j=0;j<i;j++){
382 distanceMatrix[i][j] *= distanceMatrix[i][j];
386 //link designMap to rows/columns in distance matrix
387 map<string, vector<int> > origGroupSampleMap;
388 for(int i=0;i<numSamples;i++){
389 origGroupSampleMap[designMap->getGroup(sampleNames[i])].push_back(i);
391 int numGroups = origGroupSampleMap.size();
393 //create a new filename
395 string AMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + "amova";
396 m->openOutputFile(AMOVAFileName, AMOVAFile);
397 outputNames.push_back(AMOVAFileName); outputTypes["amova"].push_back(AMOVAFileName);
399 experimentwiseAlpha = 0.05;
401 double fullANOVAPValue = runAMOVA(AMOVAFile, origGroupSampleMap, experimentwiseAlpha);
402 if(fullANOVAPValue <= experimentwiseAlpha && numGroups > 2){
404 int numCombos = numGroups * (numGroups-1) / 2;
405 double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
407 map<string, vector<int> >::iterator itA;
408 map<string, vector<int> >::iterator itB;
410 for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
412 for(itB;itB!=origGroupSampleMap.end();itB++){
414 map<string, vector<int> > pairwiseGroupSampleMap;
415 pairwiseGroupSampleMap[itA->first] = itA->second;
416 pairwiseGroupSampleMap[itB->first] = itB->second;
418 runAMOVA(AMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
421 cout << "Experiment-wise error rate: " << experimentwiseAlpha << endl;
422 cout << "Pair-wise error rate (Bonferroni): " << pairwiseAlpha << endl;
425 cout << "Experiment-wise error rate: " << experimentwiseAlpha << endl;
428 cout << "If you have borderline P-values, you should try increasing the number of iterations" << endl;
431 // //make sure sets are all in designMap
432 // SharedUtil* util = new SharedUtil();
433 // util->setGroups(Sets, designMap->namesOfGroups);
436 // //if the user pickedGroups or set sets=all, then we want to figure out how to split the pairwise comparison between processors
437 // if (pickedGroups) {
438 // for (int a=0; a<numGroups; a++) {
439 // for (int l = 0; l < a; l++) {
440 // vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
441 // namesOfGroupCombos.push_back(groups);
444 // }else { //one giant compare
445 // vector<string> groups;
446 // for (int a=0; a<numGroups; a++) { groups.push_back(Sets[a]); }
447 // namesOfGroupCombos.push_back(groups);
451 // if (numGroups == 2) { processors = 1; }
452 // else if (numGroups < 2) {
453 // m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command.");
454 // m->mothurOutEndLine(); m->control_pressed = true;
457 // #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
458 // if(processors != 1){
459 // int numPairs = namesOfGroupCombos.size();
460 // int numPairsPerProcessor = numPairs / processors;
462 // for (int i = 0; i < processors; i++) {
463 // int startPos = i * numPairsPerProcessor;
464 // if(i == processors - 1){
465 // numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
467 // lines.push_back(linePair(startPos, numPairsPerProcessor));
472 // if (sharedfile != "") { //create distance matrix for each label
474 // if (outputDir == "") { outputDir = m->hasPath(sharedfile); }
476 // //for each calculator
477 // for(int i = 0 ; i < calculators.size(); i++) {
479 // //create a new filename
481 // string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova";
482 // m->openOutputFile(outputFile, out);
483 // outputNames.push_back(outputFile); outputTypes["amova"].push_back(outputFile);
486 // out << "label\tgroupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue" << endl;
488 // m->mothurOut("label\tgroupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue\n");
491 // InputData input(sharedfile, "sharedfile");
492 // vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
493 // string lastLabel = lookup[0]->getLabel();
495 // //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
496 // set<string> processedLabels;
497 // set<string> userLabels = labels;
499 // //sanity check between shared file groups and design file groups
500 // for (int i = 0; i < lookup.size(); i++) {
501 // string group = designMap->getGroup(lookup[i]->getGroup());
503 // if (group == "not found") { m->control_pressed = true; m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct."); m->mothurOutEndLine(); }
506 // //as long as you are not at the end of the file or done wih the lines you want
507 // while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
509 // if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
511 // if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
513 // m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
516 // processedLabels.insert(lookup[0]->getLabel());
517 // userLabels.erase(lookup[0]->getLabel());
520 // if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
521 // string saveLabel = lookup[0]->getLabel();
523 // for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
524 // lookup = input.getSharedRAbundVectors(lastLabel);
525 // m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
529 // processedLabels.insert(lookup[0]->getLabel());
530 // userLabels.erase(lookup[0]->getLabel());
532 // //restore real lastlabel to save below
533 // lookup[0]->setLabel(saveLabel);
536 // lastLabel = lookup[0]->getLabel();
537 // //prevent memory leak
538 // for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
540 // if (m->control_pressed) { globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
542 // //get next line to process
543 // lookup = input.getSharedRAbundVectors();
546 // if (m->control_pressed) { globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
548 // //output error messages about any remaining user labels
549 // set<string>::iterator it;
550 // bool needToRun = false;
551 // for (it = userLabels.begin(); it != userLabels.end(); it++) {
552 // m->mothurOut("Your file does not include the label " + *it);
553 // if (processedLabels.count(lastLabel) != 1) {
554 // m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
557 // m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
561 // //run last label if you need to
562 // if (needToRun == true) {
563 // for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
564 // lookup = input.getSharedRAbundVectors(lastLabel);
566 // m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
570 // for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
573 // //reset groups parameter
574 // globaldata->Groups.clear();
577 // }else { //user provided distance matrix
579 // if (outputDir == "") { outputDir = m->hasPath(phylipFile); }
581 // //create a new filename
583 // string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipFile)) + "amova";
584 // m->openOutputFile(outputFile, out);
585 // outputNames.push_back(outputFile); outputTypes["amova"].push_back(outputFile);
588 // out << "groupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue" << endl;
590 // m->mothurOut("groupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue\n");
592 // ReadPhylipVector readMatrix(phylipFile);
593 // vector< vector<double> > completeMatrix;
594 // vector<string> names = readMatrix.read(completeMatrix);
596 // #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
597 // if(processors == 1){
598 // driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
601 // vector<int> processIDS;
603 // //loop through and create all the processes you want
604 // while (process != processors) {
608 // processIDS.push_back(pid);
610 // }else if (pid == 0){
611 // driver(lines[process].start, lines[process].num, names, toString(getpid()), completeMatrix);
614 // m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
615 // for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
621 // driver(lines[0].start, lines[0].num, names, "", completeMatrix);
623 // //force parent to wait until all the processes are done
624 // for (int i=0;i<(processors-1);i++) {
625 // int temp = processIDS[i];
630 // string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipFile)) + "amova";
631 // for (int j = 0; j < processIDS.size(); j++) {
632 // m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
633 // remove((outputFile + toString(processIDS[j])).c_str());
638 // driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
645 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
647 m->mothurOutEndLine();
648 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
649 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
650 m->mothurOutEndLine();
654 catch(exception& e) {
655 m->errorOut(e, "AmovaCommand", "execute");
660 //**********************************************************************************************************************
662 double AmovaCommand::runAMOVA(ofstream& AMOVAFile, map<string, vector<int> > groupSampleMap, double alpha) {
664 map<string, vector<int> >::iterator it;
666 int numGroups = groupSampleMap.size();
669 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
670 numSamples += it->second.size();
673 double ssTotalOrig = calcSSTotal(groupSampleMap);
674 double ssWithinOrig = calcSSWithin(groupSampleMap);
675 double ssAmongOrig = ssTotalOrig - ssWithinOrig;
678 for(int i=0;i<iters;i++){
679 map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
680 double ssWithinRand = calcSSWithin(randomizedGroup);
681 if(ssWithinRand < ssWithinOrig){ counter++; }
684 double pValue = (double)counter / (double) iters;
686 if(pValue < 1/(double)iters){ pString = '<' + toString(1/(double)iters); }
687 else { pString = toString(pValue); }
691 it = groupSampleMap.begin();
694 for(it;it!=groupSampleMap.end();it++){
695 cout << " vs. " << it->first;
697 cout << "ANOVA\tAmong\tWithin\tTotal" << endl;
698 cout << "SS\t" << ssAmongOrig << '\t' << ssWithinOrig << '\t' << ssTotalOrig << endl;
700 int dfAmong = numGroups - 1; double MSAmong = ssAmongOrig / (double) dfAmong;
701 int dfWithin = numSamples - numGroups; double MSWithin = ssWithinOrig / (double) dfWithin;
702 int dfTotal = numSamples - 1; double Fs = MSAmong / MSWithin;
704 cout << "df\t" << dfAmong << '\t' << dfWithin << '\t' << dfTotal << endl;
705 cout << "MS\t" << MSAmong << '\t' << MSWithin << endl << endl;
706 cout << "Fs:\t" << Fs << endl;
707 cout << "p-value: " << pString;
708 if(pValue < alpha){ cout << " ***"; }
709 cout << endl << endl;
713 catch(exception& e) {
714 m->errorOut(e, "AmovaCommand", "calcAmova");
719 //**********************************************************************************************************************
721 map<string, vector<int> > AmovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
723 vector<int> sampleIndices;
724 vector<int> samplesPerGroup;
726 map<string, vector<int> >::iterator it;
727 for(it=origMapping.begin();it!=origMapping.end();it++){
728 vector<int> indices = it->second;
729 samplesPerGroup.push_back(indices.size());
730 sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
733 random_shuffle(sampleIndices.begin(), sampleIndices.end());
736 map<string, vector<int> > randomizedGroups = origMapping;
737 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
738 for(int i=0;i<it->second.size();i++){
739 it->second[i] = sampleIndices[index++];
743 return randomizedGroups;
745 catch (exception& e) {
746 m->errorOut(e, "AmovaCommand", "randomizeGroups");
751 //**********************************************************************************************************************
753 double AmovaCommand::calcSSTotal(map<string, vector<int> >& groupSampleMap) {
757 map<string, vector<int> >::iterator it;
758 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
759 indices.insert(indices.end(), it->second.begin(), it->second.end());
761 sort(indices.begin(), indices.end());
763 int numIndices =indices.size();
764 double ssTotal = 0.0;
766 for(int i=1;i<numIndices;i++){
767 int row = indices[i];
769 for(int j=0;j<i;j++){
770 ssTotal += distanceMatrix[row][indices[j]];
773 ssTotal /= numIndices;
777 catch(exception& e) {
778 m->errorOut(e, "AmovaCommand", "calcSSTotal");
783 //**********************************************************************************************************************
785 double AmovaCommand::calcSSWithin(map<string, vector<int> >& groupSampleMap) {
788 double ssWithin = 0.0;
790 map<string, vector<int> >::iterator it;
791 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
793 double withinGroup = 0;
795 vector<int> samples = it->second;
797 for(int i=0;i<samples.size();i++){
798 int row = samples[i];
800 for(int j=0;j<samples.size();j++){
801 int col = samples[j];
804 withinGroup += distanceMatrix[row][col];
810 ssWithin += withinGroup / samples.size();
815 catch(exception& e) {
816 m->errorOut(e, "AmovaCommand", "calcWithin");
821 //**********************************************************************************************************************
823 //int AmovaCommand::process(vector<SharedRAbundVector*> thisLookup) {
826 //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
827 // if(processors == 1){
828 // driver(0, namesOfGroupCombos.size(), thisLookup, "");
831 // vector<int> processIDS;
833 // //loop through and create all the processes you want
834 // while (process != processors) {
838 // processIDS.push_back(pid);
840 // }else if (pid == 0){
841 // driver(lines[process].start, lines[process].num, thisLookup, toString(getpid()));
844 // m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
845 // for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
851 // driver(lines[0].start, lines[0].num, thisLookup, "");
853 // //force parent to wait until all the processes are done
854 // for (int i=0;i<(processors-1);i++) {
855 // int temp = processIDS[i];
860 // for(int i = 0 ; i < calculators.size(); i++) {
861 // string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova";
863 // for (int j = 0; j < processIDS.size(); j++) {
864 // m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
865 // remove((outputFile + toString(processIDS[j])).c_str());
870 // driver(0, namesOfGroupCombos.size(), thisLookUp, "");
875 // catch(exception& e) {
876 // m->errorOut(e, "AmovaCommand", "process");
881 //**********************************************************************************************************************
883 //int AmovaCommand::driver(int start, int num, vector<SharedRAbundVector*> thisLookup, string pidValue) {
885 // vector<SharedRAbundVector*> subset;
888 // //for each calculator
889 // for(int i = 0 ; i < calculators.size(); i++) {
891 // //create a new filename
893 // string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova" + pidValue;
894 // m->openOutputFileAppend(outputFile, out);
895 // out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
898 // for (int c = start; c < (start+num); c++) {
900 // if (m->control_pressed) { out.close(); return 0; }
903 // vector<string> setNames;
904 // for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
906 // vector<SharedRAbundVector*> thisCombosLookup;
907 // vector<string> thisCombosLookupSets; //what set each sharedRabund is from to be used when calculating SSWithin
908 // for (int k = 0; k < thisLookup.size(); k++) {
909 // string thisGroup = thisLookup[k]->getGroup();
911 // //is this group for a set we want to compare??
912 // if (m->inUsersGroups(designMap->getGroup(thisGroup), setNames)) {
913 // thisCombosLookup.push_back(thisLookup[k]);
914 // thisCombosLookupSets.push_back(designMap->getGroup(thisGroup));
919 // int numGroups = thisCombosLookup.size();
921 // //calc the distance matrix
923 // matrix.resize(numGroups);
924 // for (int k = 0; k < matrix.size(); k++) { for (int j = 0; j < matrix.size(); j++) { matrix[k].push_back(1.0); } }
926 // if (thisCombosLookup.size() == 0) {
927 // m->mothurOut("[ERROR]: Missing shared info for sets. Skipping comparison."); m->mothurOutEndLine();
930 // m->mothurOut(thisLookup[0]->getLabel() + '\t');
931 // out << thisLookup[0]->getLabel() << '\t';
932 // if (setNames.size() == 2) {
933 // m->mothurOut(setNames[0] + '-' + setNames[1] + '\t');
934 // out << setNames[0] << "-" << setNames[1] << '\t';
937 // out << "all" << '\t';
938 // m->mothurOut("all\t");
941 // for (int k = 0; k < thisCombosLookup.size(); k++) {
942 // for (int l = k; l < thisCombosLookup.size(); l++) {
944 // if (m->control_pressed) { out.close(); return 0; }
946 // if (k != l) { //we dont need to similiarity of a groups to itself
947 // //get estimated similarity between 2 groups
948 // subset.clear(); //clear out old pair of sharedrabunds
949 // //add new pair of sharedrabunds
950 // subset.push_back(thisCombosLookup[k]); subset.push_back(thisCombosLookup[l]);
952 // //if this calc needs all groups to calculate the pair load all groups
953 // if (calculators[i]->getNeedsAll()) {
954 // //load subset with rest of lookup for those calcs that need everyone to calc for a pair
955 // for (int w = 0; w < thisCombosLookup.size(); w++) {
956 // if ((w != k) && (w != l)) { subset.push_back(thisCombosLookup[w]); }
960 // data = calculators[i]->getValues(subset); //saves the calculator outputs
962 // //save values in similarity matrix
963 // matrix[k][l] = 1.0 - data[0];
964 // matrix[l][k] = 1.0 - data[0];
970 // calcAmova(out, setNames.size(), thisCombosLookupSets);
980 // catch(exception& e) {
981 // m->errorOut(e, "AmovaCommand", "driver");
986 //**********************************************************************************************************************
988 //int AmovaCommand::driver(int start, int num, vector<string> names, string pidValue, vector< vector<double> >& completeMatrix) {
991 // //create a new filename
993 // string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipFile)) + "amova" + pidValue;
994 // m->openOutputFileAppend(outputFile, out);
995 // out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
998 // for (int c = start; c < (start+num); c++) {
1000 // if (m->control_pressed) { out.close(); return 0; }
1003 // vector<string> setNames;
1004 // for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
1006 // vector<string> thisCombosSets; //what set each line in the distance matrix is from to be used when calculating SSWithin
1007 // set<int> indexes;
1008 // for (int k = 0; k < names.size(); k++) {
1009 // //is this group for a set we want to compare??
1010 // if (m->inUsersGroups(designMap->getGroup(names[k]), setNames)) {
1011 // thisCombosSets.push_back(designMap->getGroup(names[k]));
1012 // indexes.insert(k); //save indexes of valid rows in matrix for submatrix
1016 // int numGroups = thisCombosSets.size();
1018 // //calc the distance matrix
1020 // matrix.resize(numGroups);
1022 // for (int k = 0; k < matrix.size(); k++) { for (int j = 0; j < matrix.size(); j++) { matrix[k].push_back(1.0); } }
1024 // if (thisCombosSets.size() == 0) {
1025 // m->mothurOut("[ERROR]: Missing distance info for sets. Skipping comparison."); m->mothurOutEndLine();
1028 // if (setNames.size() == 2) {
1029 // out << setNames[0] << "-" << setNames[1] << '\t';
1030 // m->mothurOut(setNames[0] + '-' + setNames[1] + '\t');
1033 // out << "all" << '\t';
1034 // m->mothurOut("all\t");
1038 // int rowCount = 0;
1039 // for (int j = 0; j < completeMatrix.size(); j++) {
1041 // if (indexes.count(j) != 0) { //we want at least part of this row
1042 // int columnCount = 0;
1043 // for (int k = 0; k < completeMatrix[j].size(); k++) {
1045 // if (indexes.count(k) != 0) { //we want this distance
1046 // matrix[rowCount][columnCount] = completeMatrix[j][k];
1047 // matrix[columnCount][rowCount] = completeMatrix[j][k];
1056 // calcAmova(out, setNames.size(), thisCombosSets);
1066 // catch(exception& e) {
1067 // m->errorOut(e, "AmovaCommand", "driver");
1072 //**********************************************************************************************************************