5 * Created by westcott on 2/7/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "amovacommand.h"
11 #include "sharedutilities.h"
12 #include "sharedsobscollectsummary.h"
13 #include "sharedchao1.h"
14 #include "sharedace.h"
15 #include "sharednseqs.h"
16 #include "sharedjabund.h"
17 #include "sharedsorabund.h"
18 #include "sharedjclass.h"
19 #include "sharedsorclass.h"
20 #include "sharedjest.h"
21 #include "sharedsorest.h"
22 #include "sharedthetayc.h"
23 #include "sharedthetan.h"
24 #include "sharedkstest.h"
25 #include "whittaker.h"
26 #include "sharedochiai.h"
27 #include "sharedanderbergs.h"
28 #include "sharedkulczynski.h"
29 #include "sharedkulczynskicody.h"
30 #include "sharedlennon.h"
31 #include "sharedmorisitahorn.h"
32 #include "sharedbraycurtis.h"
33 #include "sharedjackknife.h"
34 #include "whittaker.h"
37 #include "structeuclidean.h"
38 #include "structchord.h"
39 #include "hellinger.h"
40 #include "manhattan.h"
41 #include "structpearson.h"
44 #include "structkulczynski.h"
45 #include "structchi2.h"
46 #include "speciesprofile.h"
51 #include "memeuclidean.h"
52 #include "mempearson.h"
54 //**********************************************************************************************************************
55 vector<string> AmovaCommand::getValidParameters(){
57 string Array[] = {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
58 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
62 m->errorOut(e, "AmovaCommand", "getValidParameters");
66 //**********************************************************************************************************************
67 AmovaCommand::AmovaCommand(){
69 abort = true; calledHelp = true;
70 vector<string> tempOutNames;
71 outputTypes["amova"] = tempOutNames;
74 m->errorOut(e, "AmovaCommand", "AmovaCommand");
78 //**********************************************************************************************************************
79 vector<string> AmovaCommand::getRequiredParameters(){
81 string Array[] = {"design"};
82 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
86 m->errorOut(e, "AmovaCommand", "getRequiredParameters");
90 //**********************************************************************************************************************
91 vector<string> AmovaCommand::getRequiredFiles(){
94 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
98 m->errorOut(e, "AmovaCommand", "getRequiredFiles");
102 //**********************************************************************************************************************
104 AmovaCommand::AmovaCommand(string option) {
106 globaldata = GlobalData::getInstance();
107 abort = false; calledHelp = false;
111 //allow user to run help
112 if(option == "help") { help(); abort = true; calledHelp = true; }
115 //valid paramters for this command
116 string AlignArray[] = {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
117 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
119 OptionParser parser(option);
120 map<string,string> parameters = parser.getParameters();
122 ValidParameters validParameter;
124 //check to make sure all parameters are valid for command
125 map<string,string>::iterator it;
126 for (it = parameters.begin(); it != parameters.end(); it++) {
127 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
130 //initialize outputTypes
131 vector<string> tempOutNames;
132 outputTypes["amova"] = tempOutNames;
134 //if the user changes the output directory command factory will send this info to us in the output parameter
135 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
137 //if the user changes the input directory command factory will send this info to us in the output parameter
138 string inputDir = validParameter.validFile(parameters, "inputdir", false);
139 if (inputDir == "not found"){ inputDir = ""; }
142 it = parameters.find("design");
143 //user has given a template file
144 if(it != parameters.end()){
145 path = m->hasPath(it->second);
146 //if the user has not given a path then, add inputdir. else leave path alone.
147 if (path == "") { parameters["design"] = inputDir + it->second; }
150 it = parameters.find("phylip");
151 //user has given a template file
152 if(it != parameters.end()){
153 path = m->hasPath(it->second);
154 //if the user has not given a path then, add inputdir. else leave path alone.
155 if (path == "") { parameters["phylip"] = inputDir + it->second; }
159 phylipfile = validParameter.validFile(parameters, "phylip", true);
160 if (phylipfile == "not open") { phylipfile = ""; abort = true; }
161 else if (phylipfile == "not found") { phylipfile = ""; }
162 else { globaldata->newRead(); format = "phylip"; globaldata->setPhylipFile(phylipfile); }
164 //check for required parameters
165 designfile = validParameter.validFile(parameters, "design", true);
166 if (designfile == "not open") { abort = true; }
167 else if (designfile == "not found") { designfile = ""; m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }
169 //make sure the user has already run the read.otu command
170 if ((globaldata->getSharedFile() == "")) {
171 if ((phylipfile == "")) {
172 m->mothurOut("You must read a list and a group, a shared file or provide a distance matrix before you can use the amova command."); m->mothurOutEndLine(); abort = true;
174 }else { sharedfile = globaldata->getSharedFile(); }
176 //use distance matrix if one is provided
177 if ((sharedfile != "") && (phylipfile != "")) { sharedfile = ""; }
179 //check for optional parameter and set defaults
180 // ...at some point should added some additional type checking...
181 label = validParameter.validFile(parameters, "label", false);
182 if (label == "not found") { label = ""; }
184 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
185 else { allLines = 1; }
188 //if the user has not specified any labels use the ones from read.otu
190 allLines = globaldata->allLines;
191 labels = globaldata->labels;
194 groups = validParameter.validFile(parameters, "groups", false);
195 if (groups == "not found") { groups = ""; }
197 m->splitAtDash(groups, Groups);
198 globaldata->Groups = Groups;
201 sets = validParameter.validFile(parameters, "sets", false);
202 if (sets == "not found") { sets = ""; pickedGroups = false; }
205 m->splitAtDash(sets, Sets);
209 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
210 convert(temp, iters);
212 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
213 convert(temp, processors);
215 vector<string> Estimators;
216 calc = validParameter.validFile(parameters, "calc", false);
217 if (calc == "not found") { calc = "morisitahorn"; }
218 m->splitAtDash(calc, Estimators);
220 if (abort == false) {
222 ValidCalculators* validCalculator = new ValidCalculators();
224 for (int i=0; i<Estimators.size(); i++) {
225 if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) {
226 if (Estimators[i] == "sharedsobs") {
227 calculators.push_back(new SharedSobsCS());
228 }else if (Estimators[i] == "sharedchao") {
229 calculators.push_back(new SharedChao1());
230 }else if (Estimators[i] == "sharedace") {
231 calculators.push_back(new SharedAce());
232 }else if (Estimators[i] == "jabund") {
233 calculators.push_back(new JAbund());
234 }else if (Estimators[i] == "sorabund") {
235 calculators.push_back(new SorAbund());
236 }else if (Estimators[i] == "jclass") {
237 calculators.push_back(new Jclass());
238 }else if (Estimators[i] == "sorclass") {
239 calculators.push_back(new SorClass());
240 }else if (Estimators[i] == "jest") {
241 calculators.push_back(new Jest());
242 }else if (Estimators[i] == "sorest") {
243 calculators.push_back(new SorEst());
244 }else if (Estimators[i] == "thetayc") {
245 calculators.push_back(new ThetaYC());
246 }else if (Estimators[i] == "thetan") {
247 calculators.push_back(new ThetaN());
248 }else if (Estimators[i] == "kstest") {
249 calculators.push_back(new KSTest());
250 }else if (Estimators[i] == "sharednseqs") {
251 calculators.push_back(new SharedNSeqs());
252 }else if (Estimators[i] == "ochiai") {
253 calculators.push_back(new Ochiai());
254 }else if (Estimators[i] == "anderberg") {
255 calculators.push_back(new Anderberg());
256 }else if (Estimators[i] == "kulczynski") {
257 calculators.push_back(new Kulczynski());
258 }else if (Estimators[i] == "kulczynskicody") {
259 calculators.push_back(new KulczynskiCody());
260 }else if (Estimators[i] == "lennon") {
261 calculators.push_back(new Lennon());
262 }else if (Estimators[i] == "morisitahorn") {
263 calculators.push_back(new MorHorn());
264 }else if (Estimators[i] == "braycurtis") {
265 calculators.push_back(new BrayCurtis());
266 }else if (Estimators[i] == "whittaker") {
267 calculators.push_back(new Whittaker());
268 }else if (Estimators[i] == "odum") {
269 calculators.push_back(new Odum());
270 }else if (Estimators[i] == "canberra") {
271 calculators.push_back(new Canberra());
272 }else if (Estimators[i] == "structeuclidean") {
273 calculators.push_back(new StructEuclidean());
274 }else if (Estimators[i] == "structchord") {
275 calculators.push_back(new StructChord());
276 }else if (Estimators[i] == "hellinger") {
277 calculators.push_back(new Hellinger());
278 }else if (Estimators[i] == "manhattan") {
279 calculators.push_back(new Manhattan());
280 }else if (Estimators[i] == "structpearson") {
281 calculators.push_back(new StructPearson());
282 }else if (Estimators[i] == "soergel") {
283 calculators.push_back(new Soergel());
284 }else if (Estimators[i] == "spearman") {
285 calculators.push_back(new Spearman());
286 }else if (Estimators[i] == "structkulczynski") {
287 calculators.push_back(new StructKulczynski());
288 }else if (Estimators[i] == "speciesprofile") {
289 calculators.push_back(new SpeciesProfile());
290 }else if (Estimators[i] == "hamming") {
291 calculators.push_back(new Hamming());
292 }else if (Estimators[i] == "structchi2") {
293 calculators.push_back(new StructChi2());
294 }else if (Estimators[i] == "gower") {
295 calculators.push_back(new Gower());
296 }else if (Estimators[i] == "memchi2") {
297 calculators.push_back(new MemChi2());
298 }else if (Estimators[i] == "memchord") {
299 calculators.push_back(new MemChord());
300 }else if (Estimators[i] == "memeuclidean") {
301 calculators.push_back(new MemEuclidean());
302 }else if (Estimators[i] == "mempearson") {
303 calculators.push_back(new MemPearson());
311 catch(exception& e) {
312 m->errorOut(e, "AmovaCommand", "AmovaCommand");
317 //**********************************************************************************************************************
319 void AmovaCommand::help(){
321 m->mothurOut("Referenced: Anderson MJ (2001). A new method for non-parametric multivariate analysis of variance. Austral Ecol 26: 32-46.\n");
322 m->mothurOut("The amova command can only be executed after a successful read.otu command of a list and group or shared file, or by providing a phylip formatted distance matrix.\n");
323 m->mothurOut("The amova command outputs a .amova file. \n");
324 m->mothurOut("The amova command parameters are phylip, iters, groups, label, design, sets and processors. The design parameter is required.\n");
325 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running amova. It is required. \n");
326 m->mothurOut("The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
327 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile. To run the pairwise comparisons of all sets use sets=all.\n");
328 m->mothurOut("The iters parameter allows you to set number of randomization for the P value. The default is 1000. \n");
329 m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes. groups=all will find all pairwise comparisons. \n");
330 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
331 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
332 m->mothurOut("The amova command should be in the following format: amova(design=yourDesignFile).\n");
333 m->mothurOut("Example amova(design=temp.design, groups=A-B-C).\n");
334 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
337 catch(exception& e) {
338 m->errorOut(e, "AmovaCommand", "help");
343 //**********************************************************************************************************************
345 AmovaCommand::~AmovaCommand(){}
347 //**********************************************************************************************************************
349 int AmovaCommand::execute(){
352 if (abort == true) { if (calledHelp) { return 0; } return 2; }
355 designMap = new GroupMap(designfile);
356 designMap->readDesignMap();
358 //make sure sets are all in designMap
359 SharedUtil* util = new SharedUtil();
360 util->setGroups(Sets, designMap->namesOfGroups);
363 //if the user pickedGroups or set sets=all, then we want to figure out how to split the pairwise comparison between processors
364 int numGroups = Sets.size();
366 for (int a=0; a<numGroups; a++) {
367 for (int l = 0; l < a; l++) {
368 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
369 namesOfGroupCombos.push_back(groups);
372 }else { //one giant compare
373 vector<string> groups;
374 for (int a=0; a<numGroups; a++) { groups.push_back(Sets[a]); }
375 namesOfGroupCombos.push_back(groups);
379 if (numGroups == 2) { processors = 1; }
380 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
382 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
384 int numPairs = namesOfGroupCombos.size();
385 int numPairsPerProcessor = numPairs / processors;
387 for (int i = 0; i < processors; i++) {
388 int startPos = i * numPairsPerProcessor;
389 if(i == processors - 1){
390 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
392 lines.push_back(linePair(startPos, numPairsPerProcessor));
397 if (sharedfile != "") { //create distance matrix for each label
399 if (outputDir == "") { outputDir = m->hasPath(sharedfile); }
401 //for each calculator
402 for(int i = 0 ; i < calculators.size(); i++) {
404 //create a new filename
406 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova";
407 m->openOutputFile(outputFile, out);
408 outputNames.push_back(outputFile); outputTypes["amova"].push_back(outputFile);
411 out << "label\tgroupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue" << endl;
415 InputData input(sharedfile, "sharedfile");
416 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
417 string lastLabel = lookup[0]->getLabel();
419 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
420 set<string> processedLabels;
421 set<string> userLabels = labels;
423 //sanity check between shared file groups and design file groups
424 for (int i = 0; i < lookup.size(); i++) {
425 string group = designMap->getGroup(lookup[i]->getGroup());
427 if (group == "not found") { m->control_pressed = true; m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct."); m->mothurOutEndLine(); }
430 //as long as you are not at the end of the file or done wih the lines you want
431 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
433 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
435 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
437 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
440 processedLabels.insert(lookup[0]->getLabel());
441 userLabels.erase(lookup[0]->getLabel());
444 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
445 string saveLabel = lookup[0]->getLabel();
447 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
448 lookup = input.getSharedRAbundVectors(lastLabel);
449 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
453 processedLabels.insert(lookup[0]->getLabel());
454 userLabels.erase(lookup[0]->getLabel());
456 //restore real lastlabel to save below
457 lookup[0]->setLabel(saveLabel);
460 lastLabel = lookup[0]->getLabel();
461 //prevent memory leak
462 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
464 if (m->control_pressed) { globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
466 //get next line to process
467 lookup = input.getSharedRAbundVectors();
470 if (m->control_pressed) { globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
472 //output error messages about any remaining user labels
473 set<string>::iterator it;
474 bool needToRun = false;
475 for (it = userLabels.begin(); it != userLabels.end(); it++) {
476 m->mothurOut("Your file does not include the label " + *it);
477 if (processedLabels.count(lastLabel) != 1) {
478 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
481 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
485 //run last label if you need to
486 if (needToRun == true) {
487 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
488 lookup = input.getSharedRAbundVectors(lastLabel);
490 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
494 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
497 //reset groups parameter
498 globaldata->Groups.clear();
501 }else { //user provided distance matrix
503 if (outputDir == "") { outputDir = m->hasPath(phylipfile); }
505 //create a new filename
507 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "amova";
508 m->openOutputFile(outputFile, out);
509 outputNames.push_back(outputFile); outputTypes["amova"].push_back(outputFile);
512 out << "groupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue" << endl;
515 ReadPhylipVector readMatrix(phylipfile);
516 vector< vector<double> > completeMatrix;
517 vector<string> names = readMatrix.read(completeMatrix);
519 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
521 driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
524 vector<int> processIDS;
526 //loop through and create all the processes you want
527 while (process != processors) {
531 processIDS.push_back(pid);
534 driver(lines[process].start, lines[process].num, names, toString(getpid()), completeMatrix);
537 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
538 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
544 driver(lines[0].start, lines[0].num, names, "", completeMatrix);
546 //force parent to wait until all the processes are done
547 for (int i=0;i<(processors-1);i++) {
548 int temp = processIDS[i];
553 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "amova";
554 for (int j = 0; j < processIDS.size(); j++) {
555 m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
556 remove((outputFile + toString(processIDS[j])).c_str());
561 driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
568 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
570 m->mothurOutEndLine();
571 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
572 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
573 m->mothurOutEndLine();
577 catch(exception& e) {
578 m->errorOut(e, "AmovaCommand", "execute");
582 //**********************************************************************************************************************
584 int AmovaCommand::process(vector<SharedRAbundVector*> thisLookup) {
587 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
589 driver(0, namesOfGroupCombos.size(), thisLookup, "");
592 vector<int> processIDS;
594 //loop through and create all the processes you want
595 while (process != processors) {
599 processIDS.push_back(pid);
602 driver(lines[process].start, lines[process].num, thisLookup, toString(getpid()));
605 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
606 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
612 driver(lines[0].start, lines[0].num, thisLookup, "");
614 //force parent to wait until all the processes are done
615 for (int i=0;i<(processors-1);i++) {
616 int temp = processIDS[i];
621 for(int i = 0 ; i < calculators.size(); i++) {
622 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova";
624 for (int j = 0; j < processIDS.size(); j++) {
625 m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
626 remove((outputFile + toString(processIDS[j])).c_str());
631 driver(0, namesOfGroupCombos.size(), thisLookUp, "");
636 catch(exception& e) {
637 m->errorOut(e, "AmovaCommand", "process");
641 //**********************************************************************************************************************
643 int AmovaCommand::driver(int start, int num, vector<SharedRAbundVector*> thisLookup, string pidValue) {
645 vector<SharedRAbundVector*> subset;
648 //for each calculator
649 for(int i = 0 ; i < calculators.size(); i++) {
651 //create a new filename
653 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova" + pidValue;
654 m->openOutputFileAppend(outputFile, out);
655 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
658 for (int c = start; c < (start+num); c++) {
660 if (m->control_pressed) { out.close(); return 0; }
663 vector<string> setNames;
664 for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
666 vector<SharedRAbundVector*> thisCombosLookup;
667 vector<string> thisCombosLookupSets; //what set each sharedRabund is from to be used when calculating SSWithin
668 for (int k = 0; k < thisLookup.size(); k++) {
669 string thisGroup = thisLookup[k]->getGroup();
671 //is this group for a set we want to compare??
672 if (m->inUsersGroups(designMap->getGroup(thisGroup), setNames)) {
673 thisCombosLookup.push_back(thisLookup[k]);
674 thisCombosLookupSets.push_back(designMap->getGroup(thisGroup));
679 int numGroups = thisCombosLookup.size();
681 //calc the distance matrix
683 matrix.resize(numGroups);
684 for (int k = 0; k < matrix.size(); k++) { for (int j = 0; j < matrix.size(); j++) { matrix[k].push_back(1.0); } }
686 if (thisCombosLookup.size() == 0) {
687 m->mothurOut("[ERROR]: Missing shared info for sets. Skipping comparison."); m->mothurOutEndLine();
690 out << thisLookup[0]->getLabel() << '\t';
691 if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; }
692 else { out << "all" << '\t'; }
694 for (int k = 0; k < thisCombosLookup.size(); k++) {
695 for (int l = k; l < thisCombosLookup.size(); l++) {
697 if (m->control_pressed) { out.close(); return 0; }
699 if (k != l) { //we dont need to similiarity of a groups to itself
700 //get estimated similarity between 2 groups
701 subset.clear(); //clear out old pair of sharedrabunds
702 //add new pair of sharedrabunds
703 subset.push_back(thisCombosLookup[k]); subset.push_back(thisCombosLookup[l]);
705 //if this calc needs all groups to calculate the pair load all groups
706 if (calculators[i]->getNeedsAll()) {
707 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
708 for (int w = 0; w < thisCombosLookup.size(); w++) {
709 if ((w != k) && (w != l)) { subset.push_back(thisCombosLookup[w]); }
713 data = calculators[i]->getValues(subset); //saves the calculator outputs
715 //save values in similarity matrix
716 matrix[k][l] = 1.0 - data[0];
717 matrix[l][k] = 1.0 - data[0];
723 calcAmova(out, setNames.size(), thisCombosLookupSets);
733 catch(exception& e) {
734 m->errorOut(e, "AmovaCommand", "driver");
738 //**********************************************************************************************************************
740 int AmovaCommand::driver(int start, int num, vector<string> names, string pidValue, vector< vector<double> >& completeMatrix) {
743 //create a new filename
745 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "amova" + pidValue;
746 m->openOutputFileAppend(outputFile, out);
747 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
750 for (int c = start; c < (start+num); c++) {
752 if (m->control_pressed) { out.close(); return 0; }
755 vector<string> setNames;
756 for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
758 vector<string> thisCombosSets; //what set each line in the distance matrix is from to be used when calculating SSWithin
760 for (int k = 0; k < names.size(); k++) {
761 //is this group for a set we want to compare??
762 if (m->inUsersGroups(designMap->getGroup(names[k]), setNames)) {
763 thisCombosSets.push_back(designMap->getGroup(names[k]));
764 indexes.insert(k); //save indexes of valid rows in matrix for submatrix
768 int numGroups = thisCombosSets.size();
770 //calc the distance matrix
772 matrix.resize(numGroups);
774 for (int k = 0; k < matrix.size(); k++) { for (int j = 0; j < matrix.size(); j++) { matrix[k].push_back(1.0); } }
776 if (thisCombosSets.size() == 0) {
777 m->mothurOut("[ERROR]: Missing distance info for sets. Skipping comparison."); m->mothurOutEndLine();
780 if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; }
781 else { out << "all" << '\t'; }
785 for (int j = 0; j < completeMatrix.size(); j++) {
787 if (indexes.count(j) != 0) { //we want at least part of this row
789 for (int k = 0; k < completeMatrix[j].size(); k++) {
791 if (indexes.count(k) != 0) { //we want this distance
792 matrix[rowCount][columnCount] = completeMatrix[j][k];
793 matrix[columnCount][rowCount] = completeMatrix[j][k];
802 calcAmova(out, setNames.size(), thisCombosSets);
812 catch(exception& e) {
813 m->errorOut(e, "AmovaCommand", "driver");
817 //**********************************************************************************************************************
818 int AmovaCommand::calcAmova(ofstream& out, int numTreatments, vector<string> thisCombosLookupSets) {
821 double SSWithin, SSTotal, SSAmoung, MSWithin, MSAmoung, FStatistic, pValue;
823 SSWithin = calcWithin(matrix, numTreatments, thisCombosLookupSets);
824 SSTotal = calcTotal(numTreatments);
827 for (int i = 0; i < iters; i++) {
828 if (m->control_pressed) { break; }
830 //randomly shuffle names to randomize the matrix
831 vector<string> copyNames = thisCombosLookupSets;
832 random_shuffle(copyNames.begin(), copyNames.end());
834 double randomSSWithin = calcWithin(matrix, numTreatments, copyNames);
836 if (randomSSWithin <= SSWithin) { count++; }
839 SSAmoung = SSTotal - SSWithin;
840 MSWithin = SSWithin / (double) (matrix.size() - numTreatments);
841 MSAmoung = SSAmoung / (double) (numTreatments - 1);
842 FStatistic = MSAmoung / MSWithin;
844 pValue = count / (float) iters;
846 out << MSWithin << '\t' << MSAmoung << '\t' << FStatistic << '\t' << pValue << endl;
847 //cout << "ssAmong = " << SSAmoung << '\t' << "sswithin = " << SSWithin << '\t' << "ssTotal = " << SSTotal << '\t' << "pvalue = " << pValue << endl;
850 catch(exception& e) {
851 m->errorOut(e, "AmovaCommand", "calcAmova");
855 //**********************************************************************************************************************
856 double AmovaCommand::calcWithin(vector< vector<double> >& thisMatrix, int numTreatments, vector<string> thisCombosLookupSets) {
860 //traverse lower triangle
861 for (int k = 0; k < thisMatrix.size(); k++) {
862 for (int l = k; l < thisMatrix[k].size(); l++) {
864 //if you are from the same treatment then eij is 1 so add, else eij = 0
865 if (thisCombosLookupSets[k] == thisCombosLookupSets[l]) {
866 within += (thisMatrix[k][l] * thisMatrix[k][l]); //dij^2
871 //1 / (numSamples / numTreatments)
872 within *= (1.0 / (float) (thisMatrix.size() / (float) numTreatments));
876 catch(exception& e) {
877 m->errorOut(e, "AmovaCommand", "calcWithin");
881 //**********************************************************************************************************************
882 double AmovaCommand::calcTotal(int numTreatments) {
886 //traverse lower triangle
887 for (int k = 0; k < matrix.size(); k++) {
888 for (int l = k; l < matrix[k].size(); l++) {
889 total += (matrix[k][l] * matrix[k][l]); //dij^2
894 total *= (1.0 / (float) matrix.size());
898 catch(exception& e) {
899 m->errorOut(e, "AmovaCommand", "calcTotal");
903 //**********************************************************************************************************************