5 * Created by westcott on 2/7/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "amovacommand.h"
11 #include "sharedutilities.h"
12 #include "sharedsobscollectsummary.h"
13 #include "sharedchao1.h"
14 #include "sharedace.h"
15 #include "sharednseqs.h"
16 #include "sharedjabund.h"
17 #include "sharedsorabund.h"
18 #include "sharedjclass.h"
19 #include "sharedsorclass.h"
20 #include "sharedjest.h"
21 #include "sharedsorest.h"
22 #include "sharedthetayc.h"
23 #include "sharedthetan.h"
24 #include "sharedkstest.h"
25 #include "whittaker.h"
26 #include "sharedochiai.h"
27 #include "sharedanderbergs.h"
28 #include "sharedkulczynski.h"
29 #include "sharedkulczynskicody.h"
30 #include "sharedlennon.h"
31 #include "sharedmorisitahorn.h"
32 #include "sharedbraycurtis.h"
33 #include "sharedjackknife.h"
34 #include "whittaker.h"
37 #include "structeuclidean.h"
38 #include "structchord.h"
39 #include "hellinger.h"
40 #include "manhattan.h"
41 #include "structpearson.h"
44 #include "structkulczynski.h"
45 #include "structchi2.h"
46 #include "speciesprofile.h"
51 #include "memeuclidean.h"
52 #include "mempearson.h"
54 //**********************************************************************************************************************
55 vector<string> AmovaCommand::getValidParameters(){
57 string Array[] = {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
58 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
62 m->errorOut(e, "AmovaCommand", "getValidParameters");
66 //**********************************************************************************************************************
67 AmovaCommand::AmovaCommand(){
69 abort = true; calledHelp = true;
70 vector<string> tempOutNames;
71 outputTypes["amova"] = tempOutNames;
74 m->errorOut(e, "AmovaCommand", "AmovaCommand");
78 //**********************************************************************************************************************
79 vector<string> AmovaCommand::getRequiredParameters(){
81 string Array[] = {"design"};
82 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
86 m->errorOut(e, "AmovaCommand", "getRequiredParameters");
90 //**********************************************************************************************************************
91 vector<string> AmovaCommand::getRequiredFiles(){
94 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
98 m->errorOut(e, "AmovaCommand", "getRequiredFiles");
102 //**********************************************************************************************************************
104 AmovaCommand::AmovaCommand(string option) {
106 globaldata = GlobalData::getInstance();
107 abort = false; calledHelp = false;
111 //allow user to run help
112 if(option == "help") { help(); abort = true; calledHelp = true; }
115 //valid paramters for this command
116 string AlignArray[] = {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
117 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
119 OptionParser parser(option);
120 map<string,string> parameters = parser.getParameters();
122 ValidParameters validParameter;
124 //check to make sure all parameters are valid for command
125 map<string,string>::iterator it;
126 for (it = parameters.begin(); it != parameters.end(); it++) {
127 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
130 //initialize outputTypes
131 vector<string> tempOutNames;
132 outputTypes["amova"] = tempOutNames;
134 //if the user changes the output directory command factory will send this info to us in the output parameter
135 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
137 //if the user changes the input directory command factory will send this info to us in the output parameter
138 string inputDir = validParameter.validFile(parameters, "inputdir", false);
139 if (inputDir == "not found"){ inputDir = ""; }
142 it = parameters.find("design");
143 //user has given a template file
144 if(it != parameters.end()){
145 path = m->hasPath(it->second);
146 //if the user has not given a path then, add inputdir. else leave path alone.
147 if (path == "") { parameters["design"] = inputDir + it->second; }
150 it = parameters.find("phylip");
151 //user has given a template file
152 if(it != parameters.end()){
153 path = m->hasPath(it->second);
154 //if the user has not given a path then, add inputdir. else leave path alone.
155 if (path == "") { parameters["phylip"] = inputDir + it->second; }
159 phylipfile = validParameter.validFile(parameters, "phylip", true);
160 if (phylipfile == "not open") { phylipfile = ""; abort = true; }
161 else if (phylipfile == "not found") { phylipfile = ""; }
162 else { globaldata->newRead(); format = "phylip"; globaldata->setPhylipFile(phylipfile); }
164 //check for required parameters
165 designfile = validParameter.validFile(parameters, "design", true);
166 if (designfile == "not open") { abort = true; }
167 else if (designfile == "not found") { designfile = ""; m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }
169 //make sure the user has already run the read.otu command
170 if ((globaldata->getSharedFile() == "")) {
171 if ((phylipfile == "")) {
172 m->mothurOut("You must read a list and a group, a shared file or provide a distance matrix before you can use the amova command."); m->mothurOutEndLine(); abort = true;
174 }else { sharedfile = globaldata->getSharedFile(); }
176 //use distance matrix if one is provided
177 if ((sharedfile != "") && (phylipfile != "")) { sharedfile = ""; }
179 //check for optional parameter and set defaults
180 // ...at some point should added some additional type checking...
181 label = validParameter.validFile(parameters, "label", false);
182 if (label == "not found") { label = ""; }
184 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
185 else { allLines = 1; }
188 //if the user has not specified any labels use the ones from read.otu
190 allLines = globaldata->allLines;
191 labels = globaldata->labels;
194 groups = validParameter.validFile(parameters, "groups", false);
195 if (groups == "not found") { groups = ""; }
197 m->splitAtDash(groups, Groups);
198 globaldata->Groups = Groups;
201 sets = validParameter.validFile(parameters, "sets", false);
202 if (sets == "not found") { sets = ""; pickedGroups = false; }
205 m->splitAtDash(sets, Sets);
209 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
210 convert(temp, iters);
212 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
213 convert(temp, processors);
215 vector<string> Estimators;
216 calc = validParameter.validFile(parameters, "calc", false);
217 if (calc == "not found") { calc = "morisitahorn"; }
218 m->splitAtDash(calc, Estimators);
220 if (abort == false) {
222 ValidCalculators* validCalculator = new ValidCalculators();
224 for (int i=0; i<Estimators.size(); i++) {
225 if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) {
226 if (Estimators[i] == "sharedsobs") {
227 calculators.push_back(new SharedSobsCS());
228 }else if (Estimators[i] == "sharedchao") {
229 calculators.push_back(new SharedChao1());
230 }else if (Estimators[i] == "sharedace") {
231 calculators.push_back(new SharedAce());
232 }else if (Estimators[i] == "jabund") {
233 calculators.push_back(new JAbund());
234 }else if (Estimators[i] == "sorabund") {
235 calculators.push_back(new SorAbund());
236 }else if (Estimators[i] == "jclass") {
237 calculators.push_back(new Jclass());
238 }else if (Estimators[i] == "sorclass") {
239 calculators.push_back(new SorClass());
240 }else if (Estimators[i] == "jest") {
241 calculators.push_back(new Jest());
242 }else if (Estimators[i] == "sorest") {
243 calculators.push_back(new SorEst());
244 }else if (Estimators[i] == "thetayc") {
245 calculators.push_back(new ThetaYC());
246 }else if (Estimators[i] == "thetan") {
247 calculators.push_back(new ThetaN());
248 }else if (Estimators[i] == "kstest") {
249 calculators.push_back(new KSTest());
250 }else if (Estimators[i] == "sharednseqs") {
251 calculators.push_back(new SharedNSeqs());
252 }else if (Estimators[i] == "ochiai") {
253 calculators.push_back(new Ochiai());
254 }else if (Estimators[i] == "anderberg") {
255 calculators.push_back(new Anderberg());
256 }else if (Estimators[i] == "kulczynski") {
257 calculators.push_back(new Kulczynski());
258 }else if (Estimators[i] == "kulczynskicody") {
259 calculators.push_back(new KulczynskiCody());
260 }else if (Estimators[i] == "lennon") {
261 calculators.push_back(new Lennon());
262 }else if (Estimators[i] == "morisitahorn") {
263 calculators.push_back(new MorHorn());
264 }else if (Estimators[i] == "braycurtis") {
265 calculators.push_back(new BrayCurtis());
266 }else if (Estimators[i] == "whittaker") {
267 calculators.push_back(new Whittaker());
268 }else if (Estimators[i] == "odum") {
269 calculators.push_back(new Odum());
270 }else if (Estimators[i] == "canberra") {
271 calculators.push_back(new Canberra());
272 }else if (Estimators[i] == "structeuclidean") {
273 calculators.push_back(new StructEuclidean());
274 }else if (Estimators[i] == "structchord") {
275 calculators.push_back(new StructChord());
276 }else if (Estimators[i] == "hellinger") {
277 calculators.push_back(new Hellinger());
278 }else if (Estimators[i] == "manhattan") {
279 calculators.push_back(new Manhattan());
280 }else if (Estimators[i] == "structpearson") {
281 calculators.push_back(new StructPearson());
282 }else if (Estimators[i] == "soergel") {
283 calculators.push_back(new Soergel());
284 }else if (Estimators[i] == "spearman") {
285 calculators.push_back(new Spearman());
286 }else if (Estimators[i] == "structkulczynski") {
287 calculators.push_back(new StructKulczynski());
288 }else if (Estimators[i] == "speciesprofile") {
289 calculators.push_back(new SpeciesProfile());
290 }else if (Estimators[i] == "hamming") {
291 calculators.push_back(new Hamming());
292 }else if (Estimators[i] == "structchi2") {
293 calculators.push_back(new StructChi2());
294 }else if (Estimators[i] == "gower") {
295 calculators.push_back(new Gower());
296 }else if (Estimators[i] == "memchi2") {
297 calculators.push_back(new MemChi2());
298 }else if (Estimators[i] == "memchord") {
299 calculators.push_back(new MemChord());
300 }else if (Estimators[i] == "memeuclidean") {
301 calculators.push_back(new MemEuclidean());
302 }else if (Estimators[i] == "mempearson") {
303 calculators.push_back(new MemPearson());
311 catch(exception& e) {
312 m->errorOut(e, "AmovaCommand", "AmovaCommand");
317 //**********************************************************************************************************************
319 void AmovaCommand::help(){
321 m->mothurOut("The amova command can only be executed after a successful read.otu command of a list and group or shared file, or by providing a phylip formatted distance matrix.\n");
322 m->mothurOut("The amova command outputs a .amova file. \n");
323 m->mothurOut("The amova command parameters are phylip, iters, groups, label, design, sets and processors. The design parameter is required.\n");
324 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running amova. It is required. \n");
325 m->mothurOut("The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
326 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile. To run the pairwise comparisons of all sets use sets=all.\n");
327 m->mothurOut("The iters parameter allows you to set number of randomization for the P value. The default is 1000. \n");
328 m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes. groups=all will find all pairwise comparisons. \n");
329 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
330 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
331 m->mothurOut("The amova command should be in the following format: amova(design=yourDesignFile).\n");
332 m->mothurOut("Example amova(design=temp.design, groups=A-B-C).\n");
333 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
336 catch(exception& e) {
337 m->errorOut(e, "AmovaCommand", "help");
342 //**********************************************************************************************************************
344 AmovaCommand::~AmovaCommand(){}
346 //**********************************************************************************************************************
348 int AmovaCommand::execute(){
351 if (abort == true) { if (calledHelp) { return 0; } return 2; }
354 designMap = new GroupMap(designfile);
355 designMap->readDesignMap();
357 //make sure sets are all in designMap
358 SharedUtil* util = new SharedUtil();
359 util->setGroups(Sets, designMap->namesOfGroups);
362 //if the user pickedGroups or set sets=all, then we want to figure out how to split the pairwise comparison between processors
363 int numGroups = Sets.size();
365 for (int a=0; a<numGroups; a++) {
366 for (int l = 0; l < a; l++) {
367 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
368 namesOfGroupCombos.push_back(groups);
371 }else { //one giant compare
372 vector<string> groups;
373 for (int a=0; a<numGroups; a++) { groups.push_back(Sets[a]); }
374 namesOfGroupCombos.push_back(groups);
378 if (numGroups == 2) { processors = 1; }
379 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
381 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
383 int numPairs = namesOfGroupCombos.size();
384 int numPairsPerProcessor = numPairs / processors;
386 for (int i = 0; i < processors; i++) {
387 int startPos = i * numPairsPerProcessor;
388 if(i == processors - 1){
389 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
391 lines.push_back(linePair(startPos, numPairsPerProcessor));
396 if (sharedfile != "") { //create distance matrix for each label
398 if (outputDir == "") { outputDir = m->hasPath(sharedfile); }
400 //for each calculator
401 for(int i = 0 ; i < calculators.size(); i++) {
403 //create a new filename
405 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova";
406 m->openOutputFile(outputFile, out);
407 outputNames.push_back(outputFile); outputTypes["amova"].push_back(outputFile);
410 out << "label\tgroupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue" << endl;
414 InputData input(sharedfile, "sharedfile");
415 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
416 string lastLabel = lookup[0]->getLabel();
418 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
419 set<string> processedLabels;
420 set<string> userLabels = labels;
422 //sanity check between shared file groups and design file groups
423 for (int i = 0; i < lookup.size(); i++) {
424 string group = designMap->getGroup(lookup[i]->getGroup());
426 if (group == "not found") { m->control_pressed = true; m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct."); m->mothurOutEndLine(); }
429 //as long as you are not at the end of the file or done wih the lines you want
430 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
432 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
434 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
436 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
439 processedLabels.insert(lookup[0]->getLabel());
440 userLabels.erase(lookup[0]->getLabel());
443 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
444 string saveLabel = lookup[0]->getLabel();
446 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
447 lookup = input.getSharedRAbundVectors(lastLabel);
448 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
452 processedLabels.insert(lookup[0]->getLabel());
453 userLabels.erase(lookup[0]->getLabel());
455 //restore real lastlabel to save below
456 lookup[0]->setLabel(saveLabel);
459 lastLabel = lookup[0]->getLabel();
460 //prevent memory leak
461 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
463 if (m->control_pressed) { globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
465 //get next line to process
466 lookup = input.getSharedRAbundVectors();
469 if (m->control_pressed) { globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
471 //output error messages about any remaining user labels
472 set<string>::iterator it;
473 bool needToRun = false;
474 for (it = userLabels.begin(); it != userLabels.end(); it++) {
475 m->mothurOut("Your file does not include the label " + *it);
476 if (processedLabels.count(lastLabel) != 1) {
477 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
480 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
484 //run last label if you need to
485 if (needToRun == true) {
486 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
487 lookup = input.getSharedRAbundVectors(lastLabel);
489 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
493 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
496 //reset groups parameter
497 globaldata->Groups.clear();
500 }else { //user provided distance matrix
502 if (outputDir == "") { outputDir = m->hasPath(phylipfile); }
504 //create a new filename
506 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "amova";
507 m->openOutputFile(outputFile, out);
508 outputNames.push_back(outputFile); outputTypes["amova"].push_back(outputFile);
511 out << "groupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue" << endl;
514 ReadPhylipVector readMatrix(phylipfile);
515 vector< vector<double> > completeMatrix;
516 vector<string> names = readMatrix.read(completeMatrix);
518 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
520 driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
523 vector<int> processIDS;
525 //loop through and create all the processes you want
526 while (process != processors) {
530 processIDS.push_back(pid);
533 driver(lines[process].start, lines[process].num, names, toString(getpid()), completeMatrix);
536 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
537 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
543 driver(lines[0].start, lines[0].num, names, "", completeMatrix);
545 //force parent to wait until all the processes are done
546 for (int i=0;i<(processors-1);i++) {
547 int temp = processIDS[i];
552 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "amova";
553 for (int j = 0; j < processIDS.size(); j++) {
554 m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
555 remove((outputFile + toString(processIDS[j])).c_str());
560 driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
567 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
569 m->mothurOutEndLine();
570 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
571 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
572 m->mothurOutEndLine();
576 catch(exception& e) {
577 m->errorOut(e, "AmovaCommand", "execute");
581 //**********************************************************************************************************************
583 int AmovaCommand::process(vector<SharedRAbundVector*> thisLookup) {
586 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
588 driver(0, namesOfGroupCombos.size(), thisLookup, "");
591 vector<int> processIDS;
593 //loop through and create all the processes you want
594 while (process != processors) {
598 processIDS.push_back(pid);
601 driver(lines[process].start, lines[process].num, thisLookup, toString(getpid()));
604 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
605 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
611 driver(lines[0].start, lines[0].num, thisLookup, "");
613 //force parent to wait until all the processes are done
614 for (int i=0;i<(processors-1);i++) {
615 int temp = processIDS[i];
620 for(int i = 0 ; i < calculators.size(); i++) {
621 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova";
623 for (int j = 0; j < processIDS.size(); j++) {
624 m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
625 remove((outputFile + toString(processIDS[j])).c_str());
630 driver(0, namesOfGroupCombos.size(), thisLookUp, "");
635 catch(exception& e) {
636 m->errorOut(e, "AmovaCommand", "process");
640 //**********************************************************************************************************************
642 int AmovaCommand::driver(int start, int num, vector<SharedRAbundVector*> thisLookup, string pidValue) {
644 vector<SharedRAbundVector*> subset;
647 //for each calculator
648 for(int i = 0 ; i < calculators.size(); i++) {
650 //create a new filename
652 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova" + pidValue;
653 m->openOutputFileAppend(outputFile, out);
654 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
657 for (int c = start; c < (start+num); c++) {
659 if (m->control_pressed) { out.close(); return 0; }
662 vector<string> setNames;
663 for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
665 vector<SharedRAbundVector*> thisCombosLookup;
666 vector<string> thisCombosLookupSets; //what set each sharedRabund is from to be used when calculating SSWithin
667 for (int k = 0; k < thisLookup.size(); k++) {
668 string thisGroup = thisLookup[k]->getGroup();
670 //is this group for a set we want to compare??
671 if (m->inUsersGroups(designMap->getGroup(thisGroup), setNames)) {
672 thisCombosLookup.push_back(thisLookup[k]);
673 thisCombosLookupSets.push_back(designMap->getGroup(thisGroup));
678 int numGroups = thisCombosLookup.size();
680 //calc the distance matrix
682 matrix.resize(numGroups);
683 for (int k = 0; k < matrix.size(); k++) { for (int j = 0; j < matrix.size(); j++) { matrix[k].push_back(1.0); } }
685 if (thisCombosLookup.size() == 0) {
686 m->mothurOut("[ERROR]: Missing shared info for sets. Skipping comparison."); m->mothurOutEndLine();
689 out << thisLookup[0]->getLabel() << '\t';
690 if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; }
691 else { out << "all" << '\t'; }
693 for (int k = 0; k < thisCombosLookup.size(); k++) {
694 for (int l = k; l < thisCombosLookup.size(); l++) {
696 if (m->control_pressed) { out.close(); return 0; }
698 if (k != l) { //we dont need to similiarity of a groups to itself
699 //get estimated similarity between 2 groups
700 subset.clear(); //clear out old pair of sharedrabunds
701 //add new pair of sharedrabunds
702 subset.push_back(thisCombosLookup[k]); subset.push_back(thisCombosLookup[l]);
704 //if this calc needs all groups to calculate the pair load all groups
705 if (calculators[i]->getNeedsAll()) {
706 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
707 for (int w = 0; w < thisCombosLookup.size(); w++) {
708 if ((w != k) && (w != l)) { subset.push_back(thisCombosLookup[w]); }
712 data = calculators[i]->getValues(subset); //saves the calculator outputs
714 //save values in similarity matrix
715 matrix[k][l] = 1.0 - data[0];
716 matrix[l][k] = 1.0 - data[0];
722 calcAmova(out, setNames.size(), thisCombosLookupSets);
732 catch(exception& e) {
733 m->errorOut(e, "AmovaCommand", "driver");
737 //**********************************************************************************************************************
739 int AmovaCommand::driver(int start, int num, vector<string> names, string pidValue, vector< vector<double> >& completeMatrix) {
742 //create a new filename
744 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "amova" + pidValue;
745 m->openOutputFileAppend(outputFile, out);
746 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
749 for (int c = start; c < (start+num); c++) {
751 if (m->control_pressed) { out.close(); return 0; }
754 vector<string> setNames;
755 for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
757 vector<string> thisCombosSets; //what set each line in the distance matrix is from to be used when calculating SSWithin
759 for (int k = 0; k < names.size(); k++) {
760 //is this group for a set we want to compare??
761 if (m->inUsersGroups(designMap->getGroup(names[k]), setNames)) {
762 thisCombosSets.push_back(designMap->getGroup(names[k]));
763 indexes.insert(k); //save indexes of valid rows in matrix for submatrix
767 int numGroups = thisCombosSets.size();
769 //calc the distance matrix
771 matrix.resize(numGroups);
773 for (int k = 0; k < matrix.size(); k++) { for (int j = 0; j < matrix.size(); j++) { matrix[k].push_back(1.0); } }
775 if (thisCombosSets.size() == 0) {
776 m->mothurOut("[ERROR]: Missing distance info for sets. Skipping comparison."); m->mothurOutEndLine();
779 if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; }
780 else { out << "all" << '\t'; }
784 for (int j = 0; j < completeMatrix.size(); j++) {
786 if (indexes.count(j) != 0) { //we want at least part of this row
788 for (int k = 0; k < completeMatrix[j].size(); k++) {
790 if (indexes.count(k) != 0) { //we want this distance
791 matrix[rowCount][columnCount] = completeMatrix[j][k];
792 matrix[columnCount][rowCount] = completeMatrix[j][k];
801 calcAmova(out, setNames.size(), thisCombosSets);
811 catch(exception& e) {
812 m->errorOut(e, "AmovaCommand", "driver");
816 //**********************************************************************************************************************
817 int AmovaCommand::calcAmova(ofstream& out, int numTreatments, vector<string> thisCombosLookupSets) {
820 double SSWithin, SSTotal, SSAmoung, MSWithin, MSAmoung, FStatistic, pValue;
822 SSWithin = calcWithin(matrix, numTreatments, thisCombosLookupSets);
823 SSTotal = calcTotal(numTreatments);
826 for (int i = 0; i < iters; i++) {
827 if (m->control_pressed) { break; }
829 //randomly shuffle names to randomize the matrix
830 vector<string> copyNames = thisCombosLookupSets;
831 random_shuffle(copyNames.begin(), copyNames.end());
833 double randomSSWithin = calcWithin(matrix, numTreatments, copyNames);
835 if (randomSSWithin <= SSWithin) { count++; }
838 SSAmoung = SSTotal - SSWithin;
839 MSWithin = SSWithin / (double) (matrix.size() - numTreatments);
840 MSAmoung = SSAmoung / (double) (numTreatments - 1);
841 FStatistic = MSAmoung / MSWithin;
843 pValue = count / (float) iters;
845 out << MSWithin << '\t' << MSAmoung << '\t' << FStatistic << '\t' << pValue << endl;
846 //cout << "ssAmong = " << SSAmoung << '\t' << "sswithin = " << SSWithin << '\t' << "ssTotal = " << SSTotal << '\t' << "pvalue = " << pValue << endl;
849 catch(exception& e) {
850 m->errorOut(e, "AmovaCommand", "calcAmova");
854 //**********************************************************************************************************************
855 double AmovaCommand::calcWithin(vector< vector<double> >& thisMatrix, int numTreatments, vector<string> thisCombosLookupSets) {
859 //traverse lower triangle
860 for (int k = 0; k < thisMatrix.size(); k++) {
861 for (int l = k; l < thisMatrix[k].size(); l++) {
863 //if you are from the same treatment then eij is 1 so add, else eij = 0
864 if (thisCombosLookupSets[k] == thisCombosLookupSets[l]) {
865 within += (thisMatrix[k][l] * thisMatrix[k][l]); //dij^2
870 //1 / (numSamples / numTreatments)
871 within *= (1.0 / (float) (thisMatrix.size() / (float) numTreatments));
875 catch(exception& e) {
876 m->errorOut(e, "AmovaCommand", "calcWithin");
880 //**********************************************************************************************************************
881 double AmovaCommand::calcTotal(int numTreatments) {
885 //traverse lower triangle
886 for (int k = 0; k < matrix.size(); k++) {
887 for (int l = k; l < matrix[k].size(); l++) {
888 total += (matrix[k][l] * matrix[k][l]); //dij^2
893 total *= (1.0 / (float) matrix.size());
897 catch(exception& e) {
898 m->errorOut(e, "AmovaCommand", "calcTotal");
902 //**********************************************************************************************************************