5 * Created by westcott on 2/7/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "amovacommand.h"
11 #include "sharedutilities.h"
12 #include "sharedsobscollectsummary.h"
13 #include "sharedchao1.h"
14 #include "sharedace.h"
15 #include "sharednseqs.h"
16 #include "sharedjabund.h"
17 #include "sharedsorabund.h"
18 #include "sharedjclass.h"
19 #include "sharedsorclass.h"
20 #include "sharedjest.h"
21 #include "sharedsorest.h"
22 #include "sharedthetayc.h"
23 #include "sharedthetan.h"
24 #include "sharedkstest.h"
25 #include "whittaker.h"
26 #include "sharedochiai.h"
27 #include "sharedanderbergs.h"
28 #include "sharedkulczynski.h"
29 #include "sharedkulczynskicody.h"
30 #include "sharedlennon.h"
31 #include "sharedmorisitahorn.h"
32 #include "sharedbraycurtis.h"
33 #include "sharedjackknife.h"
34 #include "whittaker.h"
37 #include "structeuclidean.h"
38 #include "structchord.h"
39 #include "hellinger.h"
40 #include "manhattan.h"
41 #include "structpearson.h"
44 #include "structkulczynski.h"
45 #include "structchi2.h"
46 #include "speciesprofile.h"
51 #include "memeuclidean.h"
52 #include "mempearson.h"
54 //**********************************************************************************************************************
55 vector<string> AmovaCommand::getValidParameters(){
57 string Array[] = {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
58 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
62 m->errorOut(e, "AmovaCommand", "getValidParameters");
66 //**********************************************************************************************************************
67 AmovaCommand::AmovaCommand(){
70 //initialize outputTypes
71 vector<string> tempOutNames;
72 outputTypes["amova"] = tempOutNames;
75 m->errorOut(e, "AmovaCommand", "AmovaCommand");
79 //**********************************************************************************************************************
80 vector<string> AmovaCommand::getRequiredParameters(){
82 string Array[] = {"design"};
83 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
87 m->errorOut(e, "AmovaCommand", "getRequiredParameters");
91 //**********************************************************************************************************************
92 vector<string> AmovaCommand::getRequiredFiles(){
95 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
99 m->errorOut(e, "AmovaCommand", "getRequiredFiles");
103 //**********************************************************************************************************************
105 AmovaCommand::AmovaCommand(string option) {
107 globaldata = GlobalData::getInstance();
112 //allow user to run help
113 if(option == "help") { help(); abort = true; }
116 //valid paramters for this command
117 string AlignArray[] = {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
118 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
120 OptionParser parser(option);
121 map<string,string> parameters = parser.getParameters();
123 ValidParameters validParameter;
125 //check to make sure all parameters are valid for command
126 map<string,string>::iterator it;
127 for (it = parameters.begin(); it != parameters.end(); it++) {
128 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
131 //initialize outputTypes
132 vector<string> tempOutNames;
133 outputTypes["amova"] = tempOutNames;
135 //if the user changes the output directory command factory will send this info to us in the output parameter
136 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
138 //if the user changes the input directory command factory will send this info to us in the output parameter
139 string inputDir = validParameter.validFile(parameters, "inputdir", false);
140 if (inputDir == "not found"){ inputDir = ""; }
143 it = parameters.find("design");
144 //user has given a template file
145 if(it != parameters.end()){
146 path = m->hasPath(it->second);
147 //if the user has not given a path then, add inputdir. else leave path alone.
148 if (path == "") { parameters["design"] = inputDir + it->second; }
151 it = parameters.find("phylip");
152 //user has given a template file
153 if(it != parameters.end()){
154 path = m->hasPath(it->second);
155 //if the user has not given a path then, add inputdir. else leave path alone.
156 if (path == "") { parameters["phylip"] = inputDir + it->second; }
160 phylipfile = validParameter.validFile(parameters, "phylip", true);
161 if (phylipfile == "not open") { phylipfile = ""; abort = true; }
162 else if (phylipfile == "not found") { phylipfile = ""; }
163 else { globaldata->newRead(); format = "phylip"; globaldata->setPhylipFile(phylipfile); }
165 //check for required parameters
166 designfile = validParameter.validFile(parameters, "design", true);
167 if (designfile == "not open") { abort = true; }
168 else if (designfile == "not found") { designfile = ""; m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }
170 //make sure the user has already run the read.otu command
171 if ((globaldata->getSharedFile() == "")) {
172 if ((phylipfile == "")) {
173 m->mothurOut("You must read a list and a group, a shared file or provide a distance matrix before you can use the amova command."); m->mothurOutEndLine(); abort = true;
175 }else { sharedfile = globaldata->getSharedFile(); }
177 //use distance matrix if one is provided
178 if ((sharedfile != "") && (phylipfile != "")) { sharedfile = ""; }
180 //check for optional parameter and set defaults
181 // ...at some point should added some additional type checking...
182 label = validParameter.validFile(parameters, "label", false);
183 if (label == "not found") { label = ""; }
185 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
186 else { allLines = 1; }
189 //if the user has not specified any labels use the ones from read.otu
191 allLines = globaldata->allLines;
192 labels = globaldata->labels;
195 groups = validParameter.validFile(parameters, "groups", false);
196 if (groups == "not found") { groups = ""; }
198 m->splitAtDash(groups, Groups);
199 globaldata->Groups = Groups;
202 sets = validParameter.validFile(parameters, "sets", false);
203 if (sets == "not found") { sets = ""; pickedGroups = false; }
206 m->splitAtDash(sets, Sets);
210 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
211 convert(temp, iters);
213 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
214 convert(temp, processors);
216 vector<string> Estimators;
217 calc = validParameter.validFile(parameters, "calc", false);
218 if (calc == "not found") { calc = "morisitahorn"; }
219 m->splitAtDash(calc, Estimators);
221 if (abort == false) {
223 ValidCalculators* validCalculator = new ValidCalculators();
225 if (sharedfile != "") {
227 for (int i=0; i<Estimators.size(); i++) {
228 if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) {
229 if (Estimators[i] == "sharedsobs") {
230 calculators.push_back(new SharedSobsCS());
231 }else if (Estimators[i] == "sharedchao") {
232 calculators.push_back(new SharedChao1());
233 }else if (Estimators[i] == "sharedace") {
234 calculators.push_back(new SharedAce());
235 }else if (Estimators[i] == "jabund") {
236 calculators.push_back(new JAbund());
237 }else if (Estimators[i] == "sorabund") {
238 calculators.push_back(new SorAbund());
239 }else if (Estimators[i] == "jclass") {
240 calculators.push_back(new Jclass());
241 }else if (Estimators[i] == "sorclass") {
242 calculators.push_back(new SorClass());
243 }else if (Estimators[i] == "jest") {
244 calculators.push_back(new Jest());
245 }else if (Estimators[i] == "sorest") {
246 calculators.push_back(new SorEst());
247 }else if (Estimators[i] == "thetayc") {
248 calculators.push_back(new ThetaYC());
249 }else if (Estimators[i] == "thetan") {
250 calculators.push_back(new ThetaN());
251 }else if (Estimators[i] == "kstest") {
252 calculators.push_back(new KSTest());
253 }else if (Estimators[i] == "sharednseqs") {
254 calculators.push_back(new SharedNSeqs());
255 }else if (Estimators[i] == "ochiai") {
256 calculators.push_back(new Ochiai());
257 }else if (Estimators[i] == "anderberg") {
258 calculators.push_back(new Anderberg());
259 }else if (Estimators[i] == "kulczynski") {
260 calculators.push_back(new Kulczynski());
261 }else if (Estimators[i] == "kulczynskicody") {
262 calculators.push_back(new KulczynskiCody());
263 }else if (Estimators[i] == "lennon") {
264 calculators.push_back(new Lennon());
265 }else if (Estimators[i] == "morisitahorn") {
266 calculators.push_back(new MorHorn());
267 }else if (Estimators[i] == "braycurtis") {
268 calculators.push_back(new BrayCurtis());
269 }else if (Estimators[i] == "whittaker") {
270 calculators.push_back(new Whittaker());
271 }else if (Estimators[i] == "odum") {
272 calculators.push_back(new Odum());
273 }else if (Estimators[i] == "canberra") {
274 calculators.push_back(new Canberra());
275 }else if (Estimators[i] == "structeuclidean") {
276 calculators.push_back(new StructEuclidean());
277 }else if (Estimators[i] == "structchord") {
278 calculators.push_back(new StructChord());
279 }else if (Estimators[i] == "hellinger") {
280 calculators.push_back(new Hellinger());
281 }else if (Estimators[i] == "manhattan") {
282 calculators.push_back(new Manhattan());
283 }else if (Estimators[i] == "structpearson") {
284 calculators.push_back(new StructPearson());
285 }else if (Estimators[i] == "soergel") {
286 calculators.push_back(new Soergel());
287 }else if (Estimators[i] == "spearman") {
288 calculators.push_back(new Spearman());
289 }else if (Estimators[i] == "structkulczynski") {
290 calculators.push_back(new StructKulczynski());
291 }else if (Estimators[i] == "speciesprofile") {
292 calculators.push_back(new SpeciesProfile());
293 }else if (Estimators[i] == "hamming") {
294 calculators.push_back(new Hamming());
295 }else if (Estimators[i] == "structchi2") {
296 calculators.push_back(new StructChi2());
297 }else if (Estimators[i] == "gower") {
298 calculators.push_back(new Gower());
299 }else if (Estimators[i] == "memchi2") {
300 calculators.push_back(new MemChi2());
301 }else if (Estimators[i] == "memchord") {
302 calculators.push_back(new MemChord());
303 }else if (Estimators[i] == "memeuclidean") {
304 calculators.push_back(new MemEuclidean());
305 }else if (Estimators[i] == "mempearson") {
306 calculators.push_back(new MemPearson());
315 catch(exception& e) {
316 m->errorOut(e, "AmovaCommand", "AmovaCommand");
321 //**********************************************************************************************************************
323 void AmovaCommand::help(){
325 m->mothurOut("The amova command can only be executed after a successful read.otu command of a list and group or shared file, or by providing a phylip formatted distance matrix.\n");
326 m->mothurOut("The amova command outputs a .amova file. \n");
327 m->mothurOut("The amova command parameters are phylip, iters, groups, label, design, sets and processors. The design parameter is required.\n");
328 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running amova. It is required. \n");
329 m->mothurOut("The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
330 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n");
331 m->mothurOut("The iters parameter allows you to set number of randomization for the P value. The default is 1000. \n");
332 m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes. groups=all will find all pairwise comparisons. \n");
333 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
334 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
335 m->mothurOut("The amova command should be in the following format: amova(design=yourDesignFile).\n");
336 m->mothurOut("Example amova(design=temp.design, groups=A-B-C).\n");
337 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
340 catch(exception& e) {
341 m->errorOut(e, "AmovaCommand", "help");
346 //**********************************************************************************************************************
348 AmovaCommand::~AmovaCommand(){}
350 //**********************************************************************************************************************
352 int AmovaCommand::execute(){
355 if (abort == true) { return 0; }
358 designMap = new GroupMap(designfile);
359 designMap->readDesignMap();
361 //make sure sets are all in designMap
362 SharedUtil* util = new SharedUtil();
363 util->setGroups(Sets, designMap->namesOfGroups);
366 //if the user pickedGroups or set sets=all, then we want to figure out how to split the pairwise comparison between processors
367 int numGroups = Sets.size();
369 for (int a=0; a<numGroups; a++) {
370 for (int l = 0; l < a; l++) {
371 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
372 namesOfGroupCombos.push_back(groups);
375 }else { //one giant compare
376 vector<string> groups;
377 for (int a=0; a<numGroups; a++) { groups.push_back(Sets[a]); }
378 namesOfGroupCombos.push_back(groups);
382 if (numGroups == 2) { processors = 1; }
383 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
385 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
387 int numPairs = namesOfGroupCombos.size();
388 int numPairsPerProcessor = numPairs / processors;
390 for (int i = 0; i < processors; i++) {
391 int startPos = i * numPairsPerProcessor;
392 if(i == processors - 1){
393 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
395 lines.push_back(linePair(startPos, numPairsPerProcessor));
400 if (sharedfile != "") { //create distance matrix for each label
402 if (outputDir == "") { outputDir = m->hasPath(sharedfile); }
404 //for each calculator
405 for(int i = 0 ; i < calculators.size(); i++) {
407 //create a new filename
409 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova";
410 m->openOutputFile(outputFile, out);
411 outputNames.push_back(outputFile); outputTypes["amova"].push_back(outputFile);
414 out << "label\tgroupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue" << endl;
418 InputData input("sharedfile", sharedfile);
419 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
420 string lastLabel = lookup[0]->getLabel();
422 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
423 set<string> processedLabels;
424 set<string> userLabels = labels;
426 //sanity check between shared file groups and design file groups
427 for (int i = 0; i < lookup.size(); i++) {
428 string group = designMap->getGroup(lookup[i]->getGroup());
430 if (group == "not found") { m->control_pressed = true; m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct."); m->mothurOutEndLine(); }
433 //as long as you are not at the end of the file or done wih the lines you want
434 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
436 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
438 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
440 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
443 processedLabels.insert(lookup[0]->getLabel());
444 userLabels.erase(lookup[0]->getLabel());
447 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
448 string saveLabel = lookup[0]->getLabel();
450 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
451 lookup = input.getSharedRAbundVectors(lastLabel);
452 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
456 processedLabels.insert(lookup[0]->getLabel());
457 userLabels.erase(lookup[0]->getLabel());
459 //restore real lastlabel to save below
460 lookup[0]->setLabel(saveLabel);
463 lastLabel = lookup[0]->getLabel();
464 //prevent memory leak
465 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
467 if (m->control_pressed) { globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
469 //get next line to process
470 lookup = input.getSharedRAbundVectors();
473 if (m->control_pressed) { globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
475 //output error messages about any remaining user labels
476 set<string>::iterator it;
477 bool needToRun = false;
478 for (it = userLabels.begin(); it != userLabels.end(); it++) {
479 m->mothurOut("Your file does not include the label " + *it);
480 if (processedLabels.count(lastLabel) != 1) {
481 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
484 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
488 //run last label if you need to
489 if (needToRun == true) {
490 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
491 lookup = input.getSharedRAbundVectors(lastLabel);
493 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
497 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
500 //reset groups parameter
501 globaldata->Groups.clear();
504 }else { //user provided distance matrix
506 //for each calculator
507 for(int i = 0 ; i < calculators.size(); i++) {
508 //create a new filename
510 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + calculators[i]->getName() + ".amova";
511 m->openOutputFile(outputFile, out);
512 outputNames.push_back(outputFile); outputTypes["amova"].push_back(outputFile);
515 out << "groupsCompared\tMeanSquaredWithin\tMeanSquaredAmong\tFstatistic\tpValue" << endl;
519 ReadPhylipVector readMatrix(phylipfile);
520 vector< vector<double> > completeMatrix;
521 vector<string> names = readMatrix.read(completeMatrix);
523 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
525 driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
528 vector<int> processIDS;
530 //loop through and create all the processes you want
531 while (process != processors) {
535 processIDS.push_back(pid);
538 driver(lines[process].start, lines[process].num, names, toString(getpid()), completeMatrix);
541 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
542 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
548 driver(lines[0].start, lines[0].num, names, "", completeMatrix);
550 //force parent to wait until all the processes are done
551 for (int i=0;i<(processors-1);i++) {
552 int temp = processIDS[i];
557 for(int i = 0 ; i < calculators.size(); i++) {
558 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + calculators[i]->getName() + ".amova";
560 for (int j = 0; j < processIDS.size(); j++) {
561 m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
562 remove((outputFile + toString(processIDS[j])).c_str());
567 driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
574 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
576 m->mothurOutEndLine();
577 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
578 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
579 m->mothurOutEndLine();
583 catch(exception& e) {
584 m->errorOut(e, "AmovaCommand", "execute");
588 //**********************************************************************************************************************
590 int AmovaCommand::process(vector<SharedRAbundVector*> thisLookup) {
593 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
595 driver(0, namesOfGroupCombos.size(), thisLookup, "");
598 vector<int> processIDS;
600 //loop through and create all the processes you want
601 while (process != processors) {
605 processIDS.push_back(pid);
608 driver(lines[process].start, lines[process].num, thisLookup, toString(getpid()));
611 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
612 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
618 driver(lines[0].start, lines[0].num, thisLookup, "");
620 //force parent to wait until all the processes are done
621 for (int i=0;i<(processors-1);i++) {
622 int temp = processIDS[i];
627 for(int i = 0 ; i < calculators.size(); i++) {
628 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova";
630 for (int j = 0; j < processIDS.size(); j++) {
631 m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
632 remove((outputFile + toString(processIDS[j])).c_str());
637 driver(0, namesOfGroupCombos.size(), thisLookUp, "");
642 catch(exception& e) {
643 m->errorOut(e, "AmovaCommand", "process");
647 //**********************************************************************************************************************
649 int AmovaCommand::driver(int start, int num, vector<SharedRAbundVector*> thisLookup, string pidValue) {
651 vector<SharedRAbundVector*> subset;
654 //for each calculator
655 for(int i = 0 ; i < calculators.size(); i++) {
657 //create a new filename
659 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova" + pidValue;
660 m->openOutputFileAppend(outputFile, out);
661 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
664 for (int c = start; c < (start+num); c++) {
666 if (m->control_pressed) { out.close(); return 0; }
669 vector<string> setNames;
670 for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
672 vector<SharedRAbundVector*> thisCombosLookup;
673 vector<string> thisCombosLookupSets; //what set each sharedRabund is from to be used when calculating SSWithin
674 for (int k = 0; k < thisLookup.size(); k++) {
675 string thisGroup = thisLookup[k]->getGroup();
677 //is this group for a set we want to compare??
678 if (m->inUsersGroups(designMap->getGroup(thisGroup), setNames)) {
679 thisCombosLookup.push_back(thisLookup[k]);
680 thisCombosLookupSets.push_back(designMap->getGroup(thisGroup));
685 int numGroups = thisCombosLookup.size();
687 //calc the distance matrix
689 matrix.resize(numGroups);
690 for (int k = 0; k < matrix.size(); k++) { for (int j = 0; j < matrix.size(); j++) { matrix[k].push_back(1.0); } }
692 if (thisCombosLookup.size() != 0) {
693 m->mothurOut("[ERROR]: Missing shared info for sets. Skipping comparison."); m->mothurOutEndLine();
696 out << thisLookup[0]->getLabel() << '\t';
697 if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; }
698 else { out << "all" << '\t'; }
700 for (int k = 0; k < thisCombosLookup.size(); k++) {
701 for (int l = k; l < thisCombosLookup.size(); l++) {
703 if (m->control_pressed) { out.close(); return 0; }
705 if (k != l) { //we dont need to similiarity of a groups to itself
706 //get estimated similarity between 2 groups
707 subset.clear(); //clear out old pair of sharedrabunds
708 //add new pair of sharedrabunds
709 subset.push_back(thisCombosLookup[k]); subset.push_back(thisCombosLookup[l]);
711 //if this calc needs all groups to calculate the pair load all groups
712 if (calculators[i]->getNeedsAll()) {
713 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
714 for (int w = 0; w < thisCombosLookup.size(); w++) {
715 if ((w != k) && (w != l)) { subset.push_back(thisCombosLookup[w]); }
719 data = calculators[i]->getValues(subset); //saves the calculator outputs
721 //save values in similarity matrix
722 matrix[k][l] = 1.0 - data[0];
723 matrix[l][k] = 1.0 - data[0];
729 calcAmova(out, setNames.size(), thisCombosLookupSets);
739 catch(exception& e) {
740 m->errorOut(e, "AmovaCommand", "driver");
744 //**********************************************************************************************************************
746 int AmovaCommand::driver(int start, int num, vector<string> names, string pidValue, vector< vector<double> >& completeMatrix) {
749 //for each calculator
750 for(int i = 0 ; i < calculators.size(); i++) {
752 //create a new filename
754 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".amova" + pidValue;
755 m->openOutputFileAppend(outputFile, out);
756 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
759 for (int c = start; c < (start+num); c++) {
761 if (m->control_pressed) { out.close(); return 0; }
764 vector<string> setNames;
765 for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
767 vector<string> thisCombosSets; //what set each line in the distance matrix is from to be used when calculating SSWithin
769 for (int k = 0; k < names.size(); k++) {
770 //is this group for a set we want to compare??
771 if (m->inUsersGroups(designMap->getGroup(names[k]), setNames)) {
772 thisCombosSets.push_back(designMap->getGroup(names[k]));
773 indexes.insert(k); //save indexes of valid rows in matrix for submatrix
777 int numGroups = thisCombosSets.size();
779 //calc the distance matrix
781 matrix.resize(numGroups);
782 for (int k = 0; k < matrix.size(); k++) { for (int j = 0; j < matrix.size(); j++) { matrix[k].push_back(1.0); } }
784 if (thisCombosSets.size() != 0) {
785 m->mothurOut("[ERROR]: Missing distance info for sets. Skipping comparison."); m->mothurOutEndLine();
788 if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; }
789 else { out << "all" << '\t'; }
794 for (int j = 0; j < completeMatrix.size(); j++) {
796 if (indexes.count(j) != 0) { //we want at least part of this row
797 for (int k = 0; k < completeMatrix[j].size(); k++) {
799 if (indexes.count(k) != 0) { //we want this distance
800 matrix[rowCount][columnCount] = completeMatrix[j][k];
801 matrix[columnCount][rowCount] = completeMatrix[j][k];
810 calcAmova(out, setNames.size(), thisCombosSets);
820 catch(exception& e) {
821 m->errorOut(e, "AmovaCommand", "driver");
825 //**********************************************************************************************************************
826 int AmovaCommand::calcAmova(ofstream& out, int numTreatments, vector<string> thisCombosLookupSets) {
829 double SSWithin, SSTotal, SSAmoung, MSWithin, MSAmoung, FStatistic, pValue;
831 SSWithin = calcWithin(matrix, numTreatments, thisCombosLookupSets);
832 SSTotal = calcTotal(numTreatments);
835 for (int i = 0; i < iters; i++) {
836 if (m->control_pressed) { break; }
838 //randomly shuffle names to randomize the matrix
839 vector<string> copyNames = thisCombosLookupSets;
840 random_shuffle(copyNames.begin(), copyNames.end());
842 double randomSSWithin = calcWithin(matrix, numTreatments, copyNames);
844 if (randomSSWithin <= SSWithin) { count++; }
847 SSAmoung = SSTotal - SSWithin;
848 MSWithin = SSWithin / (double) (matrix.size() - numTreatments);
849 MSAmoung = SSAmoung / (double) (numTreatments - 1);
850 FStatistic = MSAmoung / MSWithin;
852 pValue = count / (float) iters;
854 out << MSWithin << '\t' << MSAmoung << '\t' << FStatistic << '\t' << pValue << endl;
858 catch(exception& e) {
859 m->errorOut(e, "AmovaCommand", "calcAmova");
863 //**********************************************************************************************************************
864 double AmovaCommand::calcWithin(vector< vector<double> >& thisMatrix, int numTreatments, vector<string> thisCombosLookupSets) {
868 //traverse lower triangle
869 for (int k = 0; k < thisMatrix.size(); k++) {
870 for (int l = k; l < thisMatrix[k].size(); l++) {
872 //if you are from the same treatment then eij is 1 so add, else eij = 0
873 if (thisCombosLookupSets[k] == thisCombosLookupSets[l]) {
874 within += (thisMatrix[k][l] * thisMatrix[k][l]); //dij^2
879 //1 / (numSamples / numTreatments)
880 within *= (1.0 / (float) (thisMatrix.size() / (float) numTreatments));
884 catch(exception& e) {
885 m->errorOut(e, "AmovaCommand", "calcWithin");
889 //**********************************************************************************************************************
890 double AmovaCommand::calcTotal(int numTreatments) {
894 //traverse lower triangle
895 for (int k = 0; k < matrix.size(); k++) {
896 for (int l = k; l < matrix[k].size(); l++) {
897 total += (matrix[k][l] * matrix[k][l]); //dij^2
902 total *= (1.0 / (float) matrix.size());
906 catch(exception& e) {
907 m->errorOut(e, "AmovaCommand", "calcTotal");
911 //**********************************************************************************************************************