5 * Created by westcott on 2/8/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "homovacommand.h"
11 #include "sharedutilities.h"
12 #include "sharedsobscollectsummary.h"
13 #include "sharedchao1.h"
14 #include "sharedace.h"
15 #include "sharednseqs.h"
16 #include "sharedjabund.h"
17 #include "sharedsorabund.h"
18 #include "sharedjclass.h"
19 #include "sharedsorclass.h"
20 #include "sharedjest.h"
21 #include "sharedsorest.h"
22 #include "sharedthetayc.h"
23 #include "sharedthetan.h"
24 #include "sharedkstest.h"
25 #include "whittaker.h"
26 #include "sharedochiai.h"
27 #include "sharedanderbergs.h"
28 #include "sharedkulczynski.h"
29 #include "sharedkulczynskicody.h"
30 #include "sharedlennon.h"
31 #include "sharedmorisitahorn.h"
32 #include "sharedbraycurtis.h"
33 #include "sharedjackknife.h"
34 #include "whittaker.h"
37 #include "structeuclidean.h"
38 #include "structchord.h"
39 #include "hellinger.h"
40 #include "manhattan.h"
41 #include "structpearson.h"
44 #include "structkulczynski.h"
45 #include "structchi2.h"
46 #include "speciesprofile.h"
51 #include "memeuclidean.h"
52 #include "mempearson.h"
54 //**********************************************************************************************************************
55 vector<string> HomovaCommand::getValidParameters(){
57 string Array[] = {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
58 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
62 m->errorOut(e, "HomovaCommand", "getValidParameters");
66 //**********************************************************************************************************************
67 HomovaCommand::HomovaCommand(){
70 //initialize outputTypes
71 vector<string> tempOutNames;
72 outputTypes["homova"] = tempOutNames;
75 m->errorOut(e, "HomovaCommand", "HomovaCommand");
79 //**********************************************************************************************************************
80 vector<string> HomovaCommand::getRequiredParameters(){
82 string Array[] = {"design"};
83 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
87 m->errorOut(e, "HomovaCommand", "getRequiredParameters");
91 //**********************************************************************************************************************
92 vector<string> HomovaCommand::getRequiredFiles(){
95 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
99 m->errorOut(e, "HomovaCommand", "getRequiredFiles");
103 //**********************************************************************************************************************
105 HomovaCommand::HomovaCommand(string option) {
107 globaldata = GlobalData::getInstance();
112 //allow user to run help
113 if(option == "help") { help(); abort = true; }
116 //valid paramters for this command
117 string AlignArray[] = {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
118 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
120 OptionParser parser(option);
121 map<string,string> parameters = parser.getParameters();
123 ValidParameters validParameter;
125 //check to make sure all parameters are valid for command
126 map<string,string>::iterator it;
127 for (it = parameters.begin(); it != parameters.end(); it++) {
128 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
131 //initialize outputTypes
132 vector<string> tempOutNames;
133 outputTypes["homova"] = tempOutNames;
135 //if the user changes the output directory command factory will send this info to us in the output parameter
136 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
138 //if the user changes the input directory command factory will send this info to us in the output parameter
139 string inputDir = validParameter.validFile(parameters, "inputdir", false);
140 if (inputDir == "not found"){ inputDir = ""; }
143 it = parameters.find("design");
144 //user has given a template file
145 if(it != parameters.end()){
146 path = m->hasPath(it->second);
147 //if the user has not given a path then, add inputdir. else leave path alone.
148 if (path == "") { parameters["design"] = inputDir + it->second; }
151 it = parameters.find("phylip");
152 //user has given a template file
153 if(it != parameters.end()){
154 path = m->hasPath(it->second);
155 //if the user has not given a path then, add inputdir. else leave path alone.
156 if (path == "") { parameters["phylip"] = inputDir + it->second; }
160 phylipfile = validParameter.validFile(parameters, "phylip", true);
161 if (phylipfile == "not open") { phylipfile = ""; abort = true; }
162 else if (phylipfile == "not found") { phylipfile = ""; }
163 else { globaldata->newRead(); format = "phylip"; globaldata->setPhylipFile(phylipfile); }
165 //check for required parameters
166 designfile = validParameter.validFile(parameters, "design", true);
167 if (designfile == "not open") { abort = true; }
168 else if (designfile == "not found") { designfile = ""; m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }
170 //make sure the user has already run the read.otu command
171 if ((globaldata->getSharedFile() == "")) {
172 if ((phylipfile == "")) {
173 m->mothurOut("You must read a list and a group, a shared file or provide a distance matrix before you can use the homova command."); m->mothurOutEndLine(); abort = true;
175 }else { sharedfile = globaldata->getSharedFile(); }
177 //use distance matrix if one is provided
178 if ((sharedfile != "") && (phylipfile != "")) { sharedfile = ""; }
180 //check for optional parameter and set defaults
181 // ...at some point should added some additional type checking...
182 label = validParameter.validFile(parameters, "label", false);
183 if (label == "not found") { label = ""; }
185 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
186 else { allLines = 1; }
189 //if the user has not specified any labels use the ones from read.otu
191 allLines = globaldata->allLines;
192 labels = globaldata->labels;
195 groups = validParameter.validFile(parameters, "groups", false);
196 if (groups == "not found") { groups = ""; }
198 m->splitAtDash(groups, Groups);
199 globaldata->Groups = Groups;
202 sets = validParameter.validFile(parameters, "sets", false);
203 if (sets == "not found") { sets = ""; pickedGroups = false; }
206 m->splitAtDash(sets, Sets);
210 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
211 convert(temp, iters);
213 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
214 convert(temp, processors);
216 vector<string> Estimators;
217 calc = validParameter.validFile(parameters, "calc", false);
218 if (calc == "not found") { calc = "morisitahorn"; }
219 m->splitAtDash(calc, Estimators);
221 if (abort == false) {
223 ValidCalculators* validCalculator = new ValidCalculators();
225 for (int i=0; i<Estimators.size(); i++) {
226 if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) {
227 if (Estimators[i] == "sharedsobs") {
228 calculators.push_back(new SharedSobsCS());
229 }else if (Estimators[i] == "sharedchao") {
230 calculators.push_back(new SharedChao1());
231 }else if (Estimators[i] == "sharedace") {
232 calculators.push_back(new SharedAce());
233 }else if (Estimators[i] == "jabund") {
234 calculators.push_back(new JAbund());
235 }else if (Estimators[i] == "sorabund") {
236 calculators.push_back(new SorAbund());
237 }else if (Estimators[i] == "jclass") {
238 calculators.push_back(new Jclass());
239 }else if (Estimators[i] == "sorclass") {
240 calculators.push_back(new SorClass());
241 }else if (Estimators[i] == "jest") {
242 calculators.push_back(new Jest());
243 }else if (Estimators[i] == "sorest") {
244 calculators.push_back(new SorEst());
245 }else if (Estimators[i] == "thetayc") {
246 calculators.push_back(new ThetaYC());
247 }else if (Estimators[i] == "thetan") {
248 calculators.push_back(new ThetaN());
249 }else if (Estimators[i] == "kstest") {
250 calculators.push_back(new KSTest());
251 }else if (Estimators[i] == "sharednseqs") {
252 calculators.push_back(new SharedNSeqs());
253 }else if (Estimators[i] == "ochiai") {
254 calculators.push_back(new Ochiai());
255 }else if (Estimators[i] == "anderberg") {
256 calculators.push_back(new Anderberg());
257 }else if (Estimators[i] == "kulczynski") {
258 calculators.push_back(new Kulczynski());
259 }else if (Estimators[i] == "kulczynskicody") {
260 calculators.push_back(new KulczynskiCody());
261 }else if (Estimators[i] == "lennon") {
262 calculators.push_back(new Lennon());
263 }else if (Estimators[i] == "morisitahorn") {
264 calculators.push_back(new MorHorn());
265 }else if (Estimators[i] == "braycurtis") {
266 calculators.push_back(new BrayCurtis());
267 }else if (Estimators[i] == "whittaker") {
268 calculators.push_back(new Whittaker());
269 }else if (Estimators[i] == "odum") {
270 calculators.push_back(new Odum());
271 }else if (Estimators[i] == "canberra") {
272 calculators.push_back(new Canberra());
273 }else if (Estimators[i] == "structeuclidean") {
274 calculators.push_back(new StructEuclidean());
275 }else if (Estimators[i] == "structchord") {
276 calculators.push_back(new StructChord());
277 }else if (Estimators[i] == "hellinger") {
278 calculators.push_back(new Hellinger());
279 }else if (Estimators[i] == "manhattan") {
280 calculators.push_back(new Manhattan());
281 }else if (Estimators[i] == "structpearson") {
282 calculators.push_back(new StructPearson());
283 }else if (Estimators[i] == "soergel") {
284 calculators.push_back(new Soergel());
285 }else if (Estimators[i] == "spearman") {
286 calculators.push_back(new Spearman());
287 }else if (Estimators[i] == "structkulczynski") {
288 calculators.push_back(new StructKulczynski());
289 }else if (Estimators[i] == "speciesprofile") {
290 calculators.push_back(new SpeciesProfile());
291 }else if (Estimators[i] == "hamming") {
292 calculators.push_back(new Hamming());
293 }else if (Estimators[i] == "structchi2") {
294 calculators.push_back(new StructChi2());
295 }else if (Estimators[i] == "gower") {
296 calculators.push_back(new Gower());
297 }else if (Estimators[i] == "memchi2") {
298 calculators.push_back(new MemChi2());
299 }else if (Estimators[i] == "memchord") {
300 calculators.push_back(new MemChord());
301 }else if (Estimators[i] == "memeuclidean") {
302 calculators.push_back(new MemEuclidean());
303 }else if (Estimators[i] == "mempearson") {
304 calculators.push_back(new MemPearson());
312 catch(exception& e) {
313 m->errorOut(e, "HomovaCommand", "HomovaCommand");
318 //**********************************************************************************************************************
320 void HomovaCommand::help(){
322 m->mothurOut("The homova command can only be executed after a successful read.otu command of a list and group or shared file, or by providing a phylip formatted distance matrix.\n");
323 m->mothurOut("The homova command outputs a .homova file. \n");
324 m->mothurOut("The homova command parameters are phylip, iters, groups, label, design, sets and processors. The design parameter is required.\n");
325 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running amova. It is required. \n");
326 m->mothurOut("The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
327 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile. To run the pairwise comparisons of all sets use sets=all.\n");
328 m->mothurOut("The iters parameter allows you to set number of randomization for the P value. The default is 1000. \n");
329 m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes. groups=all will find all pairwise comparisons. \n");
330 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
331 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
332 m->mothurOut("The homova command should be in the following format: homova(design=yourDesignFile).\n");
333 m->mothurOut("Example amova(design=temp.design, groups=A-B-C).\n");
334 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
337 catch(exception& e) {
338 m->errorOut(e, "HomovaCommand", "help");
343 //**********************************************************************************************************************
345 HomovaCommand::~HomovaCommand(){}
347 //**********************************************************************************************************************
349 int HomovaCommand::execute(){
352 if (abort == true) { return 0; }
355 designMap = new GroupMap(designfile);
356 designMap->readDesignMap();
358 //make sure sets are all in designMap
359 SharedUtil* util = new SharedUtil();
360 util->setGroups(Sets, designMap->namesOfGroups);
363 //if the user pickedGroups or set sets=all, then we want to figure out how to split the pairwise comparison between processors
364 int numGroups = Sets.size();
366 for (int a=0; a<numGroups; a++) {
367 for (int l = 0; l < a; l++) {
368 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
369 namesOfGroupCombos.push_back(groups);
372 }else { //one giant compare
373 vector<string> groups;
374 for (int a=0; a<numGroups; a++) { groups.push_back(Sets[a]); }
375 namesOfGroupCombos.push_back(groups);
379 if (numGroups == 2) { processors = 1; }
380 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
382 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
384 int numPairs = namesOfGroupCombos.size();
385 int numPairsPerProcessor = numPairs / processors;
387 for (int i = 0; i < processors; i++) {
388 int startPos = i * numPairsPerProcessor;
389 if(i == processors - 1){
390 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
392 lines.push_back(linePair(startPos, numPairsPerProcessor));
397 if (sharedfile != "") { //create distance matrix for each label
399 if (outputDir == "") { outputDir = m->hasPath(sharedfile); }
401 //for each calculator
402 for(int i = 0 ; i < calculators.size(); i++) {
404 //create a new filename
406 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".homova";
407 m->openOutputFile(outputFile, out);
408 outputNames.push_back(outputFile); outputTypes["homova"].push_back(outputFile);
411 out << "label\tgroupsCompared\tBValue\tpValue" << endl;
415 InputData input(sharedfile, "sharedfile");
416 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
417 string lastLabel = lookup[0]->getLabel();
419 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
420 set<string> processedLabels;
421 set<string> userLabels = labels;
423 //sanity check between shared file groups and design file groups
424 for (int i = 0; i < lookup.size(); i++) {
425 string group = designMap->getGroup(lookup[i]->getGroup());
427 if (group == "not found") { m->control_pressed = true; m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct."); m->mothurOutEndLine(); }
430 //as long as you are not at the end of the file or done wih the lines you want
431 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
433 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
435 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
437 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
440 processedLabels.insert(lookup[0]->getLabel());
441 userLabels.erase(lookup[0]->getLabel());
444 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
445 string saveLabel = lookup[0]->getLabel();
447 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
448 lookup = input.getSharedRAbundVectors(lastLabel);
449 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
453 processedLabels.insert(lookup[0]->getLabel());
454 userLabels.erase(lookup[0]->getLabel());
456 //restore real lastlabel to save below
457 lookup[0]->setLabel(saveLabel);
460 lastLabel = lookup[0]->getLabel();
461 //prevent memory leak
462 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
464 if (m->control_pressed) { globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
466 //get next line to process
467 lookup = input.getSharedRAbundVectors();
470 if (m->control_pressed) { globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
472 //output error messages about any remaining user labels
473 set<string>::iterator it;
474 bool needToRun = false;
475 for (it = userLabels.begin(); it != userLabels.end(); it++) {
476 m->mothurOut("Your file does not include the label " + *it);
477 if (processedLabels.count(lastLabel) != 1) {
478 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
481 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
485 //run last label if you need to
486 if (needToRun == true) {
487 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
488 lookup = input.getSharedRAbundVectors(lastLabel);
490 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
494 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
497 //reset groups parameter
498 globaldata->Groups.clear();
501 }else { //user provided distance matrix
503 if (outputDir == "") { outputDir = m->hasPath(phylipfile); }
505 //create a new filename
507 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "homova";
508 m->openOutputFile(outputFile, out);
509 outputNames.push_back(outputFile); outputTypes["homova"].push_back(outputFile);
512 out << "groupsCompared\tBValue\tpValue" << endl;
515 ReadPhylipVector readMatrix(phylipfile);
516 vector< vector<double> > completeMatrix;
517 vector<string> names = readMatrix.read(completeMatrix);
519 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
521 driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
524 vector<int> processIDS;
526 //loop through and create all the processes you want
527 while (process != processors) {
531 processIDS.push_back(pid);
534 driver(lines[process].start, lines[process].num, names, toString(getpid()), completeMatrix);
537 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
538 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
544 driver(lines[0].start, lines[0].num, names, "", completeMatrix);
546 //force parent to wait until all the processes are done
547 for (int i=0;i<(processors-1);i++) {
548 int temp = processIDS[i];
553 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "homova";
554 for (int j = 0; j < processIDS.size(); j++) {
555 m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
556 remove((outputFile + toString(processIDS[j])).c_str());
561 driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
568 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
570 m->mothurOutEndLine();
571 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
572 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
573 m->mothurOutEndLine();
577 catch(exception& e) {
578 m->errorOut(e, "HomovaCommand", "execute");
582 //**********************************************************************************************************************
584 int HomovaCommand::process(vector<SharedRAbundVector*> thisLookup) {
587 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
589 driver(0, namesOfGroupCombos.size(), thisLookup, "");
592 vector<int> processIDS;
594 //loop through and create all the processes you want
595 while (process != processors) {
599 processIDS.push_back(pid);
602 driver(lines[process].start, lines[process].num, thisLookup, toString(getpid()));
605 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
606 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
612 driver(lines[0].start, lines[0].num, thisLookup, "");
614 //force parent to wait until all the processes are done
615 for (int i=0;i<(processors-1);i++) {
616 int temp = processIDS[i];
621 for(int i = 0 ; i < calculators.size(); i++) {
622 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".homova";
624 for (int j = 0; j < processIDS.size(); j++) {
625 m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
626 remove((outputFile + toString(processIDS[j])).c_str());
631 driver(0, namesOfGroupCombos.size(), thisLookUp, "");
636 catch(exception& e) {
637 m->errorOut(e, "HomovaCommand", "process");
641 //**********************************************************************************************************************
643 int HomovaCommand::driver(int start, int num, vector<SharedRAbundVector*> thisLookup, string pidValue) {
645 vector<SharedRAbundVector*> subset;
648 //for each calculator
649 for(int i = 0 ; i < calculators.size(); i++) {
651 //create a new filename
653 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".homova" + pidValue;
654 m->openOutputFileAppend(outputFile, out);
655 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
658 for (int c = start; c < (start+num); c++) {
660 if (m->control_pressed) { out.close(); return 0; }
663 vector<string> setNames;
664 for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
666 vector<SharedRAbundVector*> thisCombosLookup;
667 vector<string> thisCombosLookupSets; //what set each sharedRabund is from to be used when calculating SSWithin
668 for (int k = 0; k < thisLookup.size(); k++) {
669 string thisGroup = thisLookup[k]->getGroup();
671 //is this group for a set we want to compare??
672 if (m->inUsersGroups(designMap->getGroup(thisGroup), setNames)) {
673 thisCombosLookup.push_back(thisLookup[k]);
674 thisCombosLookupSets.push_back(designMap->getGroup(thisGroup));
679 int numGroups = thisCombosLookup.size();
681 //calc the distance matrix
683 matrix.resize(numGroups);
684 for (int k = 0; k < matrix.size(); k++) { for (int j = 0; j < matrix.size(); j++) { matrix[k].push_back(1.0); } }
686 if (thisCombosLookup.size() == 0) {
687 m->mothurOut("[ERROR]: Missing shared info for sets. Skipping comparison."); m->mothurOutEndLine();
690 out << thisLookup[0]->getLabel() << '\t';
691 if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; }
692 else { out << "all" << '\t'; }
694 for (int k = 0; k < thisCombosLookup.size(); k++) {
695 for (int l = k; l < thisCombosLookup.size(); l++) {
697 if (m->control_pressed) { out.close(); return 0; }
699 if (k != l) { //we dont need to similiarity of a groups to itself
700 //get estimated similarity between 2 groups
701 subset.clear(); //clear out old pair of sharedrabunds
702 //add new pair of sharedrabunds
703 subset.push_back(thisCombosLookup[k]); subset.push_back(thisCombosLookup[l]);
705 //if this calc needs all groups to calculate the pair load all groups
706 if (calculators[i]->getNeedsAll()) {
707 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
708 for (int w = 0; w < thisCombosLookup.size(); w++) {
709 if ((w != k) && (w != l)) { subset.push_back(thisCombosLookup[w]); }
713 data = calculators[i]->getValues(subset); //saves the calculator outputs
715 //save values in similarity matrix
716 matrix[k][l] = 1.0 - data[0];
717 matrix[l][k] = 1.0 - data[0];
723 calcHomova(out, setNames.size(), thisCombosLookupSets);
733 catch(exception& e) {
734 m->errorOut(e, "HomovaCommand", "driver");
738 //**********************************************************************************************************************
740 int HomovaCommand::driver(int start, int num, vector<string> names, string pidValue, vector< vector<double> >& completeMatrix) {
743 //create a new filename
745 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "homova" + pidValue;
746 m->openOutputFileAppend(outputFile, out);
747 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
750 for (int c = start; c < (start+num); c++) {
752 if (m->control_pressed) { out.close(); return 0; }
755 vector<string> setNames;
756 for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
758 vector<string> thisCombosSets; //what set each line in the distance matrix is from to be used when calculating SSWithin
760 for (int k = 0; k < names.size(); k++) {
761 //is this group for a set we want to compare??
762 if (m->inUsersGroups(designMap->getGroup(names[k]), setNames)) {
763 thisCombosSets.push_back(designMap->getGroup(names[k]));
764 indexes.insert(k); //save indexes of valid rows in matrix for submatrix
768 int numGroups = thisCombosSets.size();
770 //calc the distance matrix
772 matrix.resize(numGroups);
774 for (int k = 0; k < matrix.size(); k++) { for (int j = 0; j < matrix.size(); j++) { matrix[k].push_back(1.0); } }
776 if (thisCombosSets.size() == 0) {
777 m->mothurOut("[ERROR]: Missing distance info for sets. Skipping comparison."); m->mothurOutEndLine();
780 if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; }
781 else { out << "all" << '\t'; }
785 for (int j = 0; j < completeMatrix.size(); j++) {
787 if (indexes.count(j) != 0) { //we want at least part of this row
789 for (int k = 0; k < completeMatrix[j].size(); k++) {
791 if (indexes.count(k) != 0) { //we want this distance
792 matrix[rowCount][columnCount] = completeMatrix[j][k];
793 matrix[columnCount][rowCount] = completeMatrix[j][k];
802 calcHomova(out, setNames.size(), thisCombosSets);
812 catch(exception& e) {
813 m->errorOut(e, "HomovaCommand", "driver");
817 //**********************************************************************************************************************
818 int HomovaCommand::calcHomova(ofstream& out, int numTreatments, vector<string> thisCombosLookupSets) {
821 double SSTotal, BValue, pValue;
822 map<string, double> SSWithin;
823 map<string, double>::iterator it;
825 SSTotal = calcWithin(matrix, numTreatments, thisCombosLookupSets);
826 int numSamples = matrix.size();
829 map<string, int> counts;
830 SSWithin = calcWithinEach(matrix, numTreatments, thisCombosLookupSets, counts);
833 double sumDenom = 0.0;
834 for (it = SSWithin.begin(); it != SSWithin.end(); it++) {
835 double temp2 = (it->second) / (double) (counts[it->first] - 1);
836 sum += ((counts[it->first] - 1) * log(temp2));
837 sumDenom += ((1 / (double) (counts[it->first] - 1)) - ( 1 / (double) (numSamples - numTreatments) ));
840 double temp = SSTotal / (double) (numSamples - numTreatments);
841 double numeratorTerm1 = (numSamples - numTreatments) * log(temp);
842 double numerator = numeratorTerm1 - sum;
843 double denom = 1 + ((1 / (double) (3 * (numTreatments - 1))) * sumDenom);
845 BValue = numerator / denom;
849 for (int i = 0; i < iters; i++) {
850 if (m->control_pressed) { break; }
852 //randomly shuffle names to randomize the matrix
853 vector<string> copyNames = thisCombosLookupSets;
854 random_shuffle(copyNames.begin(), copyNames.end());
856 SSTotal = calcWithin(matrix, numTreatments, copyNames);
859 map<string, double> randomSSWithin = calcWithinEach(matrix, numTreatments, copyNames, counts);
863 for (it = randomSSWithin.begin(); it != randomSSWithin.end(); it++) {
864 double temp2 = (it->second) / (double) (counts[it->first] - numTreatments);
865 sum += ((counts[it->first] - 1) * log(temp2));
866 sumDenom += ((1 / (double) (counts[it->first] - 1)) - ( 1 / (double) (numSamples - numTreatments) ));
869 temp = SSTotal / (double) (numSamples - numTreatments);
870 numeratorTerm1 = (numSamples - numTreatments) * log(temp);
871 numerator = numeratorTerm1 - sum;
872 denom = 1 + ((1 / (double) (3 * (numTreatments - 1))) * sumDenom);
874 double randomBValue = numerator / denom;
876 if (randomBValue <= BValue) { count++; }
879 pValue = count / (float) iters;
881 out << BValue << '\t' << pValue << endl;
885 catch(exception& e) {
886 m->errorOut(e, "HomovaCommand", "calcHomova");
890 //**********************************************************************************************************************
891 map<string, double> HomovaCommand::calcWithinEach(vector< vector<double> >& thisMatrix, int numTreatments, vector<string> thisCombosLookupSets, map<string, int>& count) {
893 map<string, double> within; //maps treatment to within value
894 map<string, double>::iterator it;
895 map<string, int>::iterator itCount;
897 for (int i = 0; i < thisCombosLookupSets.size(); i++) {
898 itCount = count.find(thisCombosLookupSets[i]);
900 if (itCount == count.end()) { //first time we have seen this treatment
901 count[thisCombosLookupSets[i]] = 1;
903 count[thisCombosLookupSets[i]]++;
907 within[thisCombosLookupSets[i]] = 0.0;
911 //traverse lower triangle
912 for (int k = 0; k < thisMatrix.size(); k++) {
913 for (int l = k; l < thisMatrix[k].size(); l++) {
915 //if you are from the same treatment then eij is 1 so add, else eij = 0
916 if (thisCombosLookupSets[k] == thisCombosLookupSets[l]) {
917 within[thisCombosLookupSets[k]] += (thisMatrix[k][l] * thisMatrix[k][l]); //dij^2
922 //1 / (numSamples in this treatment)
923 for (it = within.begin(); it != within.end(); it++) {
924 (it->second) *= (1.0 / (float) count[it->first]);
929 catch(exception& e) {
930 m->errorOut(e, "HomovaCommand", "calcWithinEach");
934 //**********************************************************************************************************************
935 double HomovaCommand::calcWithin(vector< vector<double> >& thisMatrix, int numTreatments, vector<string> thisCombosLookupSets) {
939 //traverse lower triangle
940 for (int k = 0; k < thisMatrix.size(); k++) {
941 for (int l = k; l < thisMatrix[k].size(); l++) {
943 //if you are from the same treatment then eij is 1 so add, else eij = 0
944 if (thisCombosLookupSets[k] == thisCombosLookupSets[l]) {
945 total += (thisMatrix[k][l] * thisMatrix[k][l]); //dij^2
950 //1 / (numSamples / numTreatments)
951 total *= (1.0 / (float) (thisMatrix.size() / (float) numTreatments));
955 catch(exception& e) {
956 m->errorOut(e, "HomovaCommand", "calcWithin");
960 //**********************************************************************************************************************