5 * Created by westcott on 2/8/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "homovacommand.h"
11 #include "sharedutilities.h"
12 #include "sharedsobscollectsummary.h"
13 #include "sharedchao1.h"
14 #include "sharedace.h"
15 #include "sharednseqs.h"
16 #include "sharedjabund.h"
17 #include "sharedsorabund.h"
18 #include "sharedjclass.h"
19 #include "sharedsorclass.h"
20 #include "sharedjest.h"
21 #include "sharedsorest.h"
22 #include "sharedthetayc.h"
23 #include "sharedthetan.h"
24 #include "sharedkstest.h"
25 #include "whittaker.h"
26 #include "sharedochiai.h"
27 #include "sharedanderbergs.h"
28 #include "sharedkulczynski.h"
29 #include "sharedkulczynskicody.h"
30 #include "sharedlennon.h"
31 #include "sharedmorisitahorn.h"
32 #include "sharedbraycurtis.h"
33 #include "sharedjackknife.h"
34 #include "whittaker.h"
37 #include "structeuclidean.h"
38 #include "structchord.h"
39 #include "hellinger.h"
40 #include "manhattan.h"
41 #include "structpearson.h"
44 #include "structkulczynski.h"
45 #include "structchi2.h"
46 #include "speciesprofile.h"
51 #include "memeuclidean.h"
52 #include "mempearson.h"
54 //**********************************************************************************************************************
55 vector<string> HomovaCommand::getValidParameters(){
57 string Array[] = {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
58 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
62 m->errorOut(e, "HomovaCommand", "getValidParameters");
66 //**********************************************************************************************************************
67 HomovaCommand::HomovaCommand(){
69 abort = true; calledHelp = true;
70 vector<string> tempOutNames;
71 outputTypes["homova"] = tempOutNames;
74 m->errorOut(e, "HomovaCommand", "HomovaCommand");
78 //**********************************************************************************************************************
79 vector<string> HomovaCommand::getRequiredParameters(){
81 string Array[] = {"design"};
82 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
86 m->errorOut(e, "HomovaCommand", "getRequiredParameters");
90 //**********************************************************************************************************************
91 vector<string> HomovaCommand::getRequiredFiles(){
94 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
98 m->errorOut(e, "HomovaCommand", "getRequiredFiles");
102 //**********************************************************************************************************************
104 HomovaCommand::HomovaCommand(string option) {
106 globaldata = GlobalData::getInstance();
107 abort = false; calledHelp = false;
111 //allow user to run help
112 if(option == "help") { help(); abort = true; calledHelp = true; }
115 //valid paramters for this command
116 string AlignArray[] = {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
117 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
119 OptionParser parser(option);
120 map<string,string> parameters = parser.getParameters();
122 ValidParameters validParameter;
124 //check to make sure all parameters are valid for command
125 map<string,string>::iterator it;
126 for (it = parameters.begin(); it != parameters.end(); it++) {
127 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
130 //initialize outputTypes
131 vector<string> tempOutNames;
132 outputTypes["homova"] = tempOutNames;
134 //if the user changes the output directory command factory will send this info to us in the output parameter
135 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
137 //if the user changes the input directory command factory will send this info to us in the output parameter
138 string inputDir = validParameter.validFile(parameters, "inputdir", false);
139 if (inputDir == "not found"){ inputDir = ""; }
142 it = parameters.find("design");
143 //user has given a template file
144 if(it != parameters.end()){
145 path = m->hasPath(it->second);
146 //if the user has not given a path then, add inputdir. else leave path alone.
147 if (path == "") { parameters["design"] = inputDir + it->second; }
150 it = parameters.find("phylip");
151 //user has given a template file
152 if(it != parameters.end()){
153 path = m->hasPath(it->second);
154 //if the user has not given a path then, add inputdir. else leave path alone.
155 if (path == "") { parameters["phylip"] = inputDir + it->second; }
159 phylipfile = validParameter.validFile(parameters, "phylip", true);
160 if (phylipfile == "not open") { phylipfile = ""; abort = true; }
161 else if (phylipfile == "not found") { phylipfile = ""; }
162 else { globaldata->newRead(); format = "phylip"; globaldata->setPhylipFile(phylipfile); }
164 //check for required parameters
165 designfile = validParameter.validFile(parameters, "design", true);
166 if (designfile == "not open") { abort = true; }
167 else if (designfile == "not found") { designfile = ""; m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }
169 //make sure the user has already run the read.otu command
170 if ((globaldata->getSharedFile() == "")) {
171 if ((phylipfile == "")) {
172 m->mothurOut("You must read a list and a group, a shared file or provide a distance matrix before you can use the homova command."); m->mothurOutEndLine(); abort = true;
174 }else { sharedfile = globaldata->getSharedFile(); }
176 //use distance matrix if one is provided
177 if ((sharedfile != "") && (phylipfile != "")) { sharedfile = ""; }
179 //check for optional parameter and set defaults
180 // ...at some point should added some additional type checking...
181 label = validParameter.validFile(parameters, "label", false);
182 if (label == "not found") { label = ""; }
184 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
185 else { allLines = 1; }
188 //if the user has not specified any labels use the ones from read.otu
190 allLines = globaldata->allLines;
191 labels = globaldata->labels;
194 groups = validParameter.validFile(parameters, "groups", false);
195 if (groups == "not found") { groups = ""; }
197 m->splitAtDash(groups, Groups);
198 globaldata->Groups = Groups;
201 sets = validParameter.validFile(parameters, "sets", false);
202 if (sets == "not found") { sets = ""; pickedGroups = false; }
205 m->splitAtDash(sets, Sets);
209 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
210 convert(temp, iters);
212 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
213 convert(temp, processors);
215 vector<string> Estimators;
216 calc = validParameter.validFile(parameters, "calc", false);
217 if (calc == "not found") { calc = "morisitahorn"; }
218 m->splitAtDash(calc, Estimators);
220 if (abort == false) {
222 ValidCalculators* validCalculator = new ValidCalculators();
224 for (int i=0; i<Estimators.size(); i++) {
225 if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) {
226 if (Estimators[i] == "sharedsobs") {
227 calculators.push_back(new SharedSobsCS());
228 }else if (Estimators[i] == "sharedchao") {
229 calculators.push_back(new SharedChao1());
230 }else if (Estimators[i] == "sharedace") {
231 calculators.push_back(new SharedAce());
232 }else if (Estimators[i] == "jabund") {
233 calculators.push_back(new JAbund());
234 }else if (Estimators[i] == "sorabund") {
235 calculators.push_back(new SorAbund());
236 }else if (Estimators[i] == "jclass") {
237 calculators.push_back(new Jclass());
238 }else if (Estimators[i] == "sorclass") {
239 calculators.push_back(new SorClass());
240 }else if (Estimators[i] == "jest") {
241 calculators.push_back(new Jest());
242 }else if (Estimators[i] == "sorest") {
243 calculators.push_back(new SorEst());
244 }else if (Estimators[i] == "thetayc") {
245 calculators.push_back(new ThetaYC());
246 }else if (Estimators[i] == "thetan") {
247 calculators.push_back(new ThetaN());
248 }else if (Estimators[i] == "kstest") {
249 calculators.push_back(new KSTest());
250 }else if (Estimators[i] == "sharednseqs") {
251 calculators.push_back(new SharedNSeqs());
252 }else if (Estimators[i] == "ochiai") {
253 calculators.push_back(new Ochiai());
254 }else if (Estimators[i] == "anderberg") {
255 calculators.push_back(new Anderberg());
256 }else if (Estimators[i] == "kulczynski") {
257 calculators.push_back(new Kulczynski());
258 }else if (Estimators[i] == "kulczynskicody") {
259 calculators.push_back(new KulczynskiCody());
260 }else if (Estimators[i] == "lennon") {
261 calculators.push_back(new Lennon());
262 }else if (Estimators[i] == "morisitahorn") {
263 calculators.push_back(new MorHorn());
264 }else if (Estimators[i] == "braycurtis") {
265 calculators.push_back(new BrayCurtis());
266 }else if (Estimators[i] == "whittaker") {
267 calculators.push_back(new Whittaker());
268 }else if (Estimators[i] == "odum") {
269 calculators.push_back(new Odum());
270 }else if (Estimators[i] == "canberra") {
271 calculators.push_back(new Canberra());
272 }else if (Estimators[i] == "structeuclidean") {
273 calculators.push_back(new StructEuclidean());
274 }else if (Estimators[i] == "structchord") {
275 calculators.push_back(new StructChord());
276 }else if (Estimators[i] == "hellinger") {
277 calculators.push_back(new Hellinger());
278 }else if (Estimators[i] == "manhattan") {
279 calculators.push_back(new Manhattan());
280 }else if (Estimators[i] == "structpearson") {
281 calculators.push_back(new StructPearson());
282 }else if (Estimators[i] == "soergel") {
283 calculators.push_back(new Soergel());
284 }else if (Estimators[i] == "spearman") {
285 calculators.push_back(new Spearman());
286 }else if (Estimators[i] == "structkulczynski") {
287 calculators.push_back(new StructKulczynski());
288 }else if (Estimators[i] == "speciesprofile") {
289 calculators.push_back(new SpeciesProfile());
290 }else if (Estimators[i] == "hamming") {
291 calculators.push_back(new Hamming());
292 }else if (Estimators[i] == "structchi2") {
293 calculators.push_back(new StructChi2());
294 }else if (Estimators[i] == "gower") {
295 calculators.push_back(new Gower());
296 }else if (Estimators[i] == "memchi2") {
297 calculators.push_back(new MemChi2());
298 }else if (Estimators[i] == "memchord") {
299 calculators.push_back(new MemChord());
300 }else if (Estimators[i] == "memeuclidean") {
301 calculators.push_back(new MemEuclidean());
302 }else if (Estimators[i] == "mempearson") {
303 calculators.push_back(new MemPearson());
311 catch(exception& e) {
312 m->errorOut(e, "HomovaCommand", "HomovaCommand");
317 //**********************************************************************************************************************
319 void HomovaCommand::help(){
321 m->mothurOut("The homova command can only be executed after a successful read.otu command of a list and group or shared file, or by providing a phylip formatted distance matrix.\n");
322 m->mothurOut("The homova command outputs a .homova file. \n");
323 m->mothurOut("The homova command parameters are phylip, iters, groups, label, design, sets and processors. The design parameter is required.\n");
324 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running amova. It is required. \n");
325 m->mothurOut("The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
326 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile. To run the pairwise comparisons of all sets use sets=all.\n");
327 m->mothurOut("The iters parameter allows you to set number of randomization for the P value. The default is 1000. \n");
328 m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes. groups=all will find all pairwise comparisons. \n");
329 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
330 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
331 m->mothurOut("The homova command should be in the following format: homova(design=yourDesignFile).\n");
332 m->mothurOut("Example amova(design=temp.design, groups=A-B-C).\n");
333 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
336 catch(exception& e) {
337 m->errorOut(e, "HomovaCommand", "help");
342 //**********************************************************************************************************************
344 HomovaCommand::~HomovaCommand(){}
346 //**********************************************************************************************************************
348 int HomovaCommand::execute(){
351 if (abort == true) { if (calledHelp) { return 0; } return 2; }
354 designMap = new GroupMap(designfile);
355 designMap->readDesignMap();
357 //make sure sets are all in designMap
358 SharedUtil* util = new SharedUtil();
359 util->setGroups(Sets, designMap->namesOfGroups);
362 //if the user pickedGroups or set sets=all, then we want to figure out how to split the pairwise comparison between processors
363 int numGroups = Sets.size();
365 for (int a=0; a<numGroups; a++) {
366 for (int l = 0; l < a; l++) {
367 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
368 namesOfGroupCombos.push_back(groups);
371 }else { //one giant compare
372 vector<string> groups;
373 for (int a=0; a<numGroups; a++) { groups.push_back(Sets[a]); }
374 namesOfGroupCombos.push_back(groups);
378 if (numGroups == 2) { processors = 1; }
379 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
381 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
383 int numPairs = namesOfGroupCombos.size();
384 int numPairsPerProcessor = numPairs / processors;
386 for (int i = 0; i < processors; i++) {
387 int startPos = i * numPairsPerProcessor;
388 if(i == processors - 1){
389 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
391 lines.push_back(linePair(startPos, numPairsPerProcessor));
396 if (sharedfile != "") { //create distance matrix for each label
398 if (outputDir == "") { outputDir = m->hasPath(sharedfile); }
400 //for each calculator
401 for(int i = 0 ; i < calculators.size(); i++) {
403 //create a new filename
405 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".homova";
406 m->openOutputFile(outputFile, out);
407 outputNames.push_back(outputFile); outputTypes["homova"].push_back(outputFile);
410 out << "label\tgroupsCompared\tBValue\tpValue" << endl;
414 InputData input(sharedfile, "sharedfile");
415 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
416 string lastLabel = lookup[0]->getLabel();
418 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
419 set<string> processedLabels;
420 set<string> userLabels = labels;
422 //sanity check between shared file groups and design file groups
423 for (int i = 0; i < lookup.size(); i++) {
424 string group = designMap->getGroup(lookup[i]->getGroup());
426 if (group == "not found") { m->control_pressed = true; m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct."); m->mothurOutEndLine(); }
429 //as long as you are not at the end of the file or done wih the lines you want
430 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
432 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
434 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
436 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
439 processedLabels.insert(lookup[0]->getLabel());
440 userLabels.erase(lookup[0]->getLabel());
443 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
444 string saveLabel = lookup[0]->getLabel();
446 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
447 lookup = input.getSharedRAbundVectors(lastLabel);
448 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
452 processedLabels.insert(lookup[0]->getLabel());
453 userLabels.erase(lookup[0]->getLabel());
455 //restore real lastlabel to save below
456 lookup[0]->setLabel(saveLabel);
459 lastLabel = lookup[0]->getLabel();
460 //prevent memory leak
461 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
463 if (m->control_pressed) { globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
465 //get next line to process
466 lookup = input.getSharedRAbundVectors();
469 if (m->control_pressed) { globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
471 //output error messages about any remaining user labels
472 set<string>::iterator it;
473 bool needToRun = false;
474 for (it = userLabels.begin(); it != userLabels.end(); it++) {
475 m->mothurOut("Your file does not include the label " + *it);
476 if (processedLabels.count(lastLabel) != 1) {
477 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
480 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
484 //run last label if you need to
485 if (needToRun == true) {
486 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
487 lookup = input.getSharedRAbundVectors(lastLabel);
489 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
493 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
496 //reset groups parameter
497 globaldata->Groups.clear();
500 }else { //user provided distance matrix
502 if (outputDir == "") { outputDir = m->hasPath(phylipfile); }
504 //create a new filename
506 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "homova";
507 m->openOutputFile(outputFile, out);
508 outputNames.push_back(outputFile); outputTypes["homova"].push_back(outputFile);
511 out << "groupsCompared\tBValue\tpValue" << endl;
514 ReadPhylipVector readMatrix(phylipfile);
515 vector< vector<double> > completeMatrix;
516 vector<string> names = readMatrix.read(completeMatrix);
518 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
520 driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
523 vector<int> processIDS;
525 //loop through and create all the processes you want
526 while (process != processors) {
530 processIDS.push_back(pid);
533 driver(lines[process].start, lines[process].num, names, toString(getpid()), completeMatrix);
536 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
537 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
543 driver(lines[0].start, lines[0].num, names, "", completeMatrix);
545 //force parent to wait until all the processes are done
546 for (int i=0;i<(processors-1);i++) {
547 int temp = processIDS[i];
552 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "homova";
553 for (int j = 0; j < processIDS.size(); j++) {
554 m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
555 remove((outputFile + toString(processIDS[j])).c_str());
560 driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
567 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
569 m->mothurOutEndLine();
570 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
571 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
572 m->mothurOutEndLine();
576 catch(exception& e) {
577 m->errorOut(e, "HomovaCommand", "execute");
581 //**********************************************************************************************************************
583 int HomovaCommand::process(vector<SharedRAbundVector*> thisLookup) {
586 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
588 driver(0, namesOfGroupCombos.size(), thisLookup, "");
591 vector<int> processIDS;
593 //loop through and create all the processes you want
594 while (process != processors) {
598 processIDS.push_back(pid);
601 driver(lines[process].start, lines[process].num, thisLookup, toString(getpid()));
604 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
605 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
611 driver(lines[0].start, lines[0].num, thisLookup, "");
613 //force parent to wait until all the processes are done
614 for (int i=0;i<(processors-1);i++) {
615 int temp = processIDS[i];
620 for(int i = 0 ; i < calculators.size(); i++) {
621 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".homova";
623 for (int j = 0; j < processIDS.size(); j++) {
624 m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
625 remove((outputFile + toString(processIDS[j])).c_str());
630 driver(0, namesOfGroupCombos.size(), thisLookUp, "");
635 catch(exception& e) {
636 m->errorOut(e, "HomovaCommand", "process");
640 //**********************************************************************************************************************
642 int HomovaCommand::driver(int start, int num, vector<SharedRAbundVector*> thisLookup, string pidValue) {
644 vector<SharedRAbundVector*> subset;
647 //for each calculator
648 for(int i = 0 ; i < calculators.size(); i++) {
650 //create a new filename
652 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".homova" + pidValue;
653 m->openOutputFileAppend(outputFile, out);
654 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
657 for (int c = start; c < (start+num); c++) {
659 if (m->control_pressed) { out.close(); return 0; }
662 vector<string> setNames;
663 for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
665 vector<SharedRAbundVector*> thisCombosLookup;
666 vector<string> thisCombosLookupSets; //what set each sharedRabund is from to be used when calculating SSWithin
667 for (int k = 0; k < thisLookup.size(); k++) {
668 string thisGroup = thisLookup[k]->getGroup();
670 //is this group for a set we want to compare??
671 if (m->inUsersGroups(designMap->getGroup(thisGroup), setNames)) {
672 thisCombosLookup.push_back(thisLookup[k]);
673 thisCombosLookupSets.push_back(designMap->getGroup(thisGroup));
678 int numGroups = thisCombosLookup.size();
680 //calc the distance matrix
682 matrix.resize(numGroups);
683 for (int k = 0; k < matrix.size(); k++) { for (int j = 0; j < matrix.size(); j++) { matrix[k].push_back(1.0); } }
685 if (thisCombosLookup.size() == 0) {
686 m->mothurOut("[ERROR]: Missing shared info for sets. Skipping comparison."); m->mothurOutEndLine();
689 out << thisLookup[0]->getLabel() << '\t';
690 if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; }
691 else { out << "all" << '\t'; }
693 for (int k = 0; k < thisCombosLookup.size(); k++) {
694 for (int l = k; l < thisCombosLookup.size(); l++) {
696 if (m->control_pressed) { out.close(); return 0; }
698 if (k != l) { //we dont need to similiarity of a groups to itself
699 //get estimated similarity between 2 groups
700 subset.clear(); //clear out old pair of sharedrabunds
701 //add new pair of sharedrabunds
702 subset.push_back(thisCombosLookup[k]); subset.push_back(thisCombosLookup[l]);
704 //if this calc needs all groups to calculate the pair load all groups
705 if (calculators[i]->getNeedsAll()) {
706 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
707 for (int w = 0; w < thisCombosLookup.size(); w++) {
708 if ((w != k) && (w != l)) { subset.push_back(thisCombosLookup[w]); }
712 data = calculators[i]->getValues(subset); //saves the calculator outputs
714 //save values in similarity matrix
715 matrix[k][l] = 1.0 - data[0];
716 matrix[l][k] = 1.0 - data[0];
722 calcHomova(out, setNames.size(), thisCombosLookupSets);
732 catch(exception& e) {
733 m->errorOut(e, "HomovaCommand", "driver");
737 //**********************************************************************************************************************
739 int HomovaCommand::driver(int start, int num, vector<string> names, string pidValue, vector< vector<double> >& completeMatrix) {
742 //create a new filename
744 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "homova" + pidValue;
745 m->openOutputFileAppend(outputFile, out);
746 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
749 for (int c = start; c < (start+num); c++) {
751 if (m->control_pressed) { out.close(); return 0; }
754 vector<string> setNames;
755 for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
757 vector<string> thisCombosSets; //what set each line in the distance matrix is from to be used when calculating SSWithin
759 for (int k = 0; k < names.size(); k++) {
760 //is this group for a set we want to compare??
761 if (m->inUsersGroups(designMap->getGroup(names[k]), setNames)) {
762 thisCombosSets.push_back(designMap->getGroup(names[k]));
763 indexes.insert(k); //save indexes of valid rows in matrix for submatrix
767 int numGroups = thisCombosSets.size();
769 //calc the distance matrix
771 matrix.resize(numGroups);
773 for (int k = 0; k < matrix.size(); k++) { for (int j = 0; j < matrix.size(); j++) { matrix[k].push_back(1.0); } }
775 if (thisCombosSets.size() == 0) {
776 m->mothurOut("[ERROR]: Missing distance info for sets. Skipping comparison."); m->mothurOutEndLine();
779 if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; }
780 else { out << "all" << '\t'; }
784 for (int j = 0; j < completeMatrix.size(); j++) {
786 if (indexes.count(j) != 0) { //we want at least part of this row
788 for (int k = 0; k < completeMatrix[j].size(); k++) {
790 if (indexes.count(k) != 0) { //we want this distance
791 matrix[rowCount][columnCount] = completeMatrix[j][k];
792 matrix[columnCount][rowCount] = completeMatrix[j][k];
801 calcHomova(out, setNames.size(), thisCombosSets);
811 catch(exception& e) {
812 m->errorOut(e, "HomovaCommand", "driver");
816 //**********************************************************************************************************************
817 int HomovaCommand::calcHomova(ofstream& out, int numTreatments, vector<string> thisCombosLookupSets) {
820 double SSTotal, BValue, pValue;
821 map<string, double> SSWithin;
822 map<string, double>::iterator it;
824 SSTotal = calcWithin(matrix, numTreatments, thisCombosLookupSets);
825 int numSamples = matrix.size();
828 map<string, int> counts;
829 SSWithin = calcWithinEach(matrix, numTreatments, thisCombosLookupSets, counts);
832 double sumDenom = 0.0;
833 for (it = SSWithin.begin(); it != SSWithin.end(); it++) {
834 double temp2 = (it->second) / (double) (counts[it->first] - 1);
835 sum += ((counts[it->first] - 1) * log(temp2));
836 sumDenom += ((1 / (double) (counts[it->first] - 1)) - ( 1 / (double) (numSamples - numTreatments) ));
839 double temp = SSTotal / (double) (numSamples - numTreatments);
840 double numeratorTerm1 = (numSamples - numTreatments) * log(temp);
841 double numerator = numeratorTerm1 - sum;
842 double denom = 1 + ((1 / (double) (3 * (numTreatments - 1))) * sumDenom);
844 BValue = numerator / denom;
848 for (int i = 0; i < iters; i++) {
849 if (m->control_pressed) { break; }
851 //randomly shuffle names to randomize the matrix
852 vector<string> copyNames = thisCombosLookupSets;
853 random_shuffle(copyNames.begin(), copyNames.end());
855 SSTotal = calcWithin(matrix, numTreatments, copyNames);
858 map<string, double> randomSSWithin = calcWithinEach(matrix, numTreatments, copyNames, counts);
862 for (it = randomSSWithin.begin(); it != randomSSWithin.end(); it++) {
863 double temp2 = (it->second) / (double) (counts[it->first] - numTreatments);
864 sum += ((counts[it->first] - 1) * log(temp2));
865 sumDenom += ((1 / (double) (counts[it->first] - 1)) - ( 1 / (double) (numSamples - numTreatments) ));
868 temp = SSTotal / (double) (numSamples - numTreatments);
869 numeratorTerm1 = (numSamples - numTreatments) * log(temp);
870 numerator = numeratorTerm1 - sum;
871 denom = 1 + ((1 / (double) (3 * (numTreatments - 1))) * sumDenom);
873 double randomBValue = numerator / denom;
875 if (randomBValue <= BValue) { count++; }
878 pValue = count / (float) iters;
880 out << BValue << '\t' << pValue << endl;
884 catch(exception& e) {
885 m->errorOut(e, "HomovaCommand", "calcHomova");
889 //**********************************************************************************************************************
890 map<string, double> HomovaCommand::calcWithinEach(vector< vector<double> >& thisMatrix, int numTreatments, vector<string> thisCombosLookupSets, map<string, int>& count) {
892 map<string, double> within; //maps treatment to within value
893 map<string, double>::iterator it;
894 map<string, int>::iterator itCount;
896 for (int i = 0; i < thisCombosLookupSets.size(); i++) {
897 itCount = count.find(thisCombosLookupSets[i]);
899 if (itCount == count.end()) { //first time we have seen this treatment
900 count[thisCombosLookupSets[i]] = 1;
902 count[thisCombosLookupSets[i]]++;
906 within[thisCombosLookupSets[i]] = 0.0;
910 //traverse lower triangle
911 for (int k = 0; k < thisMatrix.size(); k++) {
912 for (int l = k; l < thisMatrix[k].size(); l++) {
914 //if you are from the same treatment then eij is 1 so add, else eij = 0
915 if (thisCombosLookupSets[k] == thisCombosLookupSets[l]) {
916 within[thisCombosLookupSets[k]] += (thisMatrix[k][l] * thisMatrix[k][l]); //dij^2
921 //1 / (numSamples in this treatment)
922 for (it = within.begin(); it != within.end(); it++) {
923 (it->second) *= (1.0 / (float) count[it->first]);
928 catch(exception& e) {
929 m->errorOut(e, "HomovaCommand", "calcWithinEach");
933 //**********************************************************************************************************************
934 double HomovaCommand::calcWithin(vector< vector<double> >& thisMatrix, int numTreatments, vector<string> thisCombosLookupSets) {
938 //traverse lower triangle
939 for (int k = 0; k < thisMatrix.size(); k++) {
940 for (int l = k; l < thisMatrix[k].size(); l++) {
942 //if you are from the same treatment then eij is 1 so add, else eij = 0
943 if (thisCombosLookupSets[k] == thisCombosLookupSets[l]) {
944 total += (thisMatrix[k][l] * thisMatrix[k][l]); //dij^2
949 //1 / (numSamples / numTreatments)
950 total *= (1.0 / (float) (thisMatrix.size() / (float) numTreatments));
954 catch(exception& e) {
955 m->errorOut(e, "HomovaCommand", "calcWithin");
959 //**********************************************************************************************************************