2 * phylodiversitycommand.cpp
5 * Created by westcott on 4/30/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "phylodiversitycommand.h"
11 #include "treereader.h"
13 //**********************************************************************************************************************
14 vector<string> PhyloDiversityCommand::setParameters(){
17 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","phylodiv",false,true,true); parameters.push_back(ptree);
18 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
19 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
20 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
21 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
23 CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25 CommandParameter prarefy("rarefy", "Boolean", "", "F", "", "", "","rarefy",false,false); parameters.push_back(prarefy);
26 CommandParameter psummary("summary", "Boolean", "", "T", "", "", "","summary",false,false); parameters.push_back(psummary);
27 CommandParameter pcollect("collect", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcollect);
28 CommandParameter pscale("scale", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pscale);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "PhyloDiversityCommand", "setParameters");
41 //**********************************************************************************************************************
42 string PhyloDiversityCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The phylo.diversity command parameters are tree, group, name, count, groups, iters, freq, processors, scale, rarefy, collect and summary. tree and group are required, unless you have valid current files.\n";
46 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n";
47 helpString += "The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n";
48 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
49 helpString += "The scale parameter is used indicate that you want your output scaled to the number of sequences sampled, default = false. \n";
50 helpString += "The rarefy parameter allows you to create a rarefaction curve. The default is false.\n";
51 helpString += "The collect parameter allows you to create a collectors curve. The default is false.\n";
52 helpString += "The summary parameter allows you to create a .summary file. The default is true.\n";
53 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
54 helpString += "The phylo.diversity command should be in the following format: phylo.diversity(groups=yourGroups, rarefy=yourRarefy, iters=yourIters).\n";
55 helpString += "Example phylo.diversity(groups=A-B-C, rarefy=T, iters=500).\n";
56 helpString += "The phylo.diversity command output two files: .phylo.diversity and if rarefy=T, .rarefaction.\n";
57 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
61 m->errorOut(e, "PhyloDiversityCommand", "getHelpString");
65 //**********************************************************************************************************************
66 string PhyloDiversityCommand::getOutputPattern(string type) {
70 if (type == "phylodiv") { pattern = "[filename],[tag],phylodiv"; }
71 else if (type == "rarefy") { pattern = "[filename],[tag],phylodiv.rarefaction"; }
72 else if (type == "summary") { pattern = "[filename],[tag],phylodiv.summary"; }
73 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
78 m->errorOut(e, "PhyloDiversityCommand", "getOutputPattern");
83 //**********************************************************************************************************************
84 PhyloDiversityCommand::PhyloDiversityCommand(){
86 abort = true; calledHelp = true;
88 vector<string> tempOutNames;
89 outputTypes["phylodiv"] = tempOutNames;
90 outputTypes["rarefy"] = tempOutNames;
91 outputTypes["summary"] = tempOutNames;
94 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
98 //**********************************************************************************************************************
99 PhyloDiversityCommand::PhyloDiversityCommand(string option) {
101 abort = false; calledHelp = false;
103 //allow user to run help
104 if(option == "help") { help(); abort = true; calledHelp = true; }
105 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
108 vector<string> myArray = setParameters();;
110 OptionParser parser(option);
111 map<string,string> parameters = parser.getParameters();
112 map<string,string>::iterator it;
114 ValidParameters validParameter;
116 //check to make sure all parameters are valid for command
117 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
118 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
121 //initialize outputTypes
122 vector<string> tempOutNames;
123 outputTypes["phylodiv"] = tempOutNames;
124 outputTypes["rarefy"] = tempOutNames;
125 outputTypes["summary"] = tempOutNames;
127 //if the user changes the input directory command factory will send this info to us in the output parameter
128 string inputDir = validParameter.validFile(parameters, "inputdir", false);
129 if (inputDir == "not found"){ inputDir = ""; }
132 it = parameters.find("tree");
133 //user has given a template file
134 if(it != parameters.end()){
135 path = m->hasPath(it->second);
136 //if the user has not given a path then, add inputdir. else leave path alone.
137 if (path == "") { parameters["tree"] = inputDir + it->second; }
140 it = parameters.find("group");
141 //user has given a template file
142 if(it != parameters.end()){
143 path = m->hasPath(it->second);
144 //if the user has not given a path then, add inputdir. else leave path alone.
145 if (path == "") { parameters["group"] = inputDir + it->second; }
148 it = parameters.find("name");
149 //user has given a template file
150 if(it != parameters.end()){
151 path = m->hasPath(it->second);
152 //if the user has not given a path then, add inputdir. else leave path alone.
153 if (path == "") { parameters["name"] = inputDir + it->second; }
156 it = parameters.find("count");
157 //user has given a template file
158 if(it != parameters.end()){
159 path = m->hasPath(it->second);
160 //if the user has not given a path then, add inputdir. else leave path alone.
161 if (path == "") { parameters["count"] = inputDir + it->second; }
165 //check for required parameters
166 treefile = validParameter.validFile(parameters, "tree", true);
167 if (treefile == "not open") { treefile = ""; abort = true; }
168 else if (treefile == "not found") {
169 //if there is a current design file, use it
170 treefile = m->getTreeFile();
171 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
172 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
173 }else { m->setTreeFile(treefile); }
175 //check for required parameters
176 groupfile = validParameter.validFile(parameters, "group", true);
177 if (groupfile == "not open") { groupfile = ""; abort = true; }
178 else if (groupfile == "not found") { groupfile = ""; }
179 else { m->setGroupFile(groupfile); }
181 namefile = validParameter.validFile(parameters, "name", true);
182 if (namefile == "not open") { namefile = ""; abort = true; }
183 else if (namefile == "not found") { namefile = ""; }
184 else { m->setNameFile(namefile); }
186 countfile = validParameter.validFile(parameters, "count", true);
187 if (countfile == "not open") { countfile = ""; abort = true; }
188 else if (countfile == "not found") { countfile = ""; }
189 else { m->setCountTableFile(countfile); }
191 if ((namefile != "") && (countfile != "")) {
192 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
195 if ((groupfile != "") && (countfile != "")) {
196 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
199 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
202 temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
203 m->mothurConvert(temp, freq);
205 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
206 m->mothurConvert(temp, iters);
208 temp = validParameter.validFile(parameters, "rarefy", false); if (temp == "not found") { temp = "F"; }
209 rarefy = m->isTrue(temp);
210 if (!rarefy) { iters = 1; }
212 temp = validParameter.validFile(parameters, "summary", false); if (temp == "not found") { temp = "T"; }
213 summary = m->isTrue(temp);
215 temp = validParameter.validFile(parameters, "scale", false); if (temp == "not found") { temp = "F"; }
216 scale = m->isTrue(temp);
218 temp = validParameter.validFile(parameters, "collect", false); if (temp == "not found") { temp = "F"; }
219 collect = m->isTrue(temp);
221 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
222 m->setProcessors(temp);
223 m->mothurConvert(temp, processors);
225 groups = validParameter.validFile(parameters, "groups", false);
226 if (groups == "not found") { groups = ""; }
228 m->splitAtDash(groups, Groups);
229 m->setGroups(Groups);
232 if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
235 if (namefile == "") {
236 vector<string> files; files.push_back(treefile);
237 parser.getNameFile(files);
243 catch(exception& e) {
244 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
248 //**********************************************************************************************************************
250 int PhyloDiversityCommand::execute(){
253 if (abort == true) { if (calledHelp) { return 0; } return 2; }
255 int start = time(NULL);
257 m->setTreeFile(treefile);
259 if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
260 else { reader = new TreeReader(treefile, countfile); }
261 vector<Tree*> trees = reader->getTrees();
262 ct = trees[0]->getCountTable();
266 vector<string> mGroups = m->getGroups();
267 vector<string> tGroups = ct->getNamesOfGroups();
268 util.setGroups(mGroups, tGroups, "phylo.diversity"); //sets the groups the user wants to analyze
270 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
271 for (int i = 0; i < mGroups.size(); i++) { if (mGroups[i] == "xxx") { mGroups.erase(mGroups.begin()+i); break; } }
272 m->setGroups(mGroups);
274 vector<string> outputNames;
276 //for each of the users trees
277 for(int i = 0; i < trees.size(); i++) {
279 if (m->control_pressed) { delete ct; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
281 ofstream outSum, outRare, outCollect;
282 map<string, string> variables;
283 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
284 variables["[tag]"] = toString(i+1);
285 string outSumFile = getOutputFileName("summary",variables);
286 string outRareFile = getOutputFileName("rarefy",variables);
287 string outCollectFile = getOutputFileName("phylodiv",variables);
289 if (summary) { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile); outputTypes["summary"].push_back(outSumFile); }
290 if (rarefy) { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile); outputTypes["rarefy"].push_back(outRareFile); }
291 if (collect) { m->openOutputFile(outCollectFile, outCollect); outputNames.push_back(outCollectFile); outputTypes["phylodiv"].push_back(outCollectFile); }
293 int numLeafNodes = trees[i]->getNumLeaves();
295 //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
296 vector<int> randomLeaf;
297 for (int j = 0; j < numLeafNodes; j++) {
298 if (m->inUsersGroups(trees[i]->tree[j].getGroup(), mGroups) == true) { //is this a node from the group the user selected.
299 randomLeaf.push_back(j);
304 vector<float> sums; sums.resize(m->getGroups().size(), 0);
305 vector<float> sumsAboveRoot; sumsAboveRoot.resize(m->getGroups().size(), 0);
306 for (int j = 0; j < trees[i]->getNumNodes(); j++) {
307 if (trees[i]->tree[j].getBranchLength() < 0) { cout << j << '\t' << trees[i]->tree[j].getName() << '\t' << trees[i]->tree[j].getBranchLength() << endl; }
309 sum += abs(trees[i]->tree[j].getBranchLength());
310 for (int k = 0; k < m->getGroups().size(); k++) {
311 map<string, int>::iterator itGroup = trees[i]->tree[j].pcount.find(m->getGroups()[k]);
312 if (itGroup != trees[i]->tree[j].pcount.end()) { //this branch belongs to a group we care about
313 if (j < rootForGroup[m->getGroups()[k]]) {
314 sums[k] += abs(trees[i]->tree[j].getBranchLength());
316 sumsAboveRoot[k] += abs(trees[i]->tree[j].getBranchLength());
321 cout << sum << endl; //exit(1);
323 for (int k = 0; k < m->getGroups().size(); k++) {
324 cout << m->getGroups()[k] << "root node = " << rootForGroup[m->getGroups()[k]] << "sum below root = " << sums[k] << "sum above root = " << sumsAboveRoot[k] << endl;
328 numLeafNodes = randomLeaf.size(); //reset the number of leaf nodes you are using
330 //each group, each sampling, if no rarefy iters = 1;
331 map<string, vector<float> > diversity;
333 //each group, each sampling, if no rarefy iters = 1;
334 map<string, vector<float> > sumDiversity;
336 //find largest group total
337 int largestGroup = 0;
338 for (int j = 0; j < mGroups.size(); j++) {
339 int numSeqsThisGroup = ct->getGroupCount(mGroups[j]);
340 if (numSeqsThisGroup > largestGroup) { largestGroup = numSeqsThisGroup; }
342 //initialize diversity
343 diversity[mGroups[j]].resize(numSeqsThisGroup+1, 0.0); //numSampled
346 //initialize sumDiversity
347 sumDiversity[mGroups[j]].resize(numSeqsThisGroup+1, 0.0);
350 //convert freq percentage to number
352 if (freq < 1.0) { increment = largestGroup * freq;
353 }else { increment = freq; }
355 //initialize sampling spots
356 set<int> numSampledList;
357 for(int k = 1; k <= largestGroup; k++){ if((k == 1) || (k % increment == 0)){ numSampledList.insert(k); } }
358 if(largestGroup % increment != 0){ numSampledList.insert(largestGroup); }
360 //add other groups ending points
361 for (int j = 0; j < mGroups.size(); j++) {
362 if (numSampledList.count(diversity[mGroups[j]].size()-1) == 0) { numSampledList.insert(diversity[mGroups[j]].size()-1); }
365 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
367 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
370 vector<int> procIters;
372 int numItersPerProcessor = iters / processors;
374 //divide iters between processes
375 for (int h = 0; h < processors; h++) {
376 if(h == processors - 1){
377 numItersPerProcessor = iters - h * numItersPerProcessor;
379 procIters.push_back(numItersPerProcessor);
382 createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum);
384 }else{ //no need to paralellize if you dont want to rarefy
385 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
390 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
393 if (rarefy) { printData(numSampledList, sumDiversity, outRare, iters); }
397 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
399 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run phylo.diversity."); m->mothurOutEndLine();
402 m->mothurOutEndLine();
403 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
404 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
405 m->mothurOutEndLine();
410 catch(exception& e) {
411 m->errorOut(e, "PhyloDiversityCommand", "execute");
415 //**********************************************************************************************************************
416 int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
418 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
421 vector<int> processIDS;
422 map< string, vector<float> >::iterator itSum;
424 //loop through and create all the processes you want
425 while (process != processors) {
429 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
432 driver(t, div, sumDiv, procIters[process], increment, randomLeaf, numSampledList, outCollect, outSum, false);
434 string outTemp = outputDir + toString(getpid()) + ".sumDiv.temp";
436 m->openOutputFile(outTemp, out);
438 //output the sumDIversity
439 for (itSum = sumDiv.begin(); itSum != sumDiv.end(); itSum++) {
440 out << itSum->first << '\t' << (itSum->second).size() << '\t';
441 for (int k = 0; k < (itSum->second).size(); k++) {
442 out << (itSum->second)[k] << '\t';
451 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
452 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
457 driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true);
459 //force parent to wait until all the processes are done
460 for (int i=0;i<(processors-1);i++) {
461 int temp = processIDS[i];
465 //get data created by processes
466 for (int i=0;i<(processors-1);i++) {
468 //input the sumDIversity
469 string inTemp = outputDir + toString(processIDS[i]) + ".sumDiv.temp";
471 m->openInputFile(inTemp, in);
473 //output the sumDIversity
474 for (int j = 0; j < sumDiv.size(); j++) {
478 in >> group >> size; m->gobble(in);
480 for (int k = 0; k < size; k++) {
484 sumDiv[group][k] += tempVal;
490 m->mothurRemove(inTemp);
498 catch(exception& e) {
499 m->errorOut(e, "PhyloDiversityCommand", "createProcesses");
503 //**********************************************************************************************************************
504 int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum, bool doSumCollect){
506 int numLeafNodes = randomLeaf.size();
507 vector<string> mGroups = m->getGroups();
509 map<string, int> rootForGroup = getRootForGroups(t); //maps groupName to root node in tree. "root" for group may not be the trees root and we don't want to include the extra branches.
511 for (int l = 0; l < numIters; l++) {
512 random_shuffle(randomLeaf.begin(), randomLeaf.end());
515 map<string, int> counts;
516 vector< map<string, bool> > countedBranch;
517 for (int i = 0; i < t->getNumNodes(); i++) {
518 map<string, bool> temp;
519 for (int j = 0; j < mGroups.size(); j++) { temp[mGroups[j]] = false; }
520 countedBranch.push_back(temp);
523 for (int j = 0; j < mGroups.size(); j++) { counts[mGroups[j]] = 0; }
525 for(int k = 0; k < numLeafNodes; k++){
527 if (m->control_pressed) { return 0; }
529 //calc branch length of randomLeaf k
530 vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch, rootForGroup);
532 //for each group in the groups update the total branch length accounting for the names file
533 vector<string> groups = t->tree[randomLeaf[k]].getGroup();
535 for (int j = 0; j < groups.size(); j++) {
537 if (m->inUsersGroups(groups[j], mGroups)) {
538 int numSeqsInGroupJ = 0;
539 map<string, int>::iterator it;
540 it = t->tree[randomLeaf[k]].pcount.find(groups[j]);
541 if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
542 numSeqsInGroupJ = it->second;
545 if (numSeqsInGroupJ != 0) { div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j]; }
547 for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
548 div[groups[j]][s] = div[groups[j]][s-1]; //update counts, but don't add in redundant branch lengths
550 counts[groups[j]] += numSeqsInGroupJ;
556 //add this diversity to the sum
557 for (int j = 0; j < mGroups.size(); j++) {
558 for (int g = 0; g < div[mGroups[j]].size(); g++) {
559 sumDiv[mGroups[j]][g] += div[mGroups[j]][g];
564 if ((collect) && (l == 0) && doSumCollect) { printData(numSampledList, div, outCollect, 1); }
565 if ((summary) && (l == 0) && doSumCollect) { printSumData(div, outSum, 1); }
571 catch(exception& e) {
572 m->errorOut(e, "PhyloDiversityCommand", "driver");
577 //**********************************************************************************************************************
579 void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofstream& out, int numIters){
582 out << "Groups\tnumSampled\tphyloDiversity" << endl;
584 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
586 vector<string> mGroups = m->getGroups();
587 for (int j = 0; j < mGroups.size(); j++) {
588 int numSampled = (div[mGroups[j]].size()-1);
589 out << mGroups[j] << '\t' << numSampled << '\t';
593 if (scale) { score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
594 else { score = div[mGroups[j]][numSampled] / (float)numIters; }
596 out << setprecision(4) << score << endl;
597 cout << mGroups[j] << '\t' << numSampled << '\t'<< setprecision(4) << score << endl;
603 catch(exception& e) {
604 m->errorOut(e, "PhyloDiversityCommand", "printSumData");
608 //**********************************************************************************************************************
610 void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float> >& div, ofstream& out, int numIters){
613 out << "numSampled\t";
614 vector<string> mGroups = m->getGroups();
615 for (int i = 0; i < mGroups.size(); i++) { out << mGroups[i] << '\t'; }
618 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
620 for (set<int>::iterator it = num.begin(); it != num.end(); it++) {
621 int numSampled = *it;
623 out << numSampled << '\t';
625 for (int j = 0; j < mGroups.size(); j++) {
626 if (numSampled < div[mGroups[j]].size()) {
628 if (scale) { score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
629 else { score = div[mGroups[j]][numSampled] / (float)numIters; }
631 out << setprecision(4) << score << '\t';
632 }else { out << "NA" << '\t'; }
640 catch(exception& e) {
641 m->errorOut(e, "PhyloDiversityCommand", "printData");
645 //**********************************************************************************************************************
646 //need a vector of floats one branch length for every group the node represents.
647 vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, vector< map<string, bool> >& counted, map<string, int> roots){
650 //calc the branch length
651 //while you aren't at root
655 vector<string> groups = t->tree[leaf].getGroup();
656 sums.resize(groups.size(), 0.0);
660 if(t->tree[index].getBranchLength() != -1){
661 for (int k = 0; k < groups.size(); k++) {
662 sums[k] += abs(t->tree[index].getBranchLength());
667 index = t->tree[index].getParent();
669 //while you aren't at root
670 while(t->tree[index].getParent() != -1){
672 if (m->control_pressed) { return sums; }
674 for (int k = 0; k < groups.size(); k++) {
676 if (index >= roots[groups[k]]) { counted[index][groups[k]] = true; } //if you are at this groups "root", then say we are done
678 if (!counted[index][groups[k]]){ //if counted[index][groups[k] is true this groups has already added all br from here to root, so quit early
679 if (t->tree[index].getBranchLength() != -1) {
680 sums[k] += abs(t->tree[index].getBranchLength());
682 counted[index][groups[k]] = true;
685 index = t->tree[index].getParent();
691 catch(exception& e) {
692 m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength");
696 //**********************************************************************************************************************
697 map<string, int> PhyloDiversityCommand::getRootForGroups(Tree* t){
699 map<string, int> roots; //maps group to root for group, may not be root of tree
700 map<string, bool> done;
702 //initialize root for all groups to -1
703 for (int k = 0; k < (t->getCountTable())->getNamesOfGroups().size(); k++) { done[(t->getCountTable())->getNamesOfGroups()[k]] = false; }
705 for (int i = 0; i < t->getNumLeaves(); i++) {
707 vector<string> groups = t->tree[i].getGroup();
709 int index = t->tree[i].getParent();
711 for (int j = 0; j < groups.size(); j++) {
713 if (done[groups[j]] == false) { //we haven't found the root for this group yet, initialize it
714 done[groups[j]] = true;
715 roots[groups[j]] = i; //set root to self to start
718 //while you aren't at root
719 while(t->tree[index].getParent() != -1){
721 if (m->control_pressed) { return roots; }
723 //do both your chidren have have descendants from the users groups?
724 int lc = t->tree[index].getLChild();
725 int rc = t->tree[index].getRChild();
728 map<string, int>:: iterator itGroup = t->tree[lc].pcount.find(groups[j]);
729 if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++; }
732 itGroup = t->tree[rc].pcount.find(groups[j]);
733 if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; }
735 if ((LpcountSize != 0) && (RpcountSize != 0)) { //possible root
736 if (index > roots[groups[j]]) { roots[groups[j]] = index; }
739 index = t->tree[index].getParent();
750 catch(exception& e) {
751 m->errorOut(e, "PhyloDiversityCommand", "getRootForGroups");
755 //**********************************************************************************************************************