2 * phylodiversitycommand.cpp
5 * Created by westcott on 4/30/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "phylodiversitycommand.h"
11 #include "treereader.h"
13 //**********************************************************************************************************************
14 vector<string> PhyloDiversityCommand::setParameters(){
17 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","phylodiv",false,true,true); parameters.push_back(ptree);
18 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
19 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
20 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
21 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
23 CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25 CommandParameter prarefy("rarefy", "Boolean", "", "F", "", "", "","rarefy",false,false); parameters.push_back(prarefy);
26 CommandParameter psummary("summary", "Boolean", "", "T", "", "", "","summary",false,false); parameters.push_back(psummary);
27 CommandParameter pcollect("collect", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcollect);
28 CommandParameter pscale("scale", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pscale);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "PhyloDiversityCommand", "setParameters");
41 //**********************************************************************************************************************
42 string PhyloDiversityCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The phylo.diversity command parameters are tree, group, name, count, groups, iters, freq, processors, scale, rarefy, collect and summary. tree and group are required, unless you have valid current files.\n";
46 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n";
47 helpString += "The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n";
48 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
49 helpString += "The scale parameter is used indicate that you want your output scaled to the number of sequences sampled, default = false. \n";
50 helpString += "The rarefy parameter allows you to create a rarefaction curve. The default is false.\n";
51 helpString += "The collect parameter allows you to create a collectors curve. The default is false.\n";
52 helpString += "The summary parameter allows you to create a .summary file. The default is true.\n";
53 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
54 helpString += "The phylo.diversity command should be in the following format: phylo.diversity(groups=yourGroups, rarefy=yourRarefy, iters=yourIters).\n";
55 helpString += "Example phylo.diversity(groups=A-B-C, rarefy=T, iters=500).\n";
56 helpString += "The phylo.diversity command output two files: .phylo.diversity and if rarefy=T, .rarefaction.\n";
57 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
61 m->errorOut(e, "PhyloDiversityCommand", "getHelpString");
65 //**********************************************************************************************************************
66 string PhyloDiversityCommand::getOutputPattern(string type) {
70 if (type == "phylodiv") { pattern = "[filename],[tag],phylodiv"; }
71 else if (type == "rarefy") { pattern = "[filename],[tag],phylodiv.rarefaction"; }
72 else if (type == "summary") { pattern = "[filename],[tag],phylodiv.summary"; }
73 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
78 m->errorOut(e, "PhyloDiversityCommand", "getOutputPattern");
83 //**********************************************************************************************************************
84 PhyloDiversityCommand::PhyloDiversityCommand(){
86 abort = true; calledHelp = true;
88 vector<string> tempOutNames;
89 outputTypes["phylodiv"] = tempOutNames;
90 outputTypes["rarefy"] = tempOutNames;
91 outputTypes["summary"] = tempOutNames;
94 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
98 //**********************************************************************************************************************
99 PhyloDiversityCommand::PhyloDiversityCommand(string option) {
101 abort = false; calledHelp = false;
103 //allow user to run help
104 if(option == "help") { help(); abort = true; calledHelp = true; }
105 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
108 vector<string> myArray = setParameters();;
110 OptionParser parser(option);
111 map<string,string> parameters = parser.getParameters();
112 map<string,string>::iterator it;
114 ValidParameters validParameter;
116 //check to make sure all parameters are valid for command
117 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
118 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
121 //initialize outputTypes
122 vector<string> tempOutNames;
123 outputTypes["phylodiv"] = tempOutNames;
124 outputTypes["rarefy"] = tempOutNames;
125 outputTypes["summary"] = tempOutNames;
127 //if the user changes the input directory command factory will send this info to us in the output parameter
128 string inputDir = validParameter.validFile(parameters, "inputdir", false);
129 if (inputDir == "not found"){ inputDir = ""; }
132 it = parameters.find("tree");
133 //user has given a template file
134 if(it != parameters.end()){
135 path = m->hasPath(it->second);
136 //if the user has not given a path then, add inputdir. else leave path alone.
137 if (path == "") { parameters["tree"] = inputDir + it->second; }
140 it = parameters.find("group");
141 //user has given a template file
142 if(it != parameters.end()){
143 path = m->hasPath(it->second);
144 //if the user has not given a path then, add inputdir. else leave path alone.
145 if (path == "") { parameters["group"] = inputDir + it->second; }
148 it = parameters.find("name");
149 //user has given a template file
150 if(it != parameters.end()){
151 path = m->hasPath(it->second);
152 //if the user has not given a path then, add inputdir. else leave path alone.
153 if (path == "") { parameters["name"] = inputDir + it->second; }
156 it = parameters.find("count");
157 //user has given a template file
158 if(it != parameters.end()){
159 path = m->hasPath(it->second);
160 //if the user has not given a path then, add inputdir. else leave path alone.
161 if (path == "") { parameters["count"] = inputDir + it->second; }
165 //check for required parameters
166 treefile = validParameter.validFile(parameters, "tree", true);
167 if (treefile == "not open") { treefile = ""; abort = true; }
168 else if (treefile == "not found") {
169 //if there is a current design file, use it
170 treefile = m->getTreeFile();
171 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
172 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
173 }else { m->setTreeFile(treefile); }
175 //check for required parameters
176 groupfile = validParameter.validFile(parameters, "group", true);
177 if (groupfile == "not open") { groupfile = ""; abort = true; }
178 else if (groupfile == "not found") { groupfile = ""; }
179 else { m->setGroupFile(groupfile); }
181 namefile = validParameter.validFile(parameters, "name", true);
182 if (namefile == "not open") { namefile = ""; abort = true; }
183 else if (namefile == "not found") { namefile = ""; }
184 else { m->setNameFile(namefile); }
186 countfile = validParameter.validFile(parameters, "count", true);
187 if (countfile == "not open") { countfile = ""; abort = true; }
188 else if (countfile == "not found") { countfile = ""; }
189 else { m->setCountTableFile(countfile); }
191 if ((namefile != "") && (countfile != "")) {
192 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
195 if ((groupfile != "") && (countfile != "")) {
196 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
199 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
202 temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
203 m->mothurConvert(temp, freq);
205 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
206 m->mothurConvert(temp, iters);
208 temp = validParameter.validFile(parameters, "rarefy", false); if (temp == "not found") { temp = "F"; }
209 rarefy = m->isTrue(temp);
210 if (!rarefy) { iters = 1; }
212 temp = validParameter.validFile(parameters, "summary", false); if (temp == "not found") { temp = "T"; }
213 summary = m->isTrue(temp);
215 temp = validParameter.validFile(parameters, "scale", false); if (temp == "not found") { temp = "F"; }
216 scale = m->isTrue(temp);
218 temp = validParameter.validFile(parameters, "collect", false); if (temp == "not found") { temp = "F"; }
219 collect = m->isTrue(temp);
221 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
222 m->setProcessors(temp);
223 m->mothurConvert(temp, processors);
225 groups = validParameter.validFile(parameters, "groups", false);
226 if (groups == "not found") { groups = ""; }
228 m->splitAtDash(groups, Groups);
229 m->setGroups(Groups);
232 if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
235 if (namefile == "") {
236 vector<string> files; files.push_back(treefile);
237 parser.getNameFile(files);
243 catch(exception& e) {
244 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
248 //**********************************************************************************************************************
250 int PhyloDiversityCommand::execute(){
253 if (abort == true) { if (calledHelp) { return 0; } return 2; }
255 int start = time(NULL);
257 m->setTreeFile(treefile);
259 if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
260 else { reader = new TreeReader(treefile, countfile); }
261 vector<Tree*> trees = reader->getTrees();
262 ct = trees[0]->getCountTable();
266 vector<string> mGroups = m->getGroups();
267 vector<string> tGroups = ct->getNamesOfGroups();
268 util.setGroups(mGroups, tGroups, "phylo.diversity"); //sets the groups the user wants to analyze
270 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
271 for (int i = 0; i < mGroups.size(); i++) { if (mGroups[i] == "xxx") { mGroups.erase(mGroups.begin()+i); break; } }
272 m->setGroups(mGroups);
274 vector<string> outputNames;
276 //for each of the users trees
277 for(int i = 0; i < trees.size(); i++) {
279 if (m->control_pressed) { delete ct; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
281 ofstream outSum, outRare, outCollect;
282 map<string, string> variables;
283 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
284 variables["[tag]"] = toString(i+1);
285 string outSumFile = getOutputFileName("summary",variables);
286 string outRareFile = getOutputFileName("rarefy",variables);
287 string outCollectFile = getOutputFileName("phylodiv",variables);
289 if (summary) { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile); outputTypes["summary"].push_back(outSumFile); }
290 if (rarefy) { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile); outputTypes["rarefy"].push_back(outRareFile); }
291 if (collect) { m->openOutputFile(outCollectFile, outCollect); outputNames.push_back(outCollectFile); outputTypes["phylodiv"].push_back(outCollectFile); }
293 int numLeafNodes = trees[i]->getNumLeaves();
295 //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
296 vector<int> randomLeaf;
297 for (int j = 0; j < numLeafNodes; j++) {
298 if (m->inUsersGroups(trees[i]->tree[j].getGroup(), mGroups) == true) { //is this a node from the group the user selected.
299 randomLeaf.push_back(j);
304 vector<float> sums; sums.resize(m->getGroups().size(), 0);
305 vector<float> sumsAboveRoot; sumsAboveRoot.resize(m->getGroups().size(), 0);
306 for (int j = 0; j < trees[i]->getNumNodes(); j++) {
307 if (trees[i]->tree[j].getBranchLength() < 0) { cout << j << '\t' << trees[i]->tree[j].getName() << '\t' << trees[i]->tree[j].getBranchLength() << endl; }
309 sum += abs(trees[i]->tree[j].getBranchLength());
310 for (int k = 0; k < m->getGroups().size(); k++) {
311 map<string, int>::iterator itGroup = trees[i]->tree[j].pcount.find(m->getGroups()[k]);
312 if (itGroup != trees[i]->tree[j].pcount.end()) { //this branch belongs to a group we care about
313 if (j < rootForGroup[m->getGroups()[k]]) {
314 sums[k] += abs(trees[i]->tree[j].getBranchLength());
316 sumsAboveRoot[k] += abs(trees[i]->tree[j].getBranchLength());
321 cout << sum << endl; //exit(1);
323 for (int k = 0; k < m->getGroups().size(); k++) {
324 cout << m->getGroups()[k] << "root node = " << rootForGroup[m->getGroups()[k]] << "sum below root = " << sums[k] << "sum above root = " << sumsAboveRoot[k] << endl;
328 numLeafNodes = randomLeaf.size(); //reset the number of leaf nodes you are using
330 //each group, each sampling, if no rarefy iters = 1;
331 map<string, vector<float> > diversity;
333 //each group, each sampling, if no rarefy iters = 1;
334 map<string, vector<float> > sumDiversity;
336 //find largest group total
337 int largestGroup = 0;
338 for (int j = 0; j < mGroups.size(); j++) {
339 int numSeqsThisGroup = ct->getGroupCount(mGroups[j]);
340 if (numSeqsThisGroup > largestGroup) { largestGroup = numSeqsThisGroup; }
342 //initialize diversity
343 diversity[mGroups[j]].resize(numSeqsThisGroup+1, 0.0); //numSampled
346 //initialize sumDiversity
347 sumDiversity[mGroups[j]].resize(numSeqsThisGroup+1, 0.0);
350 //convert freq percentage to number
352 if (freq < 1.0) { increment = largestGroup * freq;
353 }else { increment = freq; }
355 //initialize sampling spots
356 set<int> numSampledList;
357 for(int k = 1; k <= largestGroup; k++){ if((k == 1) || (k % increment == 0)){ numSampledList.insert(k); } }
358 if(largestGroup % increment != 0){ numSampledList.insert(largestGroup); }
360 //add other groups ending points
361 for (int j = 0; j < mGroups.size(); j++) {
362 if (numSampledList.count(diversity[mGroups[j]].size()-1) == 0) { numSampledList.insert(diversity[mGroups[j]].size()-1); }
366 vector<int> procIters;
367 int numItersPerProcessor = iters / processors;
369 //divide iters between processes
370 for (int h = 0; h < processors; h++) {
371 if(h == processors - 1){ numItersPerProcessor = iters - h * numItersPerProcessor; }
372 procIters.push_back(numItersPerProcessor);
375 createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum);
377 }else{ //no need to paralellize if you dont want to rarefy
378 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
381 if (rarefy) { printData(numSampledList, sumDiversity, outRare, iters); }
385 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
387 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run phylo.diversity."); m->mothurOutEndLine();
390 m->mothurOutEndLine();
391 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
392 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
393 m->mothurOutEndLine();
398 catch(exception& e) {
399 m->errorOut(e, "PhyloDiversityCommand", "execute");
403 //**********************************************************************************************************************
404 int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
408 vector<int> processIDS;
409 map< string, vector<float> >::iterator itSum;
411 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
413 //loop through and create all the processes you want
414 while (process != processors) {
418 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
421 driver(t, div, sumDiv, procIters[process], increment, randomLeaf, numSampledList, outCollect, outSum, false);
423 string outTemp = outputDir + toString(getpid()) + ".sumDiv.temp";
425 m->openOutputFile(outTemp, out);
427 //output the sumDIversity
428 for (itSum = sumDiv.begin(); itSum != sumDiv.end(); itSum++) {
429 out << itSum->first << '\t' << (itSum->second).size() << '\t';
430 for (int k = 0; k < (itSum->second).size(); k++) {
431 out << (itSum->second)[k] << '\t';
440 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
441 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
446 driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true);
448 //force parent to wait until all the processes are done
449 for (int i=0;i<(processors-1);i++) {
450 int temp = processIDS[i];
454 //get data created by processes
455 for (int i=0;i<(processors-1);i++) {
457 //input the sumDIversity
458 string inTemp = outputDir + toString(processIDS[i]) + ".sumDiv.temp";
460 m->openInputFile(inTemp, in);
462 //output the sumDIversity
463 for (int j = 0; j < sumDiv.size(); j++) {
467 in >> group >> size; m->gobble(in);
469 for (int k = 0; k < size; k++) {
473 sumDiv[group][k] += tempVal;
479 m->mothurRemove(inTemp);
484 vector<phylodivData*> pDataArray;
485 DWORD dwThreadIdArray[processors-1];
486 HANDLE hThreadArray[processors-1];
487 vector<CountTable*> cts;
489 map<string, int> rootForGroup = getRootForGroups(t);
491 //Create processor worker threads.
492 for( int i=1; i<processors; i++ ){
493 CountTable* copyCount = new CountTable();
495 Tree* copyTree = new Tree(copyCount);
496 copyTree->getCopy(t);
498 cts.push_back(copyCount);
499 trees.push_back(copyTree);
501 map<string, vector<float> > copydiv = div;
502 map<string, vector<float> > copysumDiv = sumDiv;
503 vector<int> copyrandomLeaf = randomLeaf;
504 set<int> copynumSampledList = numSampledList;
505 map<string, int> copyRootForGrouping = rootForGroup;
507 phylodivData* temp = new phylodivData(m, procIters[i], copydiv, copysumDiv, copyTree, copyCount, increment, copyrandomLeaf, copynumSampledList, copyRootForGrouping);
508 pDataArray.push_back(temp);
509 processIDS.push_back(i);
511 hThreadArray[i-1] = CreateThread(NULL, 0, MyPhyloDivThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
514 driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true);
516 //Wait until all threads have terminated.
517 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
519 //Close all thread handles and free memory allocations.
520 for(int i=0; i < pDataArray.size(); i++){
521 for (itSum = pDataArray[i]->sumDiv.begin(); itSum != pDataArray[i]->sumDiv.end(); itSum++) {
522 for (int k = 0; k < (itSum->second).size(); k++) {
523 sumDiv[itSum->first][k] += (itSum->second)[k];
528 CloseHandle(hThreadArray[i]);
529 delete pDataArray[i];
537 catch(exception& e) {
538 m->errorOut(e, "PhyloDiversityCommand", "createProcesses");
542 //**********************************************************************************************************************
543 int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum, bool doSumCollect){
545 int numLeafNodes = randomLeaf.size();
546 vector<string> mGroups = m->getGroups();
548 map<string, int> rootForGroup = getRootForGroups(t); //maps groupName to root node in tree. "root" for group may not be the trees root and we don't want to include the extra branches.
550 for (int l = 0; l < numIters; l++) {
551 random_shuffle(randomLeaf.begin(), randomLeaf.end());
554 map<string, int> counts;
555 vector< map<string, bool> > countedBranch;
556 for (int i = 0; i < t->getNumNodes(); i++) {
557 map<string, bool> temp;
558 for (int j = 0; j < mGroups.size(); j++) { temp[mGroups[j]] = false; }
559 countedBranch.push_back(temp);
562 for (int j = 0; j < mGroups.size(); j++) { counts[mGroups[j]] = 0; }
564 for(int k = 0; k < numLeafNodes; k++){
566 if (m->control_pressed) { return 0; }
568 //calc branch length of randomLeaf k
569 vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch, rootForGroup);
571 //for each group in the groups update the total branch length accounting for the names file
572 vector<string> groups = t->tree[randomLeaf[k]].getGroup();
574 for (int j = 0; j < groups.size(); j++) {
576 if (m->inUsersGroups(groups[j], mGroups)) {
577 int numSeqsInGroupJ = 0;
578 map<string, int>::iterator it;
579 it = t->tree[randomLeaf[k]].pcount.find(groups[j]);
580 if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
581 numSeqsInGroupJ = it->second;
584 if (numSeqsInGroupJ != 0) { div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j]; }
586 for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
587 div[groups[j]][s] = div[groups[j]][s-1]; //update counts, but don't add in redundant branch lengths
589 counts[groups[j]] += numSeqsInGroupJ;
595 //add this diversity to the sum
596 for (int j = 0; j < mGroups.size(); j++) {
597 for (int g = 0; g < div[mGroups[j]].size(); g++) {
598 sumDiv[mGroups[j]][g] += div[mGroups[j]][g];
603 if ((collect) && (l == 0) && doSumCollect) { printData(numSampledList, div, outCollect, 1); }
604 if ((summary) && (l == 0) && doSumCollect) { printSumData(div, outSum, 1); }
610 catch(exception& e) {
611 m->errorOut(e, "PhyloDiversityCommand", "driver");
616 //**********************************************************************************************************************
618 void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofstream& out, int numIters){
621 out << "Groups\tnumSampled\tphyloDiversity" << endl;
623 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
625 vector<string> mGroups = m->getGroups();
626 for (int j = 0; j < mGroups.size(); j++) {
627 int numSampled = (div[mGroups[j]].size()-1);
628 out << mGroups[j] << '\t' << numSampled << '\t';
632 if (scale) { score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
633 else { score = div[mGroups[j]][numSampled] / (float)numIters; }
635 out << setprecision(4) << score << endl;
636 //cout << mGroups[j] << '\t' << numSampled << '\t'<< setprecision(4) << score << endl;
642 catch(exception& e) {
643 m->errorOut(e, "PhyloDiversityCommand", "printSumData");
647 //**********************************************************************************************************************
649 void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float> >& div, ofstream& out, int numIters){
652 out << "numSampled\t";
653 vector<string> mGroups = m->getGroups();
654 for (int i = 0; i < mGroups.size(); i++) { out << mGroups[i] << '\t'; }
657 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
659 for (set<int>::iterator it = num.begin(); it != num.end(); it++) {
660 int numSampled = *it;
662 out << numSampled << '\t';
664 for (int j = 0; j < mGroups.size(); j++) {
665 if (numSampled < div[mGroups[j]].size()) {
667 if (scale) { score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
668 else { score = div[mGroups[j]][numSampled] / (float)numIters; }
670 out << setprecision(4) << score << '\t';
671 }else { out << "NA" << '\t'; }
679 catch(exception& e) {
680 m->errorOut(e, "PhyloDiversityCommand", "printData");
684 //**********************************************************************************************************************
685 //need a vector of floats one branch length for every group the node represents.
686 vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, vector< map<string, bool> >& counted, map<string, int> roots){
689 //calc the branch length
690 //while you aren't at root
694 vector<string> groups = t->tree[leaf].getGroup();
695 sums.resize(groups.size(), 0.0);
699 if(t->tree[index].getBranchLength() != -1){
700 for (int k = 0; k < groups.size(); k++) {
701 sums[k] += abs(t->tree[index].getBranchLength());
706 index = t->tree[index].getParent();
708 //while you aren't at root
709 while(t->tree[index].getParent() != -1){
711 if (m->control_pressed) { return sums; }
713 for (int k = 0; k < groups.size(); k++) {
715 if (index >= roots[groups[k]]) { counted[index][groups[k]] = true; } //if you are at this groups "root", then say we are done
717 if (!counted[index][groups[k]]){ //if counted[index][groups[k] is true this groups has already added all br from here to root, so quit early
718 if (t->tree[index].getBranchLength() != -1) {
719 sums[k] += abs(t->tree[index].getBranchLength());
721 counted[index][groups[k]] = true;
724 index = t->tree[index].getParent();
730 catch(exception& e) {
731 m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength");
735 //**********************************************************************************************************************
736 map<string, int> PhyloDiversityCommand::getRootForGroups(Tree* t){
738 map<string, int> roots; //maps group to root for group, may not be root of tree
739 map<string, bool> done;
741 //initialize root for all groups to -1
742 for (int k = 0; k < (t->getCountTable())->getNamesOfGroups().size(); k++) { done[(t->getCountTable())->getNamesOfGroups()[k]] = false; }
744 for (int i = 0; i < t->getNumLeaves(); i++) {
746 vector<string> groups = t->tree[i].getGroup();
748 int index = t->tree[i].getParent();
750 for (int j = 0; j < groups.size(); j++) {
752 if (done[groups[j]] == false) { //we haven't found the root for this group yet, initialize it
753 done[groups[j]] = true;
754 roots[groups[j]] = i; //set root to self to start
757 //while you aren't at root
758 while(t->tree[index].getParent() != -1){
760 if (m->control_pressed) { return roots; }
762 //do both your chidren have have descendants from the users groups?
763 int lc = t->tree[index].getLChild();
764 int rc = t->tree[index].getRChild();
767 map<string, int>:: iterator itGroup = t->tree[lc].pcount.find(groups[j]);
768 if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++; }
771 itGroup = t->tree[rc].pcount.find(groups[j]);
772 if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; }
774 if ((LpcountSize != 0) && (RpcountSize != 0)) { //possible root
775 if (index > roots[groups[j]]) { roots[groups[j]] = index; }
778 index = t->tree[index].getParent();
789 catch(exception& e) {
790 m->errorOut(e, "PhyloDiversityCommand", "getRootForGroups");
794 //**********************************************************************************************************************