2 * phylodiversitycommand.cpp
5 * Created by westcott on 4/30/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "phylodiversitycommand.h"
12 //**********************************************************************************************************************
13 vector<string> PhyloDiversityCommand::setParameters(){
16 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
17 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pgroup);
18 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
19 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
20 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
21 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
22 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23 CommandParameter prarefy("rarefy", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prarefy);
24 CommandParameter psummary("summary", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(psummary);
25 CommandParameter pcollect("collect", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcollect);
26 CommandParameter pscale("scale", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pscale);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "PhyloDiversityCommand", "setParameters");
39 //**********************************************************************************************************************
40 string PhyloDiversityCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The phylo.diversity command parameters are tree, group, name, groups, iters, freq, processors, scale, rarefy, collect and summary. tree and group are required, unless you have valid current files.\n";
44 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n";
45 helpString += "The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n";
46 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
47 helpString += "The scale parameter is used indicate that you want your output scaled to the number of sequences sampled, default = false. \n";
48 helpString += "The rarefy parameter allows you to create a rarefaction curve. The default is false.\n";
49 helpString += "The collect parameter allows you to create a collectors curve. The default is false.\n";
50 helpString += "The summary parameter allows you to create a .summary file. The default is true.\n";
51 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52 helpString += "The phylo.diversity command should be in the following format: phylo.diversity(groups=yourGroups, rarefy=yourRarefy, iters=yourIters).\n";
53 helpString += "Example phylo.diversity(groups=A-B-C, rarefy=T, iters=500).\n";
54 helpString += "The phylo.diversity command output two files: .phylo.diversity and if rarefy=T, .rarefaction.\n";
55 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
59 m->errorOut(e, "PhyloDiversityCommand", "getHelpString");
65 //**********************************************************************************************************************
66 PhyloDiversityCommand::PhyloDiversityCommand(){
68 abort = true; calledHelp = true;
70 vector<string> tempOutNames;
71 outputTypes["phylodiv"] = tempOutNames;
72 outputTypes["rarefy"] = tempOutNames;
73 outputTypes["summary"] = tempOutNames;
76 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
80 //**********************************************************************************************************************
81 PhyloDiversityCommand::PhyloDiversityCommand(string option) {
83 abort = false; calledHelp = false;
85 //allow user to run help
86 if(option == "help") { help(); abort = true; calledHelp = true; }
87 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
90 vector<string> myArray = setParameters();;
92 OptionParser parser(option);
93 map<string,string> parameters = parser.getParameters();
94 map<string,string>::iterator it;
96 ValidParameters validParameter;
98 //check to make sure all parameters are valid for command
99 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
100 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
103 //initialize outputTypes
104 vector<string> tempOutNames;
105 outputTypes["phylodiv"] = tempOutNames;
106 outputTypes["rarefy"] = tempOutNames;
107 outputTypes["summary"] = tempOutNames;
109 //if the user changes the input directory command factory will send this info to us in the output parameter
110 string inputDir = validParameter.validFile(parameters, "inputdir", false);
111 if (inputDir == "not found"){ inputDir = ""; }
114 it = parameters.find("tree");
115 //user has given a template file
116 if(it != parameters.end()){
117 path = m->hasPath(it->second);
118 //if the user has not given a path then, add inputdir. else leave path alone.
119 if (path == "") { parameters["tree"] = inputDir + it->second; }
122 it = parameters.find("group");
123 //user has given a template file
124 if(it != parameters.end()){
125 path = m->hasPath(it->second);
126 //if the user has not given a path then, add inputdir. else leave path alone.
127 if (path == "") { parameters["group"] = inputDir + it->second; }
130 it = parameters.find("name");
131 //user has given a template file
132 if(it != parameters.end()){
133 path = m->hasPath(it->second);
134 //if the user has not given a path then, add inputdir. else leave path alone.
135 if (path == "") { parameters["name"] = inputDir + it->second; }
141 m->namesOfGroups.clear();
142 m->Treenames.clear();
145 //check for required parameters
146 treefile = validParameter.validFile(parameters, "tree", true);
147 if (treefile == "not open") { abort = true; }
148 else if (treefile == "not found") {
149 //if there is a current design file, use it
150 treefile = m->getTreeFile();
151 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
152 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
155 //check for required parameters
156 groupfile = validParameter.validFile(parameters, "group", true);
157 if (groupfile == "not open") { groupfile = ""; abort = true; }
158 else if (groupfile == "not found") { groupfile = ""; }
160 namefile = validParameter.validFile(parameters, "name", true);
161 if (namefile == "not open") { abort = true; }
162 else if (namefile == "not found") { namefile = ""; }
164 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
167 temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
170 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
171 convert(temp, iters);
173 temp = validParameter.validFile(parameters, "rarefy", false); if (temp == "not found") { temp = "F"; }
174 rarefy = m->isTrue(temp);
175 if (!rarefy) { iters = 1; }
177 temp = validParameter.validFile(parameters, "summary", false); if (temp == "not found") { temp = "T"; }
178 summary = m->isTrue(temp);
180 temp = validParameter.validFile(parameters, "scale", false); if (temp == "not found") { temp = "F"; }
181 scale = m->isTrue(temp);
183 temp = validParameter.validFile(parameters, "collect", false); if (temp == "not found") { temp = "F"; }
184 collect = m->isTrue(temp);
186 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
187 m->setProcessors(temp);
188 convert(temp, processors);
190 groups = validParameter.validFile(parameters, "groups", false);
191 if (groups == "not found") { groups = ""; }
193 m->splitAtDash(groups, Groups);
197 if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
201 catch(exception& e) {
202 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
206 //**********************************************************************************************************************
208 int PhyloDiversityCommand::execute(){
211 if (abort == true) { if (calledHelp) { return 0; } return 2; }
213 m->setTreeFile(treefile);
215 if (groupfile != "") {
216 //read in group map info.
217 tmap = new TreeMap(groupfile);
219 }else{ //fake out by putting everyone in one group
220 Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
221 tmap = new TreeMap();
223 for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
226 if (namefile != "") { readNamesFile(); }
228 read = new ReadNewickTree(treefile);
229 int readOk = read->read(tmap);
231 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
233 read->AssembleTrees();
234 vector<Tree*> trees = read->getTrees();
237 //make sure all files match
238 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
240 if (namefile != "") {
241 if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
242 else { numNamesInTree = m->Treenames.size(); }
243 }else { numNamesInTree = m->Treenames.size(); }
246 //output any names that are in group file but not in tree
247 if (numNamesInTree < tmap->getNumSeqs()) {
248 for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
249 //is that name in the tree?
251 for (int j = 0; j < m->Treenames.size(); j++) {
252 if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
256 if (m->control_pressed) {
257 delete tmap; for (int i = 0; i < trees.size(); i++) { delete trees[i]; }
258 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
263 //then you did not find it so report it
264 if (count == m->Treenames.size()) {
265 //if it is in your namefile then don't remove
266 map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
268 if (it == nameMap.end()) {
269 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
270 tmap->removeSeq(tmap->namesOfSeqs[i]);
271 i--; //need this because removeSeq removes name from namesOfSeqs
277 SharedUtil* util = new SharedUtil();
278 util->setGroups(m->Groups, tmap->namesOfGroups, "phylo.diversity"); //sets the groups the user wants to analyze
281 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
282 for (int i = 0; i < m->Groups.size(); i++) { if (m->Groups[i] == "xxx") { m->Groups.erase(m->Groups.begin()+i); break; } }
284 vector<string> outputNames;
286 //for each of the users trees
287 for(int i = 0; i < trees.size(); i++) {
289 if (m->control_pressed) { delete tmap; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
291 ofstream outSum, outRare, outCollect;
292 string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + ".phylodiv.summary";
293 string outRareFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + ".phylodiv.rarefaction";
294 string outCollectFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + ".phylodiv";
296 if (summary) { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile); outputTypes["summary"].push_back(outSumFile); }
297 if (rarefy) { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile); outputTypes["rarefy"].push_back(outRareFile); }
298 if (collect) { m->openOutputFile(outCollectFile, outCollect); outputNames.push_back(outCollectFile); outputTypes["phylodiv"].push_back(outCollectFile); }
300 int numLeafNodes = trees[i]->getNumLeaves();
302 //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
303 vector<int> randomLeaf;
304 for (int j = 0; j < numLeafNodes; j++) {
305 if (m->inUsersGroups(trees[i]->tree[j].getGroup(), m->Groups) == true) { //is this a node from the group the user selected.
306 randomLeaf.push_back(j);
310 numLeafNodes = randomLeaf.size(); //reset the number of leaf nodes you are using
312 //each group, each sampling, if no rarefy iters = 1;
313 map<string, vector<float> > diversity;
315 //each group, each sampling, if no rarefy iters = 1;
316 map<string, vector<float> > sumDiversity;
318 //find largest group total
319 int largestGroup = 0;
320 for (int j = 0; j < m->Groups.size(); j++) {
321 if (tmap->seqsPerGroup[m->Groups[j]] > largestGroup) { largestGroup = tmap->seqsPerGroup[m->Groups[j]]; }
323 //initialize diversity
324 diversity[m->Groups[j]].resize(tmap->seqsPerGroup[m->Groups[j]]+1, 0.0); //numSampled
327 //initialize sumDiversity
328 sumDiversity[m->Groups[j]].resize(tmap->seqsPerGroup[m->Groups[j]]+1, 0.0);
331 //convert freq percentage to number
333 if (freq < 1.0) { increment = largestGroup * freq;
334 }else { increment = freq; }
336 //initialize sampling spots
337 set<int> numSampledList;
338 for(int k = 1; k <= largestGroup; k++){ if((k == 1) || (k % increment == 0)){ numSampledList.insert(k); } }
339 if(largestGroup % increment != 0){ numSampledList.insert(largestGroup); }
341 //add other groups ending points
342 for (int j = 0; j < m->Groups.size(); j++) {
343 if (numSampledList.count(diversity[m->Groups[j]].size()-1) == 0) { numSampledList.insert(diversity[m->Groups[j]].size()-1); }
346 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
348 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
351 vector<int> procIters;
353 int numItersPerProcessor = iters / processors;
355 //divide iters between processes
356 for (int h = 0; h < processors; h++) {
357 if(h == processors - 1){
358 numItersPerProcessor = iters - h * numItersPerProcessor;
360 procIters.push_back(numItersPerProcessor);
363 createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum);
365 }else{ //no need to paralellize if you dont want to rarefy
366 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
371 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
374 if (rarefy) { printData(numSampledList, sumDiversity, outRare, iters); }
378 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
380 m->mothurOutEndLine();
381 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
382 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
383 m->mothurOutEndLine();
388 catch(exception& e) {
389 m->errorOut(e, "PhyloDiversityCommand", "execute");
393 //**********************************************************************************************************************
394 int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
396 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
399 vector<int> processIDS;
400 map< string, vector<float> >::iterator itSum;
402 //loop through and create all the processes you want
403 while (process != processors) {
407 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
410 driver(t, div, sumDiv, procIters[process], increment, randomLeaf, numSampledList, outCollect, outSum, false);
412 string outTemp = outputDir + toString(getpid()) + ".sumDiv.temp";
414 m->openOutputFile(outTemp, out);
416 //output the sumDIversity
417 for (itSum = sumDiv.begin(); itSum != sumDiv.end(); itSum++) {
418 out << itSum->first << '\t' << (itSum->second).size() << '\t';
419 for (int k = 0; k < (itSum->second).size(); k++) {
420 out << (itSum->second)[k] << '\t';
429 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
430 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
435 driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true);
437 //force parent to wait until all the processes are done
438 for (int i=0;i<(processors-1);i++) {
439 int temp = processIDS[i];
443 //get data created by processes
444 for (int i=0;i<(processors-1);i++) {
446 //input the sumDIversity
447 string inTemp = outputDir + toString(processIDS[i]) + ".sumDiv.temp";
449 m->openInputFile(inTemp, in);
451 //output the sumDIversity
452 for (int j = 0; j < sumDiv.size(); j++) {
456 in >> group >> size; m->gobble(in);
458 for (int k = 0; k < size; k++) {
462 sumDiv[group][k] += tempVal;
468 remove(inTemp.c_str());
476 catch(exception& e) {
477 m->errorOut(e, "PhyloDiversityCommand", "createProcesses");
481 //**********************************************************************************************************************
482 int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum, bool doSumCollect){
484 int numLeafNodes = randomLeaf.size();
486 for (int l = 0; l < numIters; l++) {
487 random_shuffle(randomLeaf.begin(), randomLeaf.end());
490 map<string, int> counts;
491 map< string, set<int> > countedBranch;
492 for (int j = 0; j < m->Groups.size(); j++) { counts[m->Groups[j]] = 0; countedBranch[m->Groups[j]].insert(-2); } //add dummy index to initialize countedBranch sets
494 for(int k = 0; k < numLeafNodes; k++){
496 if (m->control_pressed) { return 0; }
498 //calc branch length of randomLeaf k
499 vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch);
501 //for each group in the groups update the total branch length accounting for the names file
502 vector<string> groups = t->tree[randomLeaf[k]].getGroup();
504 for (int j = 0; j < groups.size(); j++) {
505 int numSeqsInGroupJ = 0;
506 map<string, int>::iterator it;
507 it = t->tree[randomLeaf[k]].pcount.find(groups[j]);
508 if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
509 numSeqsInGroupJ = it->second;
512 if (numSeqsInGroupJ != 0) { div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j]; }
514 for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
515 div[groups[j]][s] = div[groups[j]][s-1]; //update counts, but don't add in redundant branch lengths
517 counts[groups[j]] += numSeqsInGroupJ;
522 //add this diversity to the sum
523 for (int j = 0; j < m->Groups.size(); j++) {
524 for (int g = 0; g < div[m->Groups[j]].size(); g++) {
525 sumDiv[m->Groups[j]][g] += div[m->Groups[j]][g];
530 if ((collect) && (l == 0) && doSumCollect) { printData(numSampledList, div, outCollect, 1); }
531 if ((summary) && (l == 0) && doSumCollect) { printSumData(div, outSum, 1); }
537 catch(exception& e) {
538 m->errorOut(e, "PhyloDiversityCommand", "driver");
543 //**********************************************************************************************************************
545 void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofstream& out, int numIters){
548 out << "Groups\tnumSampled\tphyloDiversity" << endl;
550 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
552 for (int j = 0; j < m->Groups.size(); j++) {
553 int numSampled = (div[m->Groups[j]].size()-1);
554 out << m->Groups[j] << '\t' << numSampled << '\t';
558 if (scale) { score = (div[m->Groups[j]][numSampled] / (float)numIters) / (float)numSampled; }
559 else { score = div[m->Groups[j]][numSampled] / (float)numIters; }
561 out << setprecision(4) << score << endl;
567 catch(exception& e) {
568 m->errorOut(e, "PhyloDiversityCommand", "printSumData");
572 //**********************************************************************************************************************
574 void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float> >& div, ofstream& out, int numIters){
577 out << "numSampled\t";
578 for (int i = 0; i < m->Groups.size(); i++) { out << m->Groups[i] << '\t'; }
581 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
583 for (set<int>::iterator it = num.begin(); it != num.end(); it++) {
584 int numSampled = *it;
586 out << numSampled << '\t';
588 for (int j = 0; j < m->Groups.size(); j++) {
589 if (numSampled < div[m->Groups[j]].size()) {
591 if (scale) { score = (div[m->Groups[j]][numSampled] / (float)numIters) / (float)numSampled; }
592 else { score = div[m->Groups[j]][numSampled] / (float)numIters; }
594 out << setprecision(4) << score << '\t';
595 }else { out << "NA" << '\t'; }
603 catch(exception& e) {
604 m->errorOut(e, "PhyloDiversityCommand", "printData");
608 //**********************************************************************************************************************
609 //need a vector of floats one branch length for every group the node represents.
610 vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set<int> >& counted){
613 //calc the branch length
614 //while you aren't at root
618 vector<string> groups = t->tree[leaf].getGroup();
619 sums.resize(groups.size(), 0.0);
621 map<string, map<int, double> > tempTotals; //maps node to total Branch Length
622 map< string, set<int> > tempCounted;
623 set<int>::iterator it;
626 if(t->tree[index].getBranchLength() != -1){
627 for (int k = 0; k < groups.size(); k++) {
628 sums[k] += abs(t->tree[index].getBranchLength());
629 counted[groups[k]].insert(index);
633 for (int k = 0; k < groups.size(); k++) {
634 tempTotals[groups[k]][index] = 0.0;
637 index = t->tree[index].getParent();
639 //while you aren't at root
640 while(t->tree[index].getParent() != -1){
642 if (m->control_pressed) { return sums; }
645 for (int k = 0; k < groups.size(); k++) {
646 map<string, int>::iterator itGroup = t->tree[index].pcount.find(groups[k]);
647 if (itGroup != t->tree[index].pcount.end()) { pcountSize++; }
649 //do both your chidren have have descendants from the users groups?
650 int lc = t->tree[index].getLChild();
651 int rc = t->tree[index].getRChild();
654 itGroup = t->tree[lc].pcount.find(groups[k]);
655 if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++; }
658 itGroup = t->tree[rc].pcount.find(groups[k]);
659 if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; }
661 //if yes, add your childrens tempTotals
662 if ((LpcountSize != 0) && (RpcountSize != 0)) {
663 sums[k] += tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc];
665 for (it = tempCounted[groups[k]].begin(); it != tempCounted[groups[k]].end(); it++) { counted[groups[k]].insert(*it); }
667 //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl;
668 if (t->tree[index].getBranchLength() != -1) {
669 if (counted[groups[k]].count(index) == 0) {
670 tempTotals[groups[k]][index] = abs(t->tree[index].getBranchLength());
671 tempCounted[groups[k]].insert(index);
673 tempTotals[groups[k]][index] = 0.0;
676 tempTotals[groups[k]][index] = 0.0;
678 }else { //if no, your tempTotal is your childrens temp totals + your branch length
679 tempTotals[groups[k]][index] = tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc];
681 if (counted[groups[k]].count(index) == 0) {
682 tempTotals[groups[k]][index] += abs(t->tree[index].getBranchLength());
683 tempCounted[groups[k]].insert(index);
687 //cout << "temptotal = "<< tempTotals[i] << endl;
690 index = t->tree[index].getParent();
696 catch(exception& e) {
697 m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength");
701 /*****************************************************************/
702 int PhyloDiversityCommand::readNamesFile() {
705 numUniquesInName = 0;
708 m->openInputFile(namefile, in);
710 string first, second;
711 map<string, string>::iterator itNames;
714 in >> first >> second; m->gobble(in);
718 itNames = m->names.find(first);
719 if (itNames == m->names.end()) {
720 m->names[first] = second;
722 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
723 vector<string> dupNames;
724 m->splitAtComma(second, dupNames);
726 for (int i = 0; i < dupNames.size(); i++) {
727 nameMap[dupNames[i]] = dupNames[i];
728 if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
730 }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }
736 catch(exception& e) {
737 m->errorOut(e, "PhyloDiversityCommand", "readNamesFile");
742 //**********************************************************************************************************************