2 * phylodiversitycommand.cpp
5 * Created by westcott on 4/30/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "phylodiversitycommand.h"
12 //**********************************************************************************************************************
13 vector<string> PhyloDiversityCommand::setParameters(){
16 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
17 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pgroup);
18 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
19 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
20 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
21 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
22 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23 CommandParameter prarefy("rarefy", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prarefy);
24 CommandParameter psummary("summary", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(psummary);
25 CommandParameter pcollect("collect", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcollect);
26 CommandParameter pscale("scale", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pscale);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "PhyloDiversityCommand", "setParameters");
39 //**********************************************************************************************************************
40 string PhyloDiversityCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The phylo.diversity command parameters are tree, group, name, groups, iters, freq, processors, scale, rarefy, collect and summary. tree and group are required, unless you have valid current files.\n";
44 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n";
45 helpString += "The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n";
46 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
47 helpString += "The scale parameter is used indicate that you want your output scaled to the number of sequences sampled, default = false. \n";
48 helpString += "The rarefy parameter allows you to create a rarefaction curve. The default is false.\n";
49 helpString += "The collect parameter allows you to create a collectors curve. The default is false.\n";
50 helpString += "The summary parameter allows you to create a .summary file. The default is true.\n";
51 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52 helpString += "The phylo.diversity command should be in the following format: phylo.diversity(groups=yourGroups, rarefy=yourRarefy, iters=yourIters).\n";
53 helpString += "Example phylo.diversity(groups=A-B-C, rarefy=T, iters=500).\n";
54 helpString += "The phylo.diversity command output two files: .phylo.diversity and if rarefy=T, .rarefaction.\n";
55 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
59 m->errorOut(e, "PhyloDiversityCommand", "getHelpString");
65 //**********************************************************************************************************************
66 PhyloDiversityCommand::PhyloDiversityCommand(){
68 abort = true; calledHelp = true;
70 vector<string> tempOutNames;
71 outputTypes["phylodiv"] = tempOutNames;
72 outputTypes["rarefy"] = tempOutNames;
73 outputTypes["summary"] = tempOutNames;
76 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
80 //**********************************************************************************************************************
81 PhyloDiversityCommand::PhyloDiversityCommand(string option) {
83 abort = false; calledHelp = false;
85 //allow user to run help
86 if(option == "help") { help(); abort = true; calledHelp = true; }
89 vector<string> myArray = setParameters();;
91 OptionParser parser(option);
92 map<string,string> parameters = parser.getParameters();
93 map<string,string>::iterator it;
95 ValidParameters validParameter;
97 //check to make sure all parameters are valid for command
98 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
99 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
102 //initialize outputTypes
103 vector<string> tempOutNames;
104 outputTypes["phylodiv"] = tempOutNames;
105 outputTypes["rarefy"] = tempOutNames;
106 outputTypes["summary"] = tempOutNames;
108 //if the user changes the input directory command factory will send this info to us in the output parameter
109 string inputDir = validParameter.validFile(parameters, "inputdir", false);
110 if (inputDir == "not found"){ inputDir = ""; }
113 it = parameters.find("tree");
114 //user has given a template file
115 if(it != parameters.end()){
116 path = m->hasPath(it->second);
117 //if the user has not given a path then, add inputdir. else leave path alone.
118 if (path == "") { parameters["tree"] = inputDir + it->second; }
121 it = parameters.find("group");
122 //user has given a template file
123 if(it != parameters.end()){
124 path = m->hasPath(it->second);
125 //if the user has not given a path then, add inputdir. else leave path alone.
126 if (path == "") { parameters["group"] = inputDir + it->second; }
129 it = parameters.find("name");
130 //user has given a template file
131 if(it != parameters.end()){
132 path = m->hasPath(it->second);
133 //if the user has not given a path then, add inputdir. else leave path alone.
134 if (path == "") { parameters["name"] = inputDir + it->second; }
140 //check for required parameters
141 treefile = validParameter.validFile(parameters, "tree", true);
142 if (treefile == "not open") { abort = true; }
143 else if (treefile == "not found") {
144 //if there is a current design file, use it
145 treefile = m->getTreeFile();
146 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
147 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
150 //check for required parameters
151 groupfile = validParameter.validFile(parameters, "group", true);
152 if (groupfile == "not open") { abort = true; }
153 else if (groupfile == "not found") {
154 //if there is a current design file, use it
155 groupfile = m->getGroupFile();
156 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
157 else { m->mothurOut("You have no current group file and the group parameter is required."); m->mothurOutEndLine(); abort = true; }
160 namefile = validParameter.validFile(parameters, "name", true);
161 if (namefile == "not open") { abort = true; }
162 else if (namefile == "not found") { namefile = ""; }
164 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
167 temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
170 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
171 convert(temp, iters);
173 temp = validParameter.validFile(parameters, "rarefy", false); if (temp == "not found") { temp = "F"; }
174 rarefy = m->isTrue(temp);
175 if (!rarefy) { iters = 1; }
177 temp = validParameter.validFile(parameters, "summary", false); if (temp == "not found") { temp = "T"; }
178 summary = m->isTrue(temp);
180 temp = validParameter.validFile(parameters, "scale", false); if (temp == "not found") { temp = "F"; }
181 scale = m->isTrue(temp);
183 temp = validParameter.validFile(parameters, "collect", false); if (temp == "not found") { temp = "F"; }
184 collect = m->isTrue(temp);
186 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
187 m->setProcessors(temp);
188 convert(temp, processors);
190 groups = validParameter.validFile(parameters, "groups", false);
191 if (groups == "not found") { groups = ""; }
193 m->splitAtDash(groups, Groups);
197 if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
201 catch(exception& e) {
202 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
206 //**********************************************************************************************************************
208 int PhyloDiversityCommand::execute(){
211 if (abort == true) { if (calledHelp) { return 0; } return 2; }
213 //read in group map info.
214 tmap = new TreeMap(groupfile);
217 if (namefile != "") { readNamesFile(); }
219 read = new ReadNewickTree(treefile);
220 int readOk = read->read(tmap);
222 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
224 read->AssembleTrees();
225 vector<Tree*> trees = read->getTrees();
228 //make sure all files match
229 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
231 if (namefile != "") {
232 if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
233 else { numNamesInTree = m->Treenames.size(); }
234 }else { numNamesInTree = m->Treenames.size(); }
237 //output any names that are in group file but not in tree
238 if (numNamesInTree < tmap->getNumSeqs()) {
239 for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
240 //is that name in the tree?
242 for (int j = 0; j < m->Treenames.size(); j++) {
243 if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
247 if (m->control_pressed) {
248 delete tmap; for (int i = 0; i < trees.size(); i++) { delete trees[i]; }
249 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
254 //then you did not find it so report it
255 if (count == m->Treenames.size()) {
256 //if it is in your namefile then don't remove
257 map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
259 if (it == nameMap.end()) {
260 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
261 tmap->removeSeq(tmap->namesOfSeqs[i]);
262 i--; //need this because removeSeq removes name from namesOfSeqs
268 SharedUtil* util = new SharedUtil();
269 util->setGroups(m->Groups, tmap->namesOfGroups, "treegroup"); //sets the groups the user wants to analyze
272 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
273 for (int i = 0; i < m->Groups.size(); i++) { if (m->Groups[i] == "xxx") { m->Groups.erase(m->Groups.begin()+i); break; } }
275 vector<string> outputNames;
277 //for each of the users trees
278 for(int i = 0; i < trees.size(); i++) {
280 if (m->control_pressed) { delete tmap; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
282 ofstream outSum, outRare, outCollect;
283 string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + ".phylodiv.summary";
284 string outRareFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + ".phylodiv.rarefaction";
285 string outCollectFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + ".phylodiv";
287 if (summary) { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile); outputTypes["summary"].push_back(outSumFile); }
288 if (rarefy) { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile); outputTypes["rarefy"].push_back(outRareFile); }
289 if (collect) { m->openOutputFile(outCollectFile, outCollect); outputNames.push_back(outCollectFile); outputTypes["phylodiv"].push_back(outCollectFile); }
291 int numLeafNodes = trees[i]->getNumLeaves();
293 //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
294 vector<int> randomLeaf;
295 for (int j = 0; j < numLeafNodes; j++) {
296 if (m->inUsersGroups(trees[i]->tree[j].getGroup(), m->Groups) == true) { //is this a node from the group the user selected.
297 randomLeaf.push_back(j);
301 numLeafNodes = randomLeaf.size(); //reset the number of leaf nodes you are using
303 //each group, each sampling, if no rarefy iters = 1;
304 map<string, vector<float> > diversity;
306 //each group, each sampling, if no rarefy iters = 1;
307 map<string, vector<float> > sumDiversity;
309 //find largest group total
310 int largestGroup = 0;
311 for (int j = 0; j < m->Groups.size(); j++) {
312 if (tmap->seqsPerGroup[m->Groups[j]] > largestGroup) { largestGroup = tmap->seqsPerGroup[m->Groups[j]]; }
314 //initialize diversity
315 diversity[m->Groups[j]].resize(tmap->seqsPerGroup[m->Groups[j]]+1, 0.0); //numSampled
318 //initialize sumDiversity
319 sumDiversity[m->Groups[j]].resize(tmap->seqsPerGroup[m->Groups[j]]+1, 0.0);
322 //convert freq percentage to number
324 if (freq < 1.0) { increment = largestGroup * freq;
325 }else { increment = freq; }
327 //initialize sampling spots
328 set<int> numSampledList;
329 for(int k = 1; k <= largestGroup; k++){ if((k == 1) || (k % increment == 0)){ numSampledList.insert(k); } }
330 if(largestGroup % increment != 0){ numSampledList.insert(largestGroup); }
332 //add other groups ending points
333 for (int j = 0; j < m->Groups.size(); j++) {
334 if (numSampledList.count(diversity[m->Groups[j]].size()-1) == 0) { numSampledList.insert(diversity[m->Groups[j]].size()-1); }
337 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
339 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
342 vector<int> procIters;
344 int numItersPerProcessor = iters / processors;
346 //divide iters between processes
347 for (int h = 0; h < processors; h++) {
348 if(h == processors - 1){
349 numItersPerProcessor = iters - h * numItersPerProcessor;
351 procIters.push_back(numItersPerProcessor);
354 createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum);
356 }else{ //no need to paralellize if you dont want to rarefy
357 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
362 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
365 if (rarefy) { printData(numSampledList, sumDiversity, outRare, iters); }
369 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
371 m->mothurOutEndLine();
372 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
373 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
374 m->mothurOutEndLine();
379 catch(exception& e) {
380 m->errorOut(e, "PhyloDiversityCommand", "execute");
384 //**********************************************************************************************************************
385 int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
387 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
390 vector<int> processIDS;
391 map< string, vector<float> >::iterator itSum;
393 //loop through and create all the processes you want
394 while (process != processors) {
398 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
401 driver(t, div, sumDiv, procIters[process], increment, randomLeaf, numSampledList, outCollect, outSum, false);
403 string outTemp = outputDir + toString(getpid()) + ".sumDiv.temp";
405 m->openOutputFile(outTemp, out);
407 //output the sumDIversity
408 for (itSum = sumDiv.begin(); itSum != sumDiv.end(); itSum++) {
409 out << itSum->first << '\t' << (itSum->second).size() << '\t';
410 for (int k = 0; k < (itSum->second).size(); k++) {
411 out << (itSum->second)[k] << '\t';
420 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
421 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
426 driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true);
428 //force parent to wait until all the processes are done
429 for (int i=0;i<(processors-1);i++) {
430 int temp = processIDS[i];
434 //get data created by processes
435 for (int i=0;i<(processors-1);i++) {
437 //input the sumDIversity
438 string inTemp = outputDir + toString(processIDS[i]) + ".sumDiv.temp";
440 m->openInputFile(inTemp, in);
442 //output the sumDIversity
443 for (int j = 0; j < sumDiv.size(); j++) {
447 in >> group >> size; m->gobble(in);
449 for (int k = 0; k < size; k++) {
453 sumDiv[group][k] += tempVal;
459 remove(inTemp.c_str());
467 catch(exception& e) {
468 m->errorOut(e, "PhyloDiversityCommand", "createProcesses");
472 //**********************************************************************************************************************
473 int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum, bool doSumCollect){
475 int numLeafNodes = randomLeaf.size();
477 for (int l = 0; l < numIters; l++) {
478 random_shuffle(randomLeaf.begin(), randomLeaf.end());
481 map<string, int> counts;
482 map< string, set<int> > countedBranch;
483 for (int j = 0; j < m->Groups.size(); j++) { counts[m->Groups[j]] = 0; countedBranch[m->Groups[j]].insert(-2); } //add dummy index to initialize countedBranch sets
485 for(int k = 0; k < numLeafNodes; k++){
487 if (m->control_pressed) { return 0; }
489 //calc branch length of randomLeaf k
490 vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch);
492 //for each group in the groups update the total branch length accounting for the names file
493 vector<string> groups = t->tree[randomLeaf[k]].getGroup();
495 for (int j = 0; j < groups.size(); j++) {
496 int numSeqsInGroupJ = 0;
497 map<string, int>::iterator it;
498 it = t->tree[randomLeaf[k]].pcount.find(groups[j]);
499 if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
500 numSeqsInGroupJ = it->second;
503 if (numSeqsInGroupJ != 0) { div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j]; }
505 for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
506 div[groups[j]][s] = div[groups[j]][s-1]; //update counts, but don't add in redundant branch lengths
508 counts[groups[j]] += numSeqsInGroupJ;
513 //add this diversity to the sum
514 for (int j = 0; j < m->Groups.size(); j++) {
515 for (int g = 0; g < div[m->Groups[j]].size(); g++) {
516 sumDiv[m->Groups[j]][g] += div[m->Groups[j]][g];
521 if ((collect) && (l == 0) && doSumCollect) { printData(numSampledList, div, outCollect, 1); }
522 if ((summary) && (l == 0) && doSumCollect) { printSumData(div, outSum, 1); }
528 catch(exception& e) {
529 m->errorOut(e, "PhyloDiversityCommand", "driver");
534 //**********************************************************************************************************************
536 void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofstream& out, int numIters){
539 out << "Groups\tnumSampled\tphyloDiversity" << endl;
541 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
543 for (int j = 0; j < m->Groups.size(); j++) {
544 int numSampled = (div[m->Groups[j]].size()-1);
545 out << m->Groups[j] << '\t' << numSampled << '\t';
549 if (scale) { score = (div[m->Groups[j]][numSampled] / (float)numIters) / (float)numSampled; }
550 else { score = div[m->Groups[j]][numSampled] / (float)numIters; }
552 out << setprecision(4) << score << endl;
558 catch(exception& e) {
559 m->errorOut(e, "PhyloDiversityCommand", "printSumData");
563 //**********************************************************************************************************************
565 void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float> >& div, ofstream& out, int numIters){
568 out << "numSampled\t";
569 for (int i = 0; i < m->Groups.size(); i++) { out << m->Groups[i] << '\t'; }
572 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
574 for (set<int>::iterator it = num.begin(); it != num.end(); it++) {
575 int numSampled = *it;
577 out << numSampled << '\t';
579 for (int j = 0; j < m->Groups.size(); j++) {
580 if (numSampled < div[m->Groups[j]].size()) {
582 if (scale) { score = (div[m->Groups[j]][numSampled] / (float)numIters) / (float)numSampled; }
583 else { score = div[m->Groups[j]][numSampled] / (float)numIters; }
585 out << setprecision(4) << score << '\t';
586 }else { out << "NA" << '\t'; }
594 catch(exception& e) {
595 m->errorOut(e, "PhyloDiversityCommand", "printData");
599 //**********************************************************************************************************************
600 //need a vector of floats one branch length for every group the node represents.
601 vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set<int> >& counted){
604 //calc the branch length
605 //while you aren't at root
609 vector<string> groups = t->tree[leaf].getGroup();
610 sums.resize(groups.size(), 0.0);
612 map<string, map<int, double> > tempTotals; //maps node to total Branch Length
613 map< string, set<int> > tempCounted;
614 set<int>::iterator it;
617 if(t->tree[index].getBranchLength() != -1){
618 for (int k = 0; k < groups.size(); k++) {
619 sums[k] += abs(t->tree[index].getBranchLength());
620 counted[groups[k]].insert(index);
624 for (int k = 0; k < groups.size(); k++) {
625 tempTotals[groups[k]][index] = 0.0;
628 index = t->tree[index].getParent();
630 //while you aren't at root
631 while(t->tree[index].getParent() != -1){
633 if (m->control_pressed) { return sums; }
636 for (int k = 0; k < groups.size(); k++) {
637 map<string, int>::iterator itGroup = t->tree[index].pcount.find(groups[k]);
638 if (itGroup != t->tree[index].pcount.end()) { pcountSize++; }
640 //do both your chidren have have descendants from the users groups?
641 int lc = t->tree[index].getLChild();
642 int rc = t->tree[index].getRChild();
645 itGroup = t->tree[lc].pcount.find(groups[k]);
646 if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++; }
649 itGroup = t->tree[rc].pcount.find(groups[k]);
650 if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; }
652 //if yes, add your childrens tempTotals
653 if ((LpcountSize != 0) && (RpcountSize != 0)) {
654 sums[k] += tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc];
656 for (it = tempCounted[groups[k]].begin(); it != tempCounted[groups[k]].end(); it++) { counted[groups[k]].insert(*it); }
658 //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl;
659 if (t->tree[index].getBranchLength() != -1) {
660 if (counted[groups[k]].count(index) == 0) {
661 tempTotals[groups[k]][index] = abs(t->tree[index].getBranchLength());
662 tempCounted[groups[k]].insert(index);
664 tempTotals[groups[k]][index] = 0.0;
667 tempTotals[groups[k]][index] = 0.0;
669 }else { //if no, your tempTotal is your childrens temp totals + your branch length
670 tempTotals[groups[k]][index] = tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc];
672 if (counted[groups[k]].count(index) == 0) {
673 tempTotals[groups[k]][index] += abs(t->tree[index].getBranchLength());
674 tempCounted[groups[k]].insert(index);
678 //cout << "temptotal = "<< tempTotals[i] << endl;
681 index = t->tree[index].getParent();
687 catch(exception& e) {
688 m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength");
692 /*****************************************************************/
693 int PhyloDiversityCommand::readNamesFile() {
696 numUniquesInName = 0;
699 m->openInputFile(namefile, in);
701 string first, second;
702 map<string, string>::iterator itNames;
705 in >> first >> second; m->gobble(in);
709 itNames = m->names.find(first);
710 if (itNames == m->names.end()) {
711 m->names[first] = second;
713 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
714 vector<string> dupNames;
715 m->splitAtComma(second, dupNames);
717 for (int i = 0; i < dupNames.size(); i++) {
718 nameMap[dupNames[i]] = dupNames[i];
719 if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
721 }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }
727 catch(exception& e) {
728 m->errorOut(e, "PhyloDiversityCommand", "readNamesFile");
733 //**********************************************************************************************************************