2 * phylodiversitycommand.cpp
5 * Created by westcott on 4/30/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "phylodiversitycommand.h"
12 //**********************************************************************************************************************
13 vector<string> PhyloDiversityCommand::setParameters(){
16 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
17 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pgroup);
18 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
19 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
20 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
21 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
22 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23 CommandParameter prarefy("rarefy", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prarefy);
24 CommandParameter psummary("summary", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(psummary);
25 CommandParameter pcollect("collect", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcollect);
26 CommandParameter pscale("scale", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pscale);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "PhyloDiversityCommand", "setParameters");
39 //**********************************************************************************************************************
40 string PhyloDiversityCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The phylo.diversity command parameters are tree, group, name, groups, iters, freq, processors, scale, rarefy, collect and summary. tree and group are required, unless you have valid current files.\n";
44 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n";
45 helpString += "The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n";
46 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
47 helpString += "The scale parameter is used indicate that you want your output scaled to the number of sequences sampled, default = false. \n";
48 helpString += "The rarefy parameter allows you to create a rarefaction curve. The default is false.\n";
49 helpString += "The collect parameter allows you to create a collectors curve. The default is false.\n";
50 helpString += "The summary parameter allows you to create a .summary file. The default is true.\n";
51 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52 helpString += "The phylo.diversity command should be in the following format: phylo.diversity(groups=yourGroups, rarefy=yourRarefy, iters=yourIters).\n";
53 helpString += "Example phylo.diversity(groups=A-B-C, rarefy=T, iters=500).\n";
54 helpString += "The phylo.diversity command output two files: .phylo.diversity and if rarefy=T, .rarefaction.\n";
55 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
59 m->errorOut(e, "PhyloDiversityCommand", "getHelpString");
65 //**********************************************************************************************************************
66 PhyloDiversityCommand::PhyloDiversityCommand(){
68 abort = true; calledHelp = true;
70 vector<string> tempOutNames;
71 outputTypes["phylodiv"] = tempOutNames;
72 outputTypes["rarefy"] = tempOutNames;
73 outputTypes["summary"] = tempOutNames;
76 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
80 //**********************************************************************************************************************
81 PhyloDiversityCommand::PhyloDiversityCommand(string option) {
83 abort = false; calledHelp = false;
85 //allow user to run help
86 if(option == "help") { help(); abort = true; calledHelp = true; }
87 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
90 vector<string> myArray = setParameters();;
92 OptionParser parser(option);
93 map<string,string> parameters = parser.getParameters();
94 map<string,string>::iterator it;
96 ValidParameters validParameter;
98 //check to make sure all parameters are valid for command
99 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
100 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
103 //initialize outputTypes
104 vector<string> tempOutNames;
105 outputTypes["phylodiv"] = tempOutNames;
106 outputTypes["rarefy"] = tempOutNames;
107 outputTypes["summary"] = tempOutNames;
109 //if the user changes the input directory command factory will send this info to us in the output parameter
110 string inputDir = validParameter.validFile(parameters, "inputdir", false);
111 if (inputDir == "not found"){ inputDir = ""; }
114 it = parameters.find("tree");
115 //user has given a template file
116 if(it != parameters.end()){
117 path = m->hasPath(it->second);
118 //if the user has not given a path then, add inputdir. else leave path alone.
119 if (path == "") { parameters["tree"] = inputDir + it->second; }
122 it = parameters.find("group");
123 //user has given a template file
124 if(it != parameters.end()){
125 path = m->hasPath(it->second);
126 //if the user has not given a path then, add inputdir. else leave path alone.
127 if (path == "") { parameters["group"] = inputDir + it->second; }
130 it = parameters.find("name");
131 //user has given a template file
132 if(it != parameters.end()){
133 path = m->hasPath(it->second);
134 //if the user has not given a path then, add inputdir. else leave path alone.
135 if (path == "") { parameters["name"] = inputDir + it->second; }
141 m->namesOfGroups.clear();
142 m->Treenames.clear();
145 //check for required parameters
146 treefile = validParameter.validFile(parameters, "tree", true);
147 if (treefile == "not open") { abort = true; }
148 else if (treefile == "not found") {
149 //if there is a current design file, use it
150 treefile = m->getTreeFile();
151 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
152 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
155 //check for required parameters
156 groupfile = validParameter.validFile(parameters, "group", true);
157 if (groupfile == "not open") { abort = true; }
158 else if (groupfile == "not found") {
159 //if there is a current design file, use it
160 groupfile = m->getGroupFile();
161 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
162 else { m->mothurOut("You have no current group file and the group parameter is required."); m->mothurOutEndLine(); abort = true; }
165 namefile = validParameter.validFile(parameters, "name", true);
166 if (namefile == "not open") { abort = true; }
167 else if (namefile == "not found") { namefile = ""; }
169 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
172 temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
175 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
176 convert(temp, iters);
178 temp = validParameter.validFile(parameters, "rarefy", false); if (temp == "not found") { temp = "F"; }
179 rarefy = m->isTrue(temp);
180 if (!rarefy) { iters = 1; }
182 temp = validParameter.validFile(parameters, "summary", false); if (temp == "not found") { temp = "T"; }
183 summary = m->isTrue(temp);
185 temp = validParameter.validFile(parameters, "scale", false); if (temp == "not found") { temp = "F"; }
186 scale = m->isTrue(temp);
188 temp = validParameter.validFile(parameters, "collect", false); if (temp == "not found") { temp = "F"; }
189 collect = m->isTrue(temp);
191 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
192 m->setProcessors(temp);
193 convert(temp, processors);
195 groups = validParameter.validFile(parameters, "groups", false);
196 if (groups == "not found") { groups = ""; }
198 m->splitAtDash(groups, Groups);
202 if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
206 catch(exception& e) {
207 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
211 //**********************************************************************************************************************
213 int PhyloDiversityCommand::execute(){
216 if (abort == true) { if (calledHelp) { return 0; } return 2; }
218 m->setTreeFile(treefile);
220 //read in group map info.
221 tmap = new TreeMap(groupfile);
224 if (namefile != "") { readNamesFile(); }
226 read = new ReadNewickTree(treefile);
227 int readOk = read->read(tmap);
229 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
231 read->AssembleTrees();
232 vector<Tree*> trees = read->getTrees();
235 //make sure all files match
236 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
238 if (namefile != "") {
239 if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
240 else { numNamesInTree = m->Treenames.size(); }
241 }else { numNamesInTree = m->Treenames.size(); }
244 //output any names that are in group file but not in tree
245 if (numNamesInTree < tmap->getNumSeqs()) {
246 for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
247 //is that name in the tree?
249 for (int j = 0; j < m->Treenames.size(); j++) {
250 if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
254 if (m->control_pressed) {
255 delete tmap; for (int i = 0; i < trees.size(); i++) { delete trees[i]; }
256 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
261 //then you did not find it so report it
262 if (count == m->Treenames.size()) {
263 //if it is in your namefile then don't remove
264 map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
266 if (it == nameMap.end()) {
267 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
268 tmap->removeSeq(tmap->namesOfSeqs[i]);
269 i--; //need this because removeSeq removes name from namesOfSeqs
275 SharedUtil* util = new SharedUtil();
276 util->setGroups(m->Groups, tmap->namesOfGroups, "treegroup"); //sets the groups the user wants to analyze
279 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
280 for (int i = 0; i < m->Groups.size(); i++) { if (m->Groups[i] == "xxx") { m->Groups.erase(m->Groups.begin()+i); break; } }
282 vector<string> outputNames;
284 //for each of the users trees
285 for(int i = 0; i < trees.size(); i++) {
287 if (m->control_pressed) { delete tmap; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
289 ofstream outSum, outRare, outCollect;
290 string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + ".phylodiv.summary";
291 string outRareFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + ".phylodiv.rarefaction";
292 string outCollectFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + ".phylodiv";
294 if (summary) { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile); outputTypes["summary"].push_back(outSumFile); }
295 if (rarefy) { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile); outputTypes["rarefy"].push_back(outRareFile); }
296 if (collect) { m->openOutputFile(outCollectFile, outCollect); outputNames.push_back(outCollectFile); outputTypes["phylodiv"].push_back(outCollectFile); }
298 int numLeafNodes = trees[i]->getNumLeaves();
300 //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
301 vector<int> randomLeaf;
302 for (int j = 0; j < numLeafNodes; j++) {
303 if (m->inUsersGroups(trees[i]->tree[j].getGroup(), m->Groups) == true) { //is this a node from the group the user selected.
304 randomLeaf.push_back(j);
308 numLeafNodes = randomLeaf.size(); //reset the number of leaf nodes you are using
310 //each group, each sampling, if no rarefy iters = 1;
311 map<string, vector<float> > diversity;
313 //each group, each sampling, if no rarefy iters = 1;
314 map<string, vector<float> > sumDiversity;
316 //find largest group total
317 int largestGroup = 0;
318 for (int j = 0; j < m->Groups.size(); j++) {
319 if (tmap->seqsPerGroup[m->Groups[j]] > largestGroup) { largestGroup = tmap->seqsPerGroup[m->Groups[j]]; }
321 //initialize diversity
322 diversity[m->Groups[j]].resize(tmap->seqsPerGroup[m->Groups[j]]+1, 0.0); //numSampled
325 //initialize sumDiversity
326 sumDiversity[m->Groups[j]].resize(tmap->seqsPerGroup[m->Groups[j]]+1, 0.0);
329 //convert freq percentage to number
331 if (freq < 1.0) { increment = largestGroup * freq;
332 }else { increment = freq; }
334 //initialize sampling spots
335 set<int> numSampledList;
336 for(int k = 1; k <= largestGroup; k++){ if((k == 1) || (k % increment == 0)){ numSampledList.insert(k); } }
337 if(largestGroup % increment != 0){ numSampledList.insert(largestGroup); }
339 //add other groups ending points
340 for (int j = 0; j < m->Groups.size(); j++) {
341 if (numSampledList.count(diversity[m->Groups[j]].size()-1) == 0) { numSampledList.insert(diversity[m->Groups[j]].size()-1); }
344 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
346 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
349 vector<int> procIters;
351 int numItersPerProcessor = iters / processors;
353 //divide iters between processes
354 for (int h = 0; h < processors; h++) {
355 if(h == processors - 1){
356 numItersPerProcessor = iters - h * numItersPerProcessor;
358 procIters.push_back(numItersPerProcessor);
361 createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum);
363 }else{ //no need to paralellize if you dont want to rarefy
364 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
369 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
372 if (rarefy) { printData(numSampledList, sumDiversity, outRare, iters); }
376 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
378 m->mothurOutEndLine();
379 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
380 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
381 m->mothurOutEndLine();
386 catch(exception& e) {
387 m->errorOut(e, "PhyloDiversityCommand", "execute");
391 //**********************************************************************************************************************
392 int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
394 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
397 vector<int> processIDS;
398 map< string, vector<float> >::iterator itSum;
400 //loop through and create all the processes you want
401 while (process != processors) {
405 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
408 driver(t, div, sumDiv, procIters[process], increment, randomLeaf, numSampledList, outCollect, outSum, false);
410 string outTemp = outputDir + toString(getpid()) + ".sumDiv.temp";
412 m->openOutputFile(outTemp, out);
414 //output the sumDIversity
415 for (itSum = sumDiv.begin(); itSum != sumDiv.end(); itSum++) {
416 out << itSum->first << '\t' << (itSum->second).size() << '\t';
417 for (int k = 0; k < (itSum->second).size(); k++) {
418 out << (itSum->second)[k] << '\t';
427 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
428 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
433 driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true);
435 //force parent to wait until all the processes are done
436 for (int i=0;i<(processors-1);i++) {
437 int temp = processIDS[i];
441 //get data created by processes
442 for (int i=0;i<(processors-1);i++) {
444 //input the sumDIversity
445 string inTemp = outputDir + toString(processIDS[i]) + ".sumDiv.temp";
447 m->openInputFile(inTemp, in);
449 //output the sumDIversity
450 for (int j = 0; j < sumDiv.size(); j++) {
454 in >> group >> size; m->gobble(in);
456 for (int k = 0; k < size; k++) {
460 sumDiv[group][k] += tempVal;
466 remove(inTemp.c_str());
474 catch(exception& e) {
475 m->errorOut(e, "PhyloDiversityCommand", "createProcesses");
479 //**********************************************************************************************************************
480 int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum, bool doSumCollect){
482 int numLeafNodes = randomLeaf.size();
484 for (int l = 0; l < numIters; l++) {
485 random_shuffle(randomLeaf.begin(), randomLeaf.end());
488 map<string, int> counts;
489 map< string, set<int> > countedBranch;
490 for (int j = 0; j < m->Groups.size(); j++) { counts[m->Groups[j]] = 0; countedBranch[m->Groups[j]].insert(-2); } //add dummy index to initialize countedBranch sets
492 for(int k = 0; k < numLeafNodes; k++){
494 if (m->control_pressed) { return 0; }
496 //calc branch length of randomLeaf k
497 vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch);
499 //for each group in the groups update the total branch length accounting for the names file
500 vector<string> groups = t->tree[randomLeaf[k]].getGroup();
502 for (int j = 0; j < groups.size(); j++) {
503 int numSeqsInGroupJ = 0;
504 map<string, int>::iterator it;
505 it = t->tree[randomLeaf[k]].pcount.find(groups[j]);
506 if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
507 numSeqsInGroupJ = it->second;
510 if (numSeqsInGroupJ != 0) { div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j]; }
512 for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
513 div[groups[j]][s] = div[groups[j]][s-1]; //update counts, but don't add in redundant branch lengths
515 counts[groups[j]] += numSeqsInGroupJ;
520 //add this diversity to the sum
521 for (int j = 0; j < m->Groups.size(); j++) {
522 for (int g = 0; g < div[m->Groups[j]].size(); g++) {
523 sumDiv[m->Groups[j]][g] += div[m->Groups[j]][g];
528 if ((collect) && (l == 0) && doSumCollect) { printData(numSampledList, div, outCollect, 1); }
529 if ((summary) && (l == 0) && doSumCollect) { printSumData(div, outSum, 1); }
535 catch(exception& e) {
536 m->errorOut(e, "PhyloDiversityCommand", "driver");
541 //**********************************************************************************************************************
543 void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofstream& out, int numIters){
546 out << "Groups\tnumSampled\tphyloDiversity" << endl;
548 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
550 for (int j = 0; j < m->Groups.size(); j++) {
551 int numSampled = (div[m->Groups[j]].size()-1);
552 out << m->Groups[j] << '\t' << numSampled << '\t';
556 if (scale) { score = (div[m->Groups[j]][numSampled] / (float)numIters) / (float)numSampled; }
557 else { score = div[m->Groups[j]][numSampled] / (float)numIters; }
559 out << setprecision(4) << score << endl;
565 catch(exception& e) {
566 m->errorOut(e, "PhyloDiversityCommand", "printSumData");
570 //**********************************************************************************************************************
572 void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float> >& div, ofstream& out, int numIters){
575 out << "numSampled\t";
576 for (int i = 0; i < m->Groups.size(); i++) { out << m->Groups[i] << '\t'; }
579 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
581 for (set<int>::iterator it = num.begin(); it != num.end(); it++) {
582 int numSampled = *it;
584 out << numSampled << '\t';
586 for (int j = 0; j < m->Groups.size(); j++) {
587 if (numSampled < div[m->Groups[j]].size()) {
589 if (scale) { score = (div[m->Groups[j]][numSampled] / (float)numIters) / (float)numSampled; }
590 else { score = div[m->Groups[j]][numSampled] / (float)numIters; }
592 out << setprecision(4) << score << '\t';
593 }else { out << "NA" << '\t'; }
601 catch(exception& e) {
602 m->errorOut(e, "PhyloDiversityCommand", "printData");
606 //**********************************************************************************************************************
607 //need a vector of floats one branch length for every group the node represents.
608 vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set<int> >& counted){
611 //calc the branch length
612 //while you aren't at root
616 vector<string> groups = t->tree[leaf].getGroup();
617 sums.resize(groups.size(), 0.0);
619 map<string, map<int, double> > tempTotals; //maps node to total Branch Length
620 map< string, set<int> > tempCounted;
621 set<int>::iterator it;
624 if(t->tree[index].getBranchLength() != -1){
625 for (int k = 0; k < groups.size(); k++) {
626 sums[k] += abs(t->tree[index].getBranchLength());
627 counted[groups[k]].insert(index);
631 for (int k = 0; k < groups.size(); k++) {
632 tempTotals[groups[k]][index] = 0.0;
635 index = t->tree[index].getParent();
637 //while you aren't at root
638 while(t->tree[index].getParent() != -1){
640 if (m->control_pressed) { return sums; }
643 for (int k = 0; k < groups.size(); k++) {
644 map<string, int>::iterator itGroup = t->tree[index].pcount.find(groups[k]);
645 if (itGroup != t->tree[index].pcount.end()) { pcountSize++; }
647 //do both your chidren have have descendants from the users groups?
648 int lc = t->tree[index].getLChild();
649 int rc = t->tree[index].getRChild();
652 itGroup = t->tree[lc].pcount.find(groups[k]);
653 if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++; }
656 itGroup = t->tree[rc].pcount.find(groups[k]);
657 if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; }
659 //if yes, add your childrens tempTotals
660 if ((LpcountSize != 0) && (RpcountSize != 0)) {
661 sums[k] += tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc];
663 for (it = tempCounted[groups[k]].begin(); it != tempCounted[groups[k]].end(); it++) { counted[groups[k]].insert(*it); }
665 //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl;
666 if (t->tree[index].getBranchLength() != -1) {
667 if (counted[groups[k]].count(index) == 0) {
668 tempTotals[groups[k]][index] = abs(t->tree[index].getBranchLength());
669 tempCounted[groups[k]].insert(index);
671 tempTotals[groups[k]][index] = 0.0;
674 tempTotals[groups[k]][index] = 0.0;
676 }else { //if no, your tempTotal is your childrens temp totals + your branch length
677 tempTotals[groups[k]][index] = tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc];
679 if (counted[groups[k]].count(index) == 0) {
680 tempTotals[groups[k]][index] += abs(t->tree[index].getBranchLength());
681 tempCounted[groups[k]].insert(index);
685 //cout << "temptotal = "<< tempTotals[i] << endl;
688 index = t->tree[index].getParent();
694 catch(exception& e) {
695 m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength");
699 /*****************************************************************/
700 int PhyloDiversityCommand::readNamesFile() {
703 numUniquesInName = 0;
706 m->openInputFile(namefile, in);
708 string first, second;
709 map<string, string>::iterator itNames;
712 in >> first >> second; m->gobble(in);
716 itNames = m->names.find(first);
717 if (itNames == m->names.end()) {
718 m->names[first] = second;
720 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
721 vector<string> dupNames;
722 m->splitAtComma(second, dupNames);
724 for (int i = 0; i < dupNames.size(); i++) {
725 nameMap[dupNames[i]] = dupNames[i];
726 if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
728 }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }
734 catch(exception& e) {
735 m->errorOut(e, "PhyloDiversityCommand", "readNamesFile");
740 //**********************************************************************************************************************