]> git.donarmstrong.com Git - mothur.git/blob - phylodiversitycommand.cpp
mods in testing 1.16.0
[mothur.git] / phylodiversitycommand.cpp
1 /*
2  *  phylodiversitycommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/30/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "phylodiversitycommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> PhyloDiversityCommand::getValidParameters(){     
14         try {
15                 string Array[] =  {"freq","rarefy","iters","groups","processors","summary","collect","scale","outputdir","inputdir"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "PhyloDiversityCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 PhyloDiversityCommand::PhyloDiversityCommand(){ 
26         try {
27                 abort = true;
28                 //initialize outputTypes
29                 vector<string> tempOutNames;
30                 outputTypes["phylodiv"] = tempOutNames;
31                 outputTypes["rarefy"] = tempOutNames;
32                 outputTypes["summary"] = tempOutNames;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 vector<string> PhyloDiversityCommand::getRequiredParameters(){  
41         try {
42                 vector<string> myArray;
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "PhyloDiversityCommand", "getRequiredParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 vector<string> PhyloDiversityCommand::getRequiredFiles(){       
52         try {
53                 string Array[] =  {"tree","group"};
54                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
55                 return myArray;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "PhyloDiversityCommand", "getRequiredFiles");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63 PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
64         try {
65                 globaldata = GlobalData::getInstance();
66                 abort = false;
67                 
68                 //allow user to run help
69                 if(option == "help") { help(); abort = true; }
70                 
71                 else {
72                         //valid paramters for this command
73                         string Array[] =  {"freq","rarefy","iters","groups","processors","summary","collect","scale","outputdir","inputdir"};
74                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
75                         
76                         OptionParser parser(option);
77                         map<string,string> parameters = parser.getParameters();
78                         
79                         ValidParameters validParameter;
80                 
81                         //check to make sure all parameters are valid for command
82                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
83                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
84                         }
85                         
86                         //initialize outputTypes
87                         vector<string> tempOutNames;
88                         outputTypes["phylodiv"] = tempOutNames;
89                         outputTypes["rarefy"] = tempOutNames;
90                         outputTypes["summary"] = tempOutNames;
91                         
92                         //if the user changes the output directory command factory will send this info to us in the output parameter 
93                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(globaldata->getTreeFile());              }
94                         
95                         if (globaldata->gTree.size() == 0) {//no trees were read
96                                 m->mothurOut("You must execute the read.tree command, before you may execute the phylo.diversity command."); m->mothurOutEndLine(); abort = true;  }
97
98                         string temp;
99                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
100                         convert(temp, freq); 
101                         
102                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
103                         convert(temp, iters); 
104                         
105                         temp = validParameter.validFile(parameters, "rarefy", false);                   if (temp == "not found") { temp = "F"; }
106                         rarefy = m->isTrue(temp);
107                         if (!rarefy) { iters = 1;  }
108                         
109                         temp = validParameter.validFile(parameters, "summary", false);                  if (temp == "not found") { temp = "T"; }
110                         summary = m->isTrue(temp);
111                         
112                         temp = validParameter.validFile(parameters, "scale", false);                    if (temp == "not found") { temp = "F"; }
113                         scale = m->isTrue(temp);
114                         
115                         temp = validParameter.validFile(parameters, "collect", false);                  if (temp == "not found") { temp = "F"; }
116                         collect = m->isTrue(temp);
117                         
118                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
119                         convert(temp, processors); 
120                         
121                         groups = validParameter.validFile(parameters, "groups", false);                 
122                         if (groups == "not found") { groups = ""; Groups = globaldata->gTreemap->namesOfGroups;  globaldata->Groups = Groups;  }
123                         else { 
124                                 m->splitAtDash(groups, Groups);
125                                 globaldata->Groups = Groups;
126                         }
127                         
128                         if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
129                 }
130                 
131         }
132         catch(exception& e) {
133                 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
134                 exit(1);
135         }                       
136 }
137 //**********************************************************************************************************************
138
139 void PhyloDiversityCommand::help(){
140         try {
141                 m->mothurOut("The phylo.diversity command can only be executed after a successful read.tree command.\n");
142                 m->mothurOut("The phylo.diversity command parameters are groups, iters, freq, processors, scale, rarefy, collect and summary.  No parameters are required.\n");
143                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n");
144                 m->mothurOut("The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n");
145                 m->mothurOut("The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n");
146                 m->mothurOut("The scale parameter is used indicate that you want your ouptut scaled to the number of sequences sampled, default = false. \n");
147                 m->mothurOut("The rarefy parameter allows you to create a rarefaction curve. The default is false.\n");
148                 m->mothurOut("The collect parameter allows you to create a collectors curve. The default is false.\n");
149                 m->mothurOut("The summary parameter allows you to create a .summary file. The default is true.\n");
150                 m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
151                 m->mothurOut("The phylo.diversity command should be in the following format: phylo.diversity(groups=yourGroups, rarefy=yourRarefy, iters=yourIters).\n");
152                 m->mothurOut("Example phylo.diversity(groups=A-B-C, rarefy=T, iters=500).\n");
153                 m->mothurOut("The phylo.diversity command output two files: .phylo.diversity and if rarefy=T, .rarefaction.\n");
154                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
155
156         }
157         catch(exception& e) {
158                 m->errorOut(e, "PhyloDiversityCommand", "help");
159                 exit(1);
160         }
161 }
162
163 //**********************************************************************************************************************
164
165 PhyloDiversityCommand::~PhyloDiversityCommand(){}
166
167 //**********************************************************************************************************************
168
169 int PhyloDiversityCommand::execute(){
170         try {
171                 
172                 if (abort == true) { return 0; }
173                 
174                 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
175                 for (int i = 0; i < globaldata->Groups.size(); i++) { if (globaldata->Groups[i] == "xxx") { globaldata->Groups.erase(globaldata->Groups.begin()+i);  break; }  }
176                  
177                 vector<string> outputNames;
178                         
179                 vector<Tree*> trees = globaldata->gTree;
180                 
181                 //for each of the users trees
182                 for(int i = 0; i < trees.size(); i++) {
183                 
184                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
185                         
186                         ofstream outSum, outRare, outCollect;
187                         string outSumFile = outputDir + m->getRootName(m->getSimpleName(globaldata->getTreeFile()))  + toString(i+1) + ".phylodiv.summary";
188                         string outRareFile = outputDir + m->getRootName(m->getSimpleName(globaldata->getTreeFile()))  + toString(i+1) + ".phylodiv.rarefaction";
189                         string outCollectFile = outputDir + m->getRootName(m->getSimpleName(globaldata->getTreeFile()))  + toString(i+1) + ".phylodiv";
190                         
191                         if (summary)    { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile);             outputTypes["summary"].push_back(outSumFile);                   }
192                         if (rarefy)             { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile);  outputTypes["rarefy"].push_back(outRareFile);                   }
193                         if (collect)    { m->openOutputFile(outCollectFile, outCollect); outputNames.push_back(outCollectFile);  outputTypes["phylodiv"].push_back(outCollectFile);  }
194                         
195                         int numLeafNodes = trees[i]->getNumLeaves();
196                         
197                         //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
198                         vector<int> randomLeaf;
199                         for (int j = 0; j < numLeafNodes; j++) {  
200                                 if (m->inUsersGroups(trees[i]->tree[j].getGroup(), globaldata->Groups) == true) { //is this a node from the group the user selected.
201                                         randomLeaf.push_back(j); 
202                                 }
203                         }
204                         
205                         numLeafNodes = randomLeaf.size();  //reset the number of leaf nodes you are using 
206                         
207                         //each group, each sampling, if no rarefy iters = 1;
208                         map<string, vector<float> > diversity;
209                         
210                         //each group, each sampling, if no rarefy iters = 1;
211                         map<string, vector<float> > sumDiversity;
212                         
213                         //find largest group total 
214                         int largestGroup = 0;
215                         for (int j = 0; j < globaldata->Groups.size(); j++) {  
216                                 if (globaldata->gTreemap->seqsPerGroup[globaldata->Groups[j]] > largestGroup) { largestGroup = globaldata->gTreemap->seqsPerGroup[globaldata->Groups[j]]; }
217                                 
218                                 //initialize diversity
219                                 diversity[globaldata->Groups[j]].resize(globaldata->gTreemap->seqsPerGroup[globaldata->Groups[j]]+1, 0.0);              //numSampled
220                                                                                                                                                                                                                         //groupA                0.0                     0.0
221                                                                                                                                                                                                                         
222                                 //initialize sumDiversity
223                                 sumDiversity[globaldata->Groups[j]].resize(globaldata->gTreemap->seqsPerGroup[globaldata->Groups[j]]+1, 0.0);
224                         }       
225
226                         //convert freq percentage to number
227                         int increment = 100;
228                         if (freq < 1.0) {  increment = largestGroup * freq;  
229                         }else { increment = freq;  }
230                         
231                         //initialize sampling spots
232                         set<int> numSampledList;
233                         for(int k = 1; k <= largestGroup; k++){  if((k == 1) || (k % increment == 0)){  numSampledList.insert(k); }   }
234                         if(largestGroup % increment != 0){      numSampledList.insert(largestGroup);   }
235                         
236                         //add other groups ending points
237                         for (int j = 0; j < globaldata->Groups.size(); j++) {  
238                                 if (numSampledList.count(diversity[globaldata->Groups[j]].size()-1) == 0) {  numSampledList.insert(diversity[globaldata->Groups[j]].size()-1); }
239                         }
240                         
241                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
242                                 if(processors == 1){
243                                         driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
244                                 }else{
245                                         if (rarefy) {
246                                                 vector<int> procIters;
247                                                 
248                                                 int numItersPerProcessor = iters / processors;
249                                                 
250                                                 //divide iters between processes
251                                                 for (int h = 0; h < processors; h++) {
252                                                         if(h == processors - 1){
253                                                                 numItersPerProcessor = iters - h * numItersPerProcessor;
254                                                         }
255                                                         procIters.push_back(numItersPerProcessor);
256                                                 }
257                                                 
258                                                 createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum); 
259                                                 
260                                         }else{ //no need to paralellize if you dont want to rarefy
261                                                 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
262                                         }
263                                 }
264
265                         #else
266                                 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
267                         #endif
268
269                         if (rarefy) {   printData(numSampledList, sumDiversity, outRare, iters);        }
270                 }
271                 
272         
273                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
274
275                 m->mothurOutEndLine();
276                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
277                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
278                 m->mothurOutEndLine();
279
280                 
281                 return 0;
282         }
283         catch(exception& e) {
284                 m->errorOut(e, "PhyloDiversityCommand", "execute");
285                 exit(1);
286         }
287 }
288 //**********************************************************************************************************************
289 int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
290         try {
291                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
292                 int process = 1;
293                 
294                 vector<int> processIDS;
295                 map< string, vector<float> >::iterator itSum;
296                 
297                 EstOutput results;
298                 
299                 //loop through and create all the processes you want
300                 while (process != processors) {
301                         int pid = fork();
302                         
303                         if (pid > 0) {
304                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
305                                 process++;
306                         }else if (pid == 0){
307                                 driver(t, div, sumDiv, procIters[process], increment, randomLeaf, numSampledList, outCollect, outSum, false);
308                                 
309                                 string outTemp = outputDir + toString(getpid()) + ".sumDiv.temp";
310                                 ofstream out;
311                                 m->openOutputFile(outTemp, out);
312                                 
313                                 //output the sumDIversity
314                                 for (itSum = sumDiv.begin(); itSum != sumDiv.end(); itSum++) {
315                                         out << itSum->first << '\t' << (itSum->second).size() << '\t';
316                                         for (int k = 0; k < (itSum->second).size(); k++) { 
317                                                 out << (itSum->second)[k] << '\t';
318                                         }
319                                         out << endl;
320                                 }
321                                 
322                                 out.close();
323                                 
324                                 exit(0);
325                         }else { 
326                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
327                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
328                                 exit(0);
329                         }
330                 }
331                 
332                 driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true);
333                 
334                 //force parent to wait until all the processes are done
335                 for (int i=0;i<(processors-1);i++) { 
336                         int temp = processIDS[i];
337                         wait(&temp);
338                 }
339                 
340                 //get data created by processes
341                 for (int i=0;i<(processors-1);i++) { 
342                         
343                         //input the sumDIversity
344                         string inTemp = outputDir + toString(processIDS[i]) + ".sumDiv.temp";
345                         ifstream in;
346                         m->openInputFile(inTemp, in);
347                                 
348                         //output the sumDIversity
349                         for (int j = 0; j < sumDiv.size(); j++) { 
350                                 string group = "";
351                                 int size = 0;
352                                 
353                                 in >> group >> size; m->gobble(in);
354                                 
355                                 for (int k = 0; k < size; k++) { 
356                                         float tempVal;
357                                         in >> tempVal;
358                                         
359                                         sumDiv[group][k] += tempVal;
360                                 }
361                                 m->gobble(in);
362                         }
363                                 
364                         in.close();
365                         remove(inTemp.c_str());
366                 }
367                 
368 #endif
369
370         return 0;               
371         
372         }
373         catch(exception& e) {
374                 m->errorOut(e, "PhyloDiversityCommand", "createProcesses");
375                 exit(1);
376         }
377 }
378 //**********************************************************************************************************************
379 int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum, bool doSumCollect){
380         try {
381                 int numLeafNodes = randomLeaf.size();
382         
383                 for (int l = 0; l < numIters; l++) {
384                                 random_shuffle(randomLeaf.begin(), randomLeaf.end());
385                 
386                                 //initialize counts
387                                 map<string, int> counts;
388                                 map< string, set<int> > countedBranch;  
389                                 for (int j = 0; j < globaldata->Groups.size(); j++) {  counts[globaldata->Groups[j]] = 0; countedBranch[globaldata->Groups[j]].insert(-2);  }  //add dummy index to initialize countedBranch sets
390                                 
391                                 for(int k = 0; k < numLeafNodes; k++){
392                                                 
393                                         if (m->control_pressed) { return 0; }
394                                         
395                                         //calc branch length of randomLeaf k
396                                         vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch);
397                         
398                                         //for each group in the groups update the total branch length accounting for the names file
399                                         vector<string> groups = t->tree[randomLeaf[k]].getGroup();
400                                         
401                                         for (int j = 0; j < groups.size(); j++) {
402                                                 int numSeqsInGroupJ = 0;
403                                                 map<string, int>::iterator it;
404                                                 it = t->tree[randomLeaf[k]].pcount.find(groups[j]);
405                                                 if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
406                                                         numSeqsInGroupJ = it->second;
407                                                 }
408                                                 
409                                                 if (numSeqsInGroupJ != 0) {     div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j];  }
410                                                 
411                                                 for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
412                                                         div[groups[j]][s] = div[groups[j]][s-1];  //update counts, but don't add in redundant branch lengths
413                                                 }
414                                                 counts[groups[j]] += numSeqsInGroupJ;
415                                         }
416                                 }
417                                 
418                                 if (rarefy) {
419                                         //add this diversity to the sum
420                                         for (int j = 0; j < globaldata->Groups.size(); j++) {  
421                                                 for (int g = 0; g < div[globaldata->Groups[j]].size(); g++) {
422                                                         sumDiv[globaldata->Groups[j]][g] += div[globaldata->Groups[j]][g];
423                                                 }
424                                         }
425                                 }
426                                 
427                                 if ((collect) && (l == 0) && doSumCollect) {  printData(numSampledList, div, outCollect, 1);  }
428                                 if ((summary) && (l == 0) && doSumCollect) {  printSumData(div, outSum, 1);  }
429                         }
430                         
431                         return 0;
432
433         }
434         catch(exception& e) {
435                 m->errorOut(e, "PhyloDiversityCommand", "driver");
436                 exit(1);
437         }
438 }
439
440 //**********************************************************************************************************************
441
442 void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofstream& out, int numIters){
443         try {
444                 
445                 out << "Groups\tnumSampled\tphyloDiversity" << endl;
446                 
447                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
448                         
449                 for (int j = 0; j < globaldata->Groups.size(); j++) {
450                         int numSampled = (div[globaldata->Groups[j]].size()-1);
451                         out << globaldata->Groups[j] << '\t' << numSampled << '\t';
452                 
453                          
454                         float score;
455                         if (scale)      {  score = (div[globaldata->Groups[j]][numSampled] / (float)numIters) / (float)numSampled;      }
456                         else            {       score = div[globaldata->Groups[j]][numSampled] / (float)numIters;       }
457                                 
458                         out << setprecision(4) << score << endl;
459                 }
460                                         
461                 out.close();
462                 
463         }
464         catch(exception& e) {
465                 m->errorOut(e, "PhyloDiversityCommand", "printSumData");
466                 exit(1);
467         }
468 }
469 //**********************************************************************************************************************
470
471 void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float> >& div, ofstream& out, int numIters){
472         try {
473                 
474                 out << "numSampled\t";
475                 for (int i = 0; i < globaldata->Groups.size(); i++) { out << globaldata->Groups[i] << '\t';  }
476                 out << endl;
477                 
478                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
479                 
480                 for (set<int>::iterator it = num.begin(); it != num.end(); it++) {  
481                         int numSampled = *it;
482                         
483                         out << numSampled << '\t';  
484                         
485                         for (int j = 0; j < globaldata->Groups.size(); j++) {
486                                 if (numSampled < div[globaldata->Groups[j]].size()) { 
487                                         float score;
488                                         if (scale)      {  score = (div[globaldata->Groups[j]][numSampled] / (float)numIters) / (float)numSampled;      }
489                                         else            {       score = div[globaldata->Groups[j]][numSampled] / (float)numIters;       }
490
491                                         out << setprecision(4) << score << '\t';
492                                 }else { out << "NA" << '\t'; }
493                         }
494                         out << endl;
495                 }
496                 
497                 out.close();
498                 
499         }
500         catch(exception& e) {
501                 m->errorOut(e, "PhyloDiversityCommand", "printData");
502                 exit(1);
503         }
504 }
505 //**********************************************************************************************************************
506 //need a vector of floats one branch length for every group the node represents.
507 vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set<int> >& counted){
508         try {
509
510                 //calc the branch length
511                 //while you aren't at root
512                 vector<float> sums; 
513                 int index = leaf;
514                 
515                 vector<string> groups = t->tree[leaf].getGroup();
516                 sums.resize(groups.size(), 0.0);
517                 
518                 map<string, map<int, double> > tempTotals; //maps node to total Branch Length
519                 map< string, set<int> > tempCounted;
520                 set<int>::iterator it;
521         
522                 //you are a leaf
523                 if(t->tree[index].getBranchLength() != -1){     
524                         for (int k = 0; k < groups.size(); k++) { 
525                                 sums[k] += abs(t->tree[index].getBranchLength());       
526                                 counted[groups[k]].insert(index);
527                         }
528                 }
529                 
530                 for (int k = 0; k < groups.size(); k++) { 
531                         tempTotals[groups[k]][index] = 0.0;     
532                 }
533                 
534                 index = t->tree[index].getParent();     
535                         
536                 //while you aren't at root
537                 while(t->tree[index].getParent() != -1){
538
539                         if (m->control_pressed) {  return sums; }
540                         
541                         int pcountSize = 0;     
542                         for (int k = 0; k < groups.size(); k++) {
543                                 map<string, int>::iterator itGroup = t->tree[index].pcount.find(groups[k]);
544                                 if (itGroup != t->tree[index].pcount.end()) { pcountSize++;  } 
545                         
546                                 //do both your chidren have have descendants from the users groups? 
547                                 int lc = t->tree[index].getLChild();
548                                 int rc = t->tree[index].getRChild();
549                         
550                                 int LpcountSize = 0;
551                                 itGroup = t->tree[lc].pcount.find(groups[k]);
552                                 if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++;  } 
553                                                         
554                                 int RpcountSize = 0;
555                                 itGroup = t->tree[rc].pcount.find(groups[k]);
556                                 if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++;  } 
557                                                                 
558                                 //if yes, add your childrens tempTotals
559                                 if ((LpcountSize != 0) && (RpcountSize != 0)) {
560                                         sums[k] += tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; 
561                                         
562                                         for (it = tempCounted[groups[k]].begin(); it != tempCounted[groups[k]].end(); it++) { counted[groups[k]].insert(*it); }
563
564                                         //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl;
565                                         if (t->tree[index].getBranchLength() != -1) {
566                                                 if (counted[groups[k]].count(index) == 0) {
567                                                         tempTotals[groups[k]][index] = abs(t->tree[index].getBranchLength());
568                                                         tempCounted[groups[k]].insert(index);
569                                                 }else{
570                                                         tempTotals[groups[k]][index] = 0.0;
571                                                 }
572                                         }else {
573                                                 tempTotals[groups[k]][index] = 0.0;
574                                         }
575                                 }else { //if no, your tempTotal is your childrens temp totals + your branch length
576                                         tempTotals[groups[k]][index] = tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; 
577                                                                         
578                                         if (counted[groups[k]].count(index) == 0) {
579                                                 tempTotals[groups[k]][index] += abs(t->tree[index].getBranchLength());
580                                                 tempCounted[groups[k]].insert(index);
581                                         }
582
583                                 }
584                                 //cout << "temptotal = "<< tempTotals[i] << endl;
585                         }
586                         
587                         index = t->tree[index].getParent();     
588                 }
589
590                 return sums;
591
592         }
593         catch(exception& e) {
594                 m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength");
595                 exit(1);
596         }
597 }
598 //**********************************************************************************************************************
599
600
601