]> git.donarmstrong.com Git - mothur.git/blobdiff - phylodiversitycommand.cpp
added SequenceCountParser class to parse the count table by group. added count parame...
[mothur.git] / phylodiversitycommand.cpp
index bf502abe715baadd43112377c374ba218414d3d8..ddd2b316d507b8477d84d529ae9fc4d88c69e518 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include "phylodiversitycommand.h"
+#include "treereader.h"
 
 //**********************************************************************************************************************
 vector<string> PhyloDiversityCommand::setParameters(){ 
@@ -60,7 +61,28 @@ string PhyloDiversityCommand::getHelpString(){
                exit(1);
        }
 }
-
+//**********************************************************************************************************************
+string PhyloDiversityCommand::getOutputFileNameTag(string type, string inputName=""){  
+       try {
+        string outputFileName = "";
+               map<string, vector<string> >::iterator it;
+        
+        //is this a type this command creates
+        it = outputTypes.find(type);
+        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+        else {
+            if (type == "phylodiv") {  outputFileName =  "phylodiv"; }
+            else if (type == "rarefy") {  outputFileName =  "phylodiv.rarefaction"; }
+            else if (type == "summary") {  outputFileName =  "phylodiv.summary"; }
+            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
+        }
+        return outputFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PhyloDiversityCommand", "getOutputFileNameTag");
+               exit(1);
+       }
+}
 
 //**********************************************************************************************************************
 PhyloDiversityCommand::PhyloDiversityCommand(){        
@@ -136,44 +158,35 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
                                }
                        }
                        
-                       m->runParse = true;
-                       m->Groups.clear();
-                       m->namesOfGroups.clear();
-                       m->Treenames.clear();
-                       m->names.clear();
-                       
                        //check for required parameters
                        treefile = validParameter.validFile(parameters, "tree", true);
-                       if (treefile == "not open") { abort = true; }
+                       if (treefile == "not open") { treefile = ""; abort = true; }
                        else if (treefile == "not found") {                             
                                //if there is a current design file, use it
                                treefile = m->getTreeFile(); 
                                if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
                                else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
-                       }       
+                       }else { m->setTreeFile(treefile); }     
                        
                        //check for required parameters
                        groupfile = validParameter.validFile(parameters, "group", true);
-                       if (groupfile == "not open") { abort = true; }
-                       else if (groupfile == "not found") { 
-                               //if there is a current design file, use it
-                               groupfile = m->getGroupFile(); 
-                               if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
-                               else {  m->mothurOut("You have no current group file and the group parameter is required."); m->mothurOutEndLine(); abort = true; }                                                             
-                       }
+                       if (groupfile == "not open") { groupfile = ""; abort = true; }
+                       else if (groupfile == "not found") { groupfile = ""; }
+                       else { m->setGroupFile(groupfile); }
                        
                        namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") { namefile = ""; }
+                       else { m->setNameFile(namefile); }
                        
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
                        
                        string temp;
                        temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
-                       convert(temp, freq); 
+                       m->mothurConvert(temp, freq); 
                        
                        temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
-                       convert(temp, iters); 
+                       m->mothurConvert(temp, iters); 
                        
                        temp = validParameter.validFile(parameters, "rarefy", false);                   if (temp == "not found") { temp = "F"; }
                        rarefy = m->isTrue(temp);
@@ -190,16 +203,21 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
-                       convert(temp, processors); 
+                       m->mothurConvert(temp, processors); 
                        
                        groups = validParameter.validFile(parameters, "groups", false);                 
                        if (groups == "not found") { groups = "";  }
                        else { 
                                m->splitAtDash(groups, Groups);
-                               m->Groups = Groups;
+                               m->setGroups(Groups);
                        }
                        
                        if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(treefile);
+                               parser.getNameFile(files);
+                       }
                }
                
        }
@@ -215,81 +233,34 @@ int PhyloDiversityCommand::execute(){
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
+        int start = time(NULL);
+        
                m->setTreeFile(treefile);
-               
-               //read in group map info.
-               tmap = new TreeMap(groupfile);
-               tmap->readMap();
-               
-               if (namefile != "") { readNamesFile(); }
-               
-               read = new ReadNewickTree(treefile);
-               int readOk = read->read(tmap); 
-               
-               if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
-               
-               read->AssembleTrees();
-               vector<Tree*> trees = read->getTrees();
-               delete read;
-               
-               //make sure all files match
-               //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
-               int numNamesInTree;
-               if (namefile != "")  {  
-                       if (numUniquesInName == m->Treenames.size()) {  numNamesInTree = nameMap.size();  }
-                       else {   numNamesInTree = m->Treenames.size();  }
-               }else {  numNamesInTree = m->Treenames.size();  }
-               
-               
-               //output any names that are in group file but not in tree
-               if (numNamesInTree < tmap->getNumSeqs()) {
-                       for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
-                               //is that name in the tree?
-                               int count = 0;
-                               for (int j = 0; j < m->Treenames.size(); j++) {
-                                       if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
-                                       count++;
-                               }
-                               
-                               if (m->control_pressed) { 
-                                       delete tmap; for (int i = 0; i < trees.size(); i++) { delete trees[i]; }
-                                       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
-                                       m->Groups.clear();
-                                       return 0;
-                               }
-                               
-                               //then you did not find it so report it 
-                               if (count == m->Treenames.size()) { 
-                                       //if it is in your namefile then don't remove
-                                       map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
-                                       
-                                       if (it == nameMap.end()) {
-                                               m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
-                                               tmap->removeSeq(tmap->namesOfSeqs[i]);
-                                               i--; //need this because removeSeq removes name from namesOfSeqs
-                                       }
-                               }
-                       }
-               }
-               
-               SharedUtil* util = new SharedUtil();
-               util->setGroups(m->Groups, tmap->namesOfGroups, "treegroup");   //sets the groups the user wants to analyze
-               delete util;
+        TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
+        vector<Tree*> trees = reader->getTrees();
+        tmap = trees[0]->getTreeMap();
+        delete reader;
+
+               SharedUtil util;
+               vector<string> mGroups = m->getGroups();
+               vector<string> tGroups = tmap->getNamesOfGroups();
+               util.setGroups(mGroups, tGroups, "phylo.diversity");    //sets the groups the user wants to analyze
                
                //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
-               for (int i = 0; i < m->Groups.size(); i++) { if (m->Groups[i] == "xxx") { m->Groups.erase(m->Groups.begin()+i);  break; }  }
+               for (int i = 0; i < mGroups.size(); i++) { if (mGroups[i] == "xxx") { mGroups.erase(mGroups.begin()+i);  break; }  }
+               m->setGroups(mGroups);
                 
                vector<string> outputNames;
                
                //for each of the users trees
                for(int i = 0; i < trees.size(); i++) {
                
-                       if (m->control_pressed) { delete tmap; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) {       remove(outputNames[j].c_str());         } return 0; }
+                       if (m->control_pressed) { delete tmap; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } return 0; }
                        
                        ofstream outSum, outRare, outCollect;
-                       string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + ".phylodiv.summary";
-                       string outRareFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + ".phylodiv.rarefaction";
-                       string outCollectFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + ".phylodiv";
+                       string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + "." + getOutputFileNameTag("summary");
+                       string outRareFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + "." + getOutputFileNameTag("rarefy");
+                       string outCollectFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + "." + getOutputFileNameTag("phylodiv");
                        
                        if (summary)    { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile);             outputTypes["summary"].push_back(outSumFile);                   }
                        if (rarefy)             { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile);  outputTypes["rarefy"].push_back(outRareFile);                   }
@@ -300,7 +271,7 @@ int PhyloDiversityCommand::execute(){
                        //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
                        vector<int> randomLeaf;
                        for (int j = 0; j < numLeafNodes; j++) {  
-                               if (m->inUsersGroups(trees[i]->tree[j].getGroup(), m->Groups) == true) { //is this a node from the group the user selected.
+                               if (m->inUsersGroups(trees[i]->tree[j].getGroup(), mGroups) == true) { //is this a node from the group the user selected.
                                        randomLeaf.push_back(j); 
                                }
                        }
@@ -315,15 +286,15 @@ int PhyloDiversityCommand::execute(){
                        
                        //find largest group total 
                        int largestGroup = 0;
-                       for (int j = 0; j < m->Groups.size(); j++) {  
-                               if (tmap->seqsPerGroup[m->Groups[j]] > largestGroup) { largestGroup = tmap->seqsPerGroup[m->Groups[j]]; }
+                       for (int j = 0; j < mGroups.size(); j++) {  
+                               if (tmap->seqsPerGroup[mGroups[j]] > largestGroup) { largestGroup = tmap->seqsPerGroup[mGroups[j]]; }
                                
                                //initialize diversity
-                               diversity[m->Groups[j]].resize(tmap->seqsPerGroup[m->Groups[j]]+1, 0.0);                //numSampled
+                               diversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0);            //numSampled
                                                                                                                                                                                                                        //groupA                0.0                     0.0
                                                                                                                                                                                                                        
                                //initialize sumDiversity
-                               sumDiversity[m->Groups[j]].resize(tmap->seqsPerGroup[m->Groups[j]]+1, 0.0);
+                               sumDiversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0);
                        }       
 
                        //convert freq percentage to number
@@ -337,11 +308,11 @@ int PhyloDiversityCommand::execute(){
                        if(largestGroup % increment != 0){      numSampledList.insert(largestGroup);   }
                        
                        //add other groups ending points
-                       for (int j = 0; j < m->Groups.size(); j++) {  
-                               if (numSampledList.count(diversity[m->Groups[j]].size()-1) == 0) {  numSampledList.insert(diversity[m->Groups[j]].size()-1); }
+                       for (int j = 0; j < mGroups.size(); j++) {  
+                               if (numSampledList.count(diversity[mGroups[j]].size()-1) == 0) {  numSampledList.insert(diversity[mGroups[j]].size()-1); }
                        }
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                if(processors == 1){
                                        driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
                                }else{
@@ -373,8 +344,11 @@ int PhyloDiversityCommand::execute(){
                }
                
        
-               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } return 0; }
+
+        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run phylo.diversity."); m->mothurOutEndLine();
 
+        
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
@@ -391,7 +365,7 @@ int PhyloDiversityCommand::execute(){
 //**********************************************************************************************************************
 int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
        try {
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 1;
                
                vector<int> processIDS;
@@ -463,7 +437,7 @@ int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map<
                        }
                                
                        in.close();
-                       remove(inTemp.c_str());
+                       m->mothurRemove(inTemp);
                }
                
 #endif
@@ -480,47 +454,59 @@ int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map<
 int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum, bool doSumCollect){
        try {
                int numLeafNodes = randomLeaf.size();
-       
+               vector<string> mGroups = m->getGroups();
+        
+        map<string, int> rootForGroup = getRootForGroups(t); //maps groupName to root node in tree. "root" for group may not be the trees root and we don't want to include the extra branches.
+        
                for (int l = 0; l < numIters; l++) {
                                random_shuffle(randomLeaf.begin(), randomLeaf.end());
-               
+         
                                //initialize counts
                                map<string, int> counts;
-                               map< string, set<int> > countedBranch;  
-                               for (int j = 0; j < m->Groups.size(); j++) {  counts[m->Groups[j]] = 0; countedBranch[m->Groups[j]].insert(-2);  }  //add dummy index to initialize countedBranch sets
+                vector< map<string, bool> > countedBranch;
+                for (int i = 0; i < t->getNumNodes(); i++) {
+                    map<string, bool> temp;
+                    for (int j = 0; j < mGroups.size(); j++) { temp[mGroups[j]] = false; }
+                    countedBranch.push_back(temp);
+                }
+            
+                               for (int j = 0; j < mGroups.size(); j++) {  counts[mGroups[j]] = 0;   }  
                                
                                for(int k = 0; k < numLeafNodes; k++){
                                                
                                        if (m->control_pressed) { return 0; }
                                        
                                        //calc branch length of randomLeaf k
-                                       vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch);
+                    vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch, rootForGroup);
                        
                                        //for each group in the groups update the total branch length accounting for the names file
                                        vector<string> groups = t->tree[randomLeaf[k]].getGroup();
                                        
                                        for (int j = 0; j < groups.size(); j++) {
-                                               int numSeqsInGroupJ = 0;
-                                               map<string, int>::iterator it;
-                                               it = t->tree[randomLeaf[k]].pcount.find(groups[j]);
-                                               if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
-                                                       numSeqsInGroupJ = it->second;
-                                               }
-                                               
-                                               if (numSeqsInGroupJ != 0) {     div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j];  }
-                                               
-                                               for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
-                                                       div[groups[j]][s] = div[groups[j]][s-1];  //update counts, but don't add in redundant branch lengths
-                                               }
-                                               counts[groups[j]] += numSeqsInGroupJ;
+                        
+                        if (m->inUsersGroups(groups[j], mGroups)) {
+                            int numSeqsInGroupJ = 0;
+                            map<string, int>::iterator it;
+                            it = t->tree[randomLeaf[k]].pcount.find(groups[j]);
+                            if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
+                                numSeqsInGroupJ = it->second;
+                            }
+                            
+                            if (numSeqsInGroupJ != 0) {        div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j];  }
+                            
+                            for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
+                                div[groups[j]][s] = div[groups[j]][s-1];  //update counts, but don't add in redundant branch lengths
+                            }
+                            counts[groups[j]] += numSeqsInGroupJ;
+                        }
                                        }
                                }
                                
                                if (rarefy) {
                                        //add this diversity to the sum
-                                       for (int j = 0; j < m->Groups.size(); j++) {  
-                                               for (int g = 0; g < div[m->Groups[j]].size(); g++) {
-                                                       sumDiv[m->Groups[j]][g] += div[m->Groups[j]][g];
+                                       for (int j = 0; j < mGroups.size(); j++) {  
+                                               for (int g = 0; g < div[mGroups[j]].size(); g++) {
+                                                       sumDiv[mGroups[j]][g] += div[mGroups[j]][g];
                                                }
                                        }
                                }
@@ -546,15 +532,16 @@ void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofst
                out << "Groups\tnumSampled\tphyloDiversity" << endl;
                
                out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
-                       
-               for (int j = 0; j < m->Groups.size(); j++) {
-                       int numSampled = (div[m->Groups[j]].size()-1);
-                       out << m->Groups[j] << '\t' << numSampled << '\t';
+               
+               vector<string> mGroups = m->getGroups();
+               for (int j = 0; j < mGroups.size(); j++) {
+                       int numSampled = (div[mGroups[j]].size()-1);
+                       out << mGroups[j] << '\t' << numSampled << '\t';
                
                         
                        float score;
-                       if (scale)      {  score = (div[m->Groups[j]][numSampled] / (float)numIters) / (float)numSampled;       }
-                       else            {       score = div[m->Groups[j]][numSampled] / (float)numIters;        }
+                       if (scale)      {  score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
+                       else            {       score = div[mGroups[j]][numSampled] / (float)numIters;  }
                                
                        out << setprecision(4) << score << endl;
                }
@@ -573,7 +560,8 @@ void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float>
        try {
                
                out << "numSampled\t";
-               for (int i = 0; i < m->Groups.size(); i++) { out << m->Groups[i] << '\t';  }
+               vector<string> mGroups = m->getGroups();
+               for (int i = 0; i < mGroups.size(); i++) { out << mGroups[i] << '\t';  }
                out << endl;
                
                out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
@@ -582,12 +570,12 @@ void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float>
                        int numSampled = *it;
                        
                        out << numSampled << '\t';  
-                       
-                       for (int j = 0; j < m->Groups.size(); j++) {
-                               if (numSampled < div[m->Groups[j]].size()) { 
+               
+                       for (int j = 0; j < mGroups.size(); j++) {
+                               if (numSampled < div[mGroups[j]].size()) { 
                                        float score;
-                                       if (scale)      {  score = (div[m->Groups[j]][numSampled] / (float)numIters) / (float)numSampled;       }
-                                       else            {       score = div[m->Groups[j]][numSampled] / (float)numIters;        }
+                                       if (scale)      {  score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
+                                       else            {       score = div[mGroups[j]][numSampled] / (float)numIters;  }
 
                                        out << setprecision(4) << score << '\t';
                                }else { out << "NA" << '\t'; }
@@ -605,9 +593,9 @@ void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float>
 }
 //**********************************************************************************************************************
 //need a vector of floats one branch length for every group the node represents.
-vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set<int> >& counted){
+vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, vector< map<string, bool> >& counted, map<string, int> roots){
        try {
-
+        
                //calc the branch length
                //while you aren't at root
                vector<float> sums; 
@@ -616,127 +604,101 @@ vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< st
                vector<string> groups = t->tree[leaf].getGroup();
                sums.resize(groups.size(), 0.0);
                
-               map<string, map<int, double> > tempTotals; //maps node to total Branch Length
-               map< string, set<int> > tempCounted;
-               set<int>::iterator it;
-       
-               //you are a leaf
+        
+        //you are a leaf
                if(t->tree[index].getBranchLength() != -1){     
                        for (int k = 0; k < groups.size(); k++) { 
                                sums[k] += abs(t->tree[index].getBranchLength());       
-                               counted[groups[k]].insert(index);
                        }
                }
-               
-               for (int k = 0; k < groups.size(); k++) { 
-                       tempTotals[groups[k]][index] = 0.0;     
-               }
-               
-               index = t->tree[index].getParent();     
-                       
+        
+        
+        index = t->tree[index].getParent();    
+        
                //while you aren't at root
                while(t->tree[index].getParent() != -1){
-
+            
                        if (m->control_pressed) {  return sums; }
                        
-                       int pcountSize = 0;     
                        for (int k = 0; k < groups.size(); k++) {
-                               map<string, int>::iterator itGroup = t->tree[index].pcount.find(groups[k]);
-                               if (itGroup != t->tree[index].pcount.end()) { pcountSize++;  } 
-                       
-                               //do both your chidren have have descendants from the users groups? 
-                               int lc = t->tree[index].getLChild();
-                               int rc = t->tree[index].getRChild();
-                       
-                               int LpcountSize = 0;
-                               itGroup = t->tree[lc].pcount.find(groups[k]);
-                               if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++;  } 
-                                                       
-                               int RpcountSize = 0;
-                               itGroup = t->tree[rc].pcount.find(groups[k]);
-                               if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++;  } 
-                                                               
-                               //if yes, add your childrens tempTotals
-                               if ((LpcountSize != 0) && (RpcountSize != 0)) {
-                                       sums[k] += tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; 
-                                       
-                                       for (it = tempCounted[groups[k]].begin(); it != tempCounted[groups[k]].end(); it++) { counted[groups[k]].insert(*it); }
-
-                                       //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl;
-                                       if (t->tree[index].getBranchLength() != -1) {
-                                               if (counted[groups[k]].count(index) == 0) {
-                                                       tempTotals[groups[k]][index] = abs(t->tree[index].getBranchLength());
-                                                       tempCounted[groups[k]].insert(index);
-                                               }else{
-                                                       tempTotals[groups[k]][index] = 0.0;
-                                               }
-                                       }else {
-                                               tempTotals[groups[k]][index] = 0.0;
-                                       }
-                               }else { //if no, your tempTotal is your childrens temp totals + your branch length
-                                       tempTotals[groups[k]][index] = tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; 
-                                                                       
-                                       if (counted[groups[k]].count(index) == 0) {
-                                               tempTotals[groups[k]][index] += abs(t->tree[index].getBranchLength());
-                                               tempCounted[groups[k]].insert(index);
-                                       }
-
-                               }
-                               //cout << "temptotal = "<< tempTotals[i] << endl;
-                       }
-                       
-                       index = t->tree[index].getParent();     
-               }
-
+                
+                if (index >= roots[groups[k]]) { counted[index][groups[k]] = true; } //if you are at this groups "root", then say we are done
+                
+                if (!counted[index][groups[k]]){ //if counted[index][groups[k] is true this groups has already added all br from here to root, so quit early
+                    if (t->tree[index].getBranchLength() != -1) {
+                        sums[k] += abs(t->tree[index].getBranchLength());
+                    }
+                    counted[index][groups[k]] = true;
+                }
+            }
+            index = t->tree[index].getParent();        
+        }
+        
                return sums;
-
+        
        }
        catch(exception& e) {
                m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength");
                exit(1);
        }
 }
-/*****************************************************************/
-int PhyloDiversityCommand::readNamesFile() {
+//**********************************************************************************************************************
+map<string, int> PhyloDiversityCommand::getRootForGroups(Tree* t){
        try {
-               m->names.clear();
-               numUniquesInName = 0;
-               
-               ifstream in;
-               m->openInputFile(namefile, in);
-               
-               string first, second;
-               map<string, string>::iterator itNames;
-               
-               while(!in.eof()) {
-                       in >> first >> second; m->gobble(in);
-                       
-                       numUniquesInName++;
-                       
-                       itNames = m->names.find(first);
-                       if (itNames == m->names.end()) {  
-                               m->names[first] = second; 
-                               
-                               //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
-                               vector<string> dupNames;
-                               m->splitAtComma(second, dupNames);
-                               
-                               for (int i = 0; i < dupNames.size(); i++) {     
-                                       nameMap[dupNames[i]] = dupNames[i]; 
-                                       if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } 
-                               }
-                       }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }                  
-               }
-               in.close();
-               
-               return 0;
+               map<string, int> roots; //maps group to root for group, may not be root of tree
+               map<string, bool> done;
+       
+               //initialize root for all groups to -1
+               for (int k = 0; k < (t->getTreeMap())->getNamesOfGroups().size(); k++) { done[(t->getTreeMap())->getNamesOfGroups()[k]] = false; }
+        
+        for (int i = 0; i < t->getNumLeaves(); i++) {
+            
+            vector<string> groups = t->tree[i].getGroup();
+            
+            int index = t->tree[i].getParent();
+            
+            for (int j = 0; j < groups.size(); j++) {
+                
+                if (done[groups[j]] == false) { //we haven't found the root for this group yet
+                    
+                    done[groups[j]] = true;
+                    roots[groups[j]] = i; //set root to self to start
+                    
+                    //while you aren't at root
+                    while(t->tree[index].getParent() != -1){
+                        
+                        if (m->control_pressed) {  return roots; }
+                        
+                        //do both your chidren have have descendants from the users groups? 
+                        int lc = t->tree[index].getLChild();
+                        int rc = t->tree[index].getRChild();
+                        
+                        int LpcountSize = 0;
+                        map<string, int>:: iterator itGroup = t->tree[lc].pcount.find(groups[j]);
+                        if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++;  } 
+                        
+                        int RpcountSize = 0;
+                        itGroup = t->tree[rc].pcount.find(groups[j]);
+                        if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++;  } 
+                        
+                        if ((LpcountSize != 0) && (RpcountSize != 0)) { //possible root
+                            roots[groups[j]] = index;
+                        }else { ;}
+                        
+                        index = t->tree[index].getParent();    
+                    }
+                }
+            }
+        }
+        
+        return roots;
+        
        }
        catch(exception& e) {
-               m->errorOut(e, "PhyloDiversityCommand", "readNamesFile");
+               m->errorOut(e, "PhyloDiversityCommand", "getRootForGroups");
                exit(1);
        }
 }
-
 //**********************************************************************************************************************