]> git.donarmstrong.com Git - mothur.git/commitdiff
fixed bug in unifrac commands with unrooted trees
authorwestcott <westcott>
Wed, 23 Feb 2011 18:34:53 +0000 (18:34 +0000)
committerwestcott <westcott>
Wed, 23 Feb 2011 18:34:53 +0000 (18:34 +0000)
readtree.cpp
readtree.h
tree.cpp
unifracunweightedcommand.cpp
unweighted.cpp
unweighted.h

index e98524b45e9ec513fe171c5a342ab381a68fcacc..8158a577f478b71139ce5c6f91eeb9dedc4b0d62 100644 (file)
@@ -129,7 +129,7 @@ int ReadNewickTree::read() {
                        }
                //if you are a nexus file
                }else if ((c = filehandle.peek()) == '#') {
-                       nexusTranslation();  //reads file through the translation and updates treemap
+                       holder = nexusTranslation();  //reads file through the translation and updates treemap
                        while((c = filehandle.peek()) != EOF) { 
                                // get past comments
                                while ((c = filehandle.peek()) != EOF) {        
@@ -175,11 +175,11 @@ int ReadNewickTree::read() {
 }
 /**************************************************************************************************/
 //This function read the file through the translation of the sequences names and updates treemap.
-void ReadNewickTree::nexusTranslation() {
+string ReadNewickTree::nexusTranslation() {
        try {
                
                holder = "";
-               int numSeqs = globaldata->gTreemap->getNumSeqs(); //must save this some when we clear old names we can still know how many sequences there were
+               //int numSeqs = globaldata->gTreemap->getNumSeqs(); //must save this some when we clear old names we can still know how many sequences there were
                int comment = 0;
                
                // get past comments
@@ -191,23 +191,39 @@ void ReadNewickTree::nexusTranslation() {
                                comment = 0;
                        }
                        filehandle >> holder; 
-                       if(holder == "tree" && comment != 1){return;}
+                       if(holder == "tree" && comment != 1){return holder;}
                }
                
                //update treemap
                globaldata->gTreemap->namesOfSeqs.clear();
-               for(int i=0;i<numSeqs;i++){
-                       string number, name;
-                       filehandle >> number;
-                       filehandle >> name;
-                       name.erase(name.end()-1);  //erase the comma
+               
+               char c;
+               string number, name;
+               while ((c = filehandle.peek()) != EOF) {        
+                       
+                       filehandle >> number; 
+                       
+                       if ((number == "tree") || (number == ";") ) {  name = number; break;  }
+                       
+                       filehandle >> name; 
+                       
+                       char lastChar;
+                       if (name.length() != 0) { lastChar = name[name.length()-1]; }
+                       
+                       if ((name == "tree") || (name == ";") ) {  break;  }
+                       
+                       if (lastChar == ',') {  name.erase(name.end()-1); } //erase the comma
+                               
                        //insert new one with new name
-                       globaldata->gTreemap->treemap[toString(number)].groupname = globaldata->gTreemap->treemap[name].groupname;
-                       globaldata->gTreemap->treemap[toString(number)].vectorIndex = globaldata->gTreemap->treemap[name].vectorIndex;
+                       string group = globaldata->gTreemap->getGroup(name);
+                       globaldata->gTreemap->treemap[toString(number)].groupname = group;
+                       globaldata->gTreemap->treemap[toString(number)].vectorIndex = globaldata->gTreemap->getIndex(name);
                        //erase old one.  so treemap[sarah].groupnumber is now treemap[1].groupnumber. if number is 1 and name is sarah.
                        globaldata->gTreemap->treemap.erase(name);
                        globaldata->gTreemap->namesOfSeqs.push_back(number);
                }
+               
+               return name;
        }
        catch(exception& e) {
                m->errorOut(e, "ReadNewickTree", "nexusTranslation");
index 0c368337a85954a94750fdba504103cccbe9057f..7cfb4b426eef1ca38e562ae1c52b4ad240afc4e3 100644 (file)
@@ -51,7 +51,7 @@ private:
        Tree* T;
        int readNewickInt(istream&, int&, Tree*);
        int readTreeString();
-       void nexusTranslation();
+       string nexusTranslation();
        ifstream filehandle;
        string treeFile;
        string holder;
index 08ee850f75a485e6f10830e1c244cc26bd4107f5..06f92cf46e22cbb28f4ff28596977d5b62fb7e02 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -50,12 +50,12 @@ Tree::Tree() {
                numNodes = 2*numLeaves - 1;
                
                tree.resize(numNodes);
-               
+                       
                //initialize groupNodeInfo
                for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) {
                        groupNodeInfo[globaldata->gTreemap->namesOfGroups[i]].resize(0);
                }
-
+               
                //initialize tree with correct number of nodes, name and group info.
                for (int i = 0; i < numNodes; i++) {
                        //initialize leaf nodes
@@ -64,6 +64,7 @@ Tree::Tree() {
                                
                                //save group info
                                string group = globaldata->gTreemap->getGroup(globaldata->Treenames[i]);
+                               
                                vector<string> tempGroups; tempGroups.push_back(group);
                                tree[i].setGroup(tempGroups);
                                groupNodeInfo[group].push_back(i); 
@@ -82,6 +83,7 @@ Tree::Tree() {
                                tree[i].setGroup(tempGroups);
                        }
                }
+               
        }
        catch(exception& e) {
                m->errorOut(e, "Tree", "Tree");
index 566fd10c8ef9ff90849896982343e6970bdeaba5..fbbc4f1de67cebeb1b9e57a4dce47f499e272f63 100644 (file)
@@ -432,7 +432,7 @@ void UnifracUnweightedCommand::createPhylipFile(int i) {
                                out << name << '\t';
                                
                                //output distances
-                               for (int l = 0; l < globaldata->Groups.size(); l++) {   out  << dists[r][l] << '\t';  }
+                               for (int l = 0; l < globaldata->Groups.size(); l++) {   out << setprecision(11) << dists[r][l] << '\t';  }
                                out << endl;
                        }else{
                                //output distances
index fb2f0b22afa3cc2143ad95afe6c45e4e093db222..312d549b11a1a4a7a4e7960d5a0689885e9a52d2 100644 (file)
@@ -172,12 +172,11 @@ EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombo
                EstOutput results; results.resize(num);
                
                int count = 0;
-               int numLeaves = t->getNumLeaves();
                int total = num;
                int twentyPercent = (total * 0.20);
                if (twentyPercent == 0) { twentyPercent = 1; }
-               
-               
+        
+                       
                for (int h = start; h < (start+num); h++) {
        //cout << namesOfGroupCombos[h][0] << '\t' << namesOfGroupCombos[h][1] << endl;         
                        if (m->control_pressed) { return results; }
@@ -185,69 +184,57 @@ EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombo
                        double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
                        double totalBL = 0.00;  //all branch lengths
                        double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
-                       map<int, double> tempTotals; //maps node to total Branch Length
-                       map<int, int> nodePcountSize; //maps node to pcountSize
-                       map<int, int>::iterator itCount;
-                               
-                       for(int i=0;i<t->getNumNodes();i++){
+                                               
+                       //find a node that belongs to one of the groups in this combo
+                       int nodeBelonging = -1;
+                       for (int g = 0; g < namesOfGroupCombos[h].size(); g++) {
+                               if (t->groupNodeInfo[namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = t->groupNodeInfo[namesOfGroupCombos[h][g]][0]; break; }
+                       }
                        
-                               if (m->control_pressed) {  return data; }
-                               
-                               //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
-                               //pcountSize = 2, not unique to one group
-                               //pcountSize = 1, unique to one group
-                               
-                               int pcountSize = 0;
-                               for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
-                                       map<string, int>::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]);
-                                       if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
-                               }
-
-                               nodePcountSize[i] = pcountSize;
-                               
-                               //cout << i << '\t' << t->tree[i].getName() << " br = " << abs(t->tree[i].getBranchLength()) << '\t';           
-                               if (pcountSize == 0) { }
-                               else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(t->tree[i].getBranchLength()); }
+                       //sanity check
+                       if (nodeBelonging == -1) {
+                               m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); 
+                               for (int g = 0; g < namesOfGroupCombos[h].size()-1; g++) { m->mothurOut(namesOfGroupCombos[h][g] + "-"); }
+                               m->mothurOut(namesOfGroupCombos[h][namesOfGroupCombos[h].size()-1]);
+                               m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW;
+                       }else{
+                       
+                               getRoot(t, nodeBelonging, namesOfGroupCombos[h]);
                                
-                               //if you are a leaf from a users group add to total
-                               if (i < numLeaves) {
-                                       if ((t->tree[i].getBranchLength() != -1) && pcountSize != 0) { 
-                                       //cout << "added to total" << endl; 
-                                               totalBL += abs(t->tree[i].getBranchLength()); 
+                               for(int i=0;i<t->getNumNodes();i++){
+                                       
+                                       if (m->control_pressed) {  return data; }
+                                       
+                                       //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
+                                       //pcountSize = 2, not unique to one group
+                                       //pcountSize = 1, unique to one group
+                                       
+                                       int pcountSize = 0;
+                                       for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
+                                               map<string, int>::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]);
+                                               if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
+                                       }
+                                       //
+                                       //unique calc
+                                       if (pcountSize == 0) { }
+                                       else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root 
+                                               UniqueBL += abs(t->tree[i].getBranchLength()); 
                                        }
-                                       tempTotals[i] = 0.0;  //we don't care about you, or we have already added you
-                               }else{ //if you are not a leaf 
-                                       //do both your chidren have have descendants from the users groups? 
-                                       int lc = t->tree[i].getLChild();
-                                       int rc = t->tree[i].getRChild();
                                        
-                                       //if yes, add your childrens tempTotals
-                                       if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) {
-                                               totalBL += tempTotals[lc] + tempTotals[rc]; 
-                                               //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl;
-                                               if (t->tree[i].getBranchLength() != -1) {
-                                                       tempTotals[i] = abs(t->tree[i].getBranchLength());
-                                               }else {
-                                                       tempTotals[i] = 0.0;
-                                               }
-                                       }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0;  //we don't care about you
-                                       }else { //if no, your tempTotal is your childrens temp totals + your branch length
-                                               if (t->tree[i].getBranchLength() != -1) {
-                                                       tempTotals[i] = tempTotals[lc] + tempTotals[rc] + abs(t->tree[i].getBranchLength()); 
-                                               }else {
-                                                       tempTotals[i] = tempTotals[lc] + tempTotals[rc];
-                                               }
+                                       //total calc
+                                       if (pcountSize == 0) { }
+                                       else if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root 
+                                               totalBL += abs(t->tree[i].getBranchLength()); 
                                        }
-                                       //cout << "temptotal = "<< tempTotals[i] << endl;
+                                       
                                }
-
-                       }
-       //cout << UniqueBL << '\t' << totalBL << endl;          
-                       UW = (UniqueBL / totalBL);  
+       cout << UniqueBL << '\t' << totalBL << endl;            
+                               UW = (UniqueBL / totalBL);  
        
-                       if (isnan(UW) || isinf(UW)) { UW = 0; }
+                               if (isnan(UW) || isinf(UW)) { UW = 0; }
        
-                       results[count] = UW;
+                               results[count] = UW;
+                       }
                        count++;
 
                        //report progress
@@ -425,7 +412,6 @@ EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombo
                EstOutput results; results.resize(num);
                
                int count = 0;
-               int numLeaves = t->getNumLeaves();
                
                Tree* copyTree = new Tree;
                
@@ -442,68 +428,58 @@ EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombo
                        double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
                        double totalBL = 0.00;  //all branch lengths
                        double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
-                       map<int, double> tempTotals; //maps node to total Branch Length
-                       map<int, int> nodePcountSize; //maps node to pcountSize
-                               
-                       for(int i=0;i<copyTree->getNumNodes();i++){
+                       //find a node that belongs to one of the groups in this combo
+                       int nodeBelonging = -1;
+                       for (int g = 0; g < namesOfGroupCombos[h].size(); g++) {
+                               if (copyTree->groupNodeInfo[namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = copyTree->groupNodeInfo[namesOfGroupCombos[h][g]][0]; break; }
+                       }
                        
-                               if (m->control_pressed) {  return data; }
-                               
-                               //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
-                               //pcountSize = 2, not unique to one group
-                               //pcountSize = 1, unique to one group
-                               
-                               int pcountSize = 0;
-                               for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
-                                       map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(namesOfGroupCombos[h][j]);
-                                       if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
-                               }
+                       //sanity check
+                       if (nodeBelonging == -1) {
+                               m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); 
+                               for (int g = 0; g < namesOfGroupCombos[h].size()-1; g++) { m->mothurOut(namesOfGroupCombos[h][g] + "-"); }
+                               m->mothurOut(namesOfGroupCombos[h][namesOfGroupCombos[h].size()-1]);
+                               m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW;
+                       }else{
                                
-                               nodePcountSize[i] = pcountSize;
-                       
-                               if (pcountSize == 0) { }
-                               else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(copyTree->tree[i].getBranchLength());      }
+                               getRoot(copyTree, nodeBelonging, namesOfGroupCombos[h]);
                                
-                               //if you are a leaf from a users group add to total
-                               if (i < numLeaves) {
-                                       if ((copyTree->tree[i].getBranchLength() != -1) && pcountSize != 0) { 
-                                               totalBL += abs(copyTree->tree[i].getBranchLength()); 
+                               for(int i=0;i<copyTree->getNumNodes();i++){
+                                       
+                                       if (m->control_pressed) {  return data; }
+                                       
+                                       //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
+                                       //pcountSize = 2, not unique to one group
+                                       //pcountSize = 1, unique to one group
+                                       
+                                       int pcountSize = 0;
+                                       for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
+                                               map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(namesOfGroupCombos[h][j]);
+                                               if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
                                        }
-                                       tempTotals[i] = 0.0;  //we don't care about you, or we have already added you
-                               }else{ //if you are not a leaf 
-                                       //do both your chidren have have descendants from the users groups? 
-                                       int lc = copyTree->tree[i].getLChild();
-                                       int rc = copyTree->tree[i].getRChild();
                                        
-                                       //if yes, add your childrens tempTotals
-                                       if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) {
-                                               totalBL += tempTotals[lc] + tempTotals[rc]; 
-                                               
-                                               if (copyTree->tree[i].getBranchLength() != -1) {
-                                                       tempTotals[i] = abs(copyTree->tree[i].getBranchLength());
-                                               }else {
-                                                       tempTotals[i] = 0.0;
-                                               }
-                                       }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0;  //we don't care about you
-                                       }else { //if no, your tempTotal is your childrens temp totals + your branch length
-                                               if (t->tree[i].getBranchLength() != -1) {
-                                                       tempTotals[i] = tempTotals[lc] + tempTotals[rc] + abs(copyTree->tree[i].getBranchLength()); 
-                                               }else {
-                                                       tempTotals[i] = tempTotals[lc] + tempTotals[rc];
-                                               }
+                                       //unique calc
+                                       if (pcountSize == 0) { }
+                                       else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root 
+                                               UniqueBL += abs(copyTree->tree[i].getBranchLength()); 
+                                       }
+                                       
+                                       //total calc
+                                       if (pcountSize == 0) { }
+                                       else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root 
+                                               totalBL += abs(copyTree->tree[i].getBranchLength()); 
                                        }
                                        
                                }
-
+                               //cout << UniqueBL << '\t' << totalBL << endl;          
+                               UW = (UniqueBL / totalBL);  
+                               
+                               if (isnan(UW) || isinf(UW)) { UW = 0; }
+                               
+                               results[count] = UW;
                        }
-               
-                       UW = (UniqueBL / totalBL);  
-       
-                       if (isnan(UW) || isinf(UW)) { UW = 0; }
-       
-                       results[count] = UW;
                        count++;
-
+                       
                }
                
                delete copyTree;
@@ -516,5 +492,61 @@ EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombo
        }
 }
 /**************************************************************************************************/
+int Unweighted::getRoot(Tree* t, int v, vector<string> grouping) { 
+       try {
+               //you are a leaf so get your parent
+               int index = t->tree[index].getParent();
+               
+               //my parent is a potential root
+               rootForGrouping[grouping].insert(index);
+               
+               //while you aren't at root
+               while(t->tree[index].getParent() != -1){
+                       
+                       if (m->control_pressed) {  return 0; }
+                       
+                       //am I the root for this grouping? if so I want to stop "early"
+                       //does my sibling have descendants from the users groups? 
+                       //if so I am not the root
+                       int parent = t->tree[index].getParent();
+                       int lc = t->tree[parent].getLChild();
+                       int rc = t->tree[parent].getRChild();
+                       
+                       int sib = lc;
+                       if (lc == index) { sib = rc; }
+                       
+                       map<string, int>::iterator itGroup;
+                       int pcountSize = 0;
+                       for (int j = 0; j < grouping.size(); j++) {
+                               map<string, int>::iterator itGroup = t->tree[sib].pcount.find(grouping[j]);
+                               if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
+                       }
+                       
+                       //if yes, I am not the root
+                       if (pcountSize != 0) {
+                               rootForGrouping[grouping].clear();
+                               rootForGrouping[grouping].insert(parent);
+                       }
+                       
+                       index = parent; 
+               }
+               
+               //get all nodes above the root to add so we don't add their u values above
+               index = *(rootForGrouping[grouping].begin());
+               while(t->tree[index].getParent() != -1){
+                       int parent = t->tree[index].getParent();
+                       rootForGrouping[grouping].insert(parent);
+                       cout << parent << " in root" << endl;
+                       index = parent;
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Unweighted", "getRoot");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
 
 
index f41326115020013fb2e59df755bff1e961001c8d..598af1a9a561fb51a5861c9e26a4c1994b1d2ad3 100644 (file)
@@ -37,12 +37,13 @@ class Unweighted : public TreeCalculator  {
                TreeMap* tmap;
                int processors;
                string outputDir;
+               map< vector<string>, set<int> > rootForGrouping;  //maps a grouping combo to the roots for that combo
                
                EstOutput driver(Tree*, vector< vector<string> >, int, int); 
                EstOutput createProcesses(Tree*, vector< vector<string> >);
                EstOutput driver(Tree*, vector< vector<string> >, int, int, bool); 
                EstOutput createProcesses(Tree*, vector< vector<string> >, bool);
-
+               int getRoot(Tree*, int, vector<string>);
 };
 
 /***********************************************************************/