]> git.donarmstrong.com Git - mothur.git/blobdiff - tree.cpp
fixed segfault in unifrac with subsample. in progress of implementing a version of...
[mothur.git] / tree.cpp
index 432811ecaabf1e50161061e291985ca080ed7773..ed652508ef2e6b049c183777ae5eecab42100d97 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -16,7 +16,7 @@ Tree::Tree(int num, TreeMap* t) : tmap(t) {
                
                numLeaves = num;  
                numNodes = 2*numLeaves - 1;
-               
+        
                tree.resize(numNodes);
        }
        catch(exception& e) {
@@ -28,9 +28,6 @@ Tree::Tree(int num, TreeMap* t) : tmap(t) {
 Tree::Tree(string g) { //do not use tree generated by this its just to extract the treenames, its a chicken before the egg thing that needs to be revisited.
        try {
                m = MothurOut::getInstance();
-               
-               tmap = NULL;
-               
                parseTreeFile();  m->runParse = false;  
        }
        catch(exception& e) {
@@ -95,7 +92,6 @@ Tree::Tree(TreeMap* t, vector< vector<double> >& sims) : tmap(t) {
                m = MothurOut::getInstance();
                
                if (m->runParse == true) {  parseTreeFile();  m->runParse = false;  }
-        //for(int i = 0; i <   globaldata->Treenames.size(); i++) { cout << i << '\t' << globaldata->Treenames[i] << endl;  }  
                numLeaves = m->Treenames.size();
                numNodes = 2*numLeaves - 1;
                
@@ -137,12 +133,11 @@ Tree::Tree(TreeMap* t, vector< vector<double> >& sims) : tmap(t) {
         //build tree from matrix
         //initialize indexes
         map<int, int> indexes;  //maps row in simMatrix to vector index in the tree
-        int numGroups = (tmap->getNamesOfGroups()).size();
-        for (int g = 0; g < numGroups; g++) {  indexes[g] = g; }
+        for (int g = 0; g < numLeaves; g++) {  indexes[g] = g; }
                
                //do merges and create tree structure by setting parents and children
                //there are numGroups - 1 merges to do
-               for (int i = 0; i < (numGroups - 1); i++) {
+               for (int i = 0; i < (numLeaves - 1); i++) {
                        float largest = -1000.0;
                        
                        if (m->control_pressed) { break; }
@@ -157,11 +152,11 @@ Tree::Tree(TreeMap* t, vector< vector<double> >& sims) : tmap(t) {
             
                        //set non-leaf node info and update leaves to know their parents
                        //non-leaf
-                       tree[numGroups + i].setChildren(indexes[row], indexes[column]);
+                       tree[numLeaves + i].setChildren(indexes[row], indexes[column]);
                        
                        //parents
-                       tree[indexes[row]].setParent(numGroups + i);
-                       tree[indexes[column]].setParent(numGroups + i);
+                       tree[indexes[row]].setParent(numLeaves + i);
+                       tree[indexes[column]].setParent(numLeaves + i);
                        
                        //blength = distance / 2;
                        float blength = ((1.0 - largest) / 2);
@@ -171,12 +166,12 @@ Tree::Tree(TreeMap* t, vector< vector<double> >& sims) : tmap(t) {
                        tree[indexes[column]].setBranchLength(blength - tree[indexes[column]].getLengthToLeaves());
                        
                        //set your length to leaves to your childs length plus branchlength
-                       tree[numGroups + i].setLengthToLeaves(tree[indexes[row]].getLengthToLeaves() + tree[indexes[row]].getBranchLength());
+                       tree[numLeaves + i].setLengthToLeaves(tree[indexes[row]].getLengthToLeaves() + tree[indexes[row]].getBranchLength());
                        
                        
                        //update index 
-                       indexes[row] = numGroups+i;
-                       indexes[column] = numGroups+i;
+                       indexes[row] = numLeaves+i;
+                       indexes[column] = numLeaves+i;
                        
                        //remove highest value that caused the merge.
                        sims[row][column] = -1000.0;
@@ -196,7 +191,8 @@ Tree::Tree(TreeMap* t, vector< vector<double> >& sims) : tmap(t) {
                //adjust tree to make sure root to tip length is .5
                int root = findRoot();
                tree[root].setBranchLength((0.5 - tree[root].getLengthToLeaves()));
-       }
+        
+    }
        catch(exception& e) {
                m->errorOut(e, "Tree", "Tree");
                exit(1);
@@ -329,12 +325,13 @@ void Tree::setIndex(string searchName, int index) {
        }
 }
 /*****************************************************************/
-int Tree::assembleTree() {
+int Tree::assembleTree(map<string, string> nameMap) {
        try {
-               //float A = clock();
+               //save for later
+        names = nameMap;
 
                //if user has given a names file we want to include that info in the pgroups and pcount info.
-               if(m->names.size() != 0) {  addNamesToCounts(m->names);  }
+               if(nameMap.size() != 0) {  addNamesToCounts(nameMap);  }
                
                //build the pGroups in non leaf nodes to be used in the parsimony calcs.
                for (int i = numLeaves; i < numNodes; i++) {
@@ -343,8 +340,7 @@ int Tree::assembleTree() {
                        tree[i].pGroups = (mergeGroups(i));
                        tree[i].pcount = (mergeGcounts(i));
                }
-               //float B = clock();
-               //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
+               
                return 0;
        }
        catch(exception& e) {
@@ -352,7 +348,7 @@ int Tree::assembleTree() {
                exit(1);
        }
 }
-/*****************************************************************/
+/*****************************************************************
 int Tree::assembleTree(string n) {
        try {
                
@@ -380,7 +376,8 @@ void Tree::getSubTree(Tree* Ctree, vector<string> Groups) {
         //copy Tree since we are going to destroy it
         Tree* copy = new Tree(tmap);
         copy->getCopy(Ctree);
-        copy->assembleTree("nonames");
+        map<string, string> empty;
+        copy->assembleTree(empty);
         
                //we want to select some of the leaf nodes to create the output tree
                //go through the input Tree starting at parents of leaves
@@ -591,6 +588,128 @@ int Tree::populateNewTree(vector<Node>& oldtree, int node, int& index) {
        }
 }
 /*****************************************************************/
+void Tree::getCopy(Tree* copy, map<string, string> nameMap, vector<string> namesToInclude) {
+       try {
+        
+               //for each node in the tree copy its info
+               for (int i = 0; i < numNodes; i++) {
+                       //copy name
+                       tree[i].setName(copy->tree[i].getName());
+            
+                       //copy group
+            vector<string> temp;
+                       tree[i].setGroup(temp);
+                       
+                       //copy branch length
+                       tree[i].setBranchLength(copy->tree[i].getBranchLength());
+            
+                       //copy parent
+                       tree[i].setParent(copy->tree[i].getParent());
+            
+                       //copy children
+                       tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
+            
+                       //copy index in node and tmap
+                       tree[i].setIndex(copy->tree[i].getIndex());
+                       setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
+                       
+                       //copy pGroups
+                       tree[i].pGroups.clear();
+            
+                       //copy pcount
+                       tree[i].pcount.clear();
+               }
+               
+               groupNodeInfo.clear();
+        
+        //now lets change prune the seqs not in namesToInclude by setting their group to "doNotIncludeMe"
+        for (int i = 0; i < numLeaves; i++) {
+            
+            if (m->control_pressed) { break; }
+            
+                       string name = tree[i].getName();
+            
+                       map<string, string>::iterator itNames = nameMap.find(name);
+            
+                       if (itNames == nameMap.end()) { m->mothurOut(name + " is not in your name file, please correct."); m->mothurOutEndLine(); exit(1);  }
+                       else {
+                               vector<string> dupNames;
+                               m->splitAtComma(nameMap[name], dupNames);
+                               
+                               map<string, int>::iterator itCounts;
+                               int maxPars = 1;
+                               set<string> groupsAddedForThisNode;
+                               for (int j = 0; j < dupNames.size(); j++) {
+                                       
+                                       string group = tmap->getGroup(dupNames[j]);
+                    bool includeMe = m->inUsersGroups(dupNames[j], namesToInclude);
+                    
+                    if (!includeMe && (group != "doNotIncludeMe")) { m->mothurOut("[ERROR] : creating subtree in copy.\n"); m->control_pressed = true; }
+                                       else if (!includeMe) {
+                                               if (groupsAddedForThisNode.count(group) == 0)  {  groupNodeInfo[group].push_back(i);  groupsAddedForThisNode.insert(group);  } //if you have not already added this node for this group, then add it
+                                               
+                                               //update pcounts
+                                               itCounts = tree[i].pcount.find(group);
+                                               if (itCounts == tree[i].pcount.end()) { //new group, add it
+                                                       tree[i].pcount[group] = 1;
+                                               }else {
+                                                       tree[i].pcount[group]++;
+                                               }
+                        
+                                               //update pgroups
+                                               itCounts = tree[i].pGroups.find(group);
+                                               if (itCounts == tree[i].pGroups.end()) { //new group, add it
+                                                       tree[i].pGroups[group] = 1;
+                                               }else{
+                                                       tree[i].pGroups[group]++;
+                                               }
+                                               
+                                               //keep highest group
+                                               if(tree[i].pGroups[group] > maxPars){
+                                                       maxPars = tree[i].pGroups[group];
+                                               }
+                    }
+                               }//end for
+                               
+                               if (maxPars > 1) { //then we have some more dominant groups
+                                       //erase all the groups that are less than maxPars because you found a more dominant group.
+                                       for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
+                                               if(it->second < maxPars){
+                                                       tree[i].pGroups.erase(it++);
+                                               }else { it++; }
+                                       }
+                                       //set one remaining groups to 1
+                                       for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
+                                               tree[i].pGroups[it->first] = 1;
+                                       }
+                               }//end if
+                               
+                               //update groups to reflect all the groups this node represents
+                               vector<string> nodeGroups;
+                               map<string, int>::iterator itGroups;
+                               for (itGroups = tree[i].pcount.begin(); itGroups != tree[i].pcount.end(); itGroups++) {
+                                       nodeGroups.push_back(itGroups->first);
+                               }
+                               tree[i].setGroup(nodeGroups);
+                               
+                       }//end else
+               }//end for              
+        
+        
+        //build the pGroups in non leaf nodes to be used in the parsimony calcs.
+               for (int i = numLeaves; i < numNodes; i++) {
+                       if (m->control_pressed) { break; }
+            
+                       tree[i].pGroups = (mergeGroups(i));
+                       tree[i].pcount = (mergeGcounts(i));
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Tree", "getCopy");
+               exit(1);
+       }
+}
+/*****************************************************************/
 void Tree::getCopy(Tree* copy) {
        try {
        
@@ -840,21 +959,23 @@ void Tree::randomBlengths()  {
 /*************************************************************************************************/
 void Tree::assembleRandomUnifracTree(vector<string> g) {
        randomLabels(g);
-       assembleTree("noNameCounts");
+    map<string, string> empty;
+       assembleTree(empty);
 }
 /*************************************************************************************************/
 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
-
        vector<string> temp; temp.push_back(groupA); temp.push_back(groupB);
        randomLabels(temp);
-       assembleTree("noNameCounts");
+    map<string, string> empty;
+       assembleTree(empty);
 }
 
 /*************************************************************************************************/
 //for now it's just random topology but may become random labels as well later that why this is such a simple function now...
 void Tree::assembleRandomTree() {
        randomTopology();
-       assembleTree();
+    map<string, string> empty;
+       assembleTree(empty);
 }
 /**************************************************************************************************/
 
@@ -907,6 +1028,18 @@ void Tree::print(ostream& out) {
        }
 }
 /*****************************************************************/
+void Tree::print(ostream& out, map<string, string> nameMap) {
+       try {
+               int root = findRoot();
+               printBranch(root, out, nameMap);
+               out << ";" << endl;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Tree", "print");
+               exit(1);
+       }
+}
+/*****************************************************************/
 void Tree::print(ostream& out, string mode) {
        try {
                int root = findRoot();
@@ -959,10 +1092,82 @@ int Tree::findRoot() {
        }
 }
 /*****************************************************************/
-void Tree::printBranch(int node, ostream& out, string mode) {
+void Tree::printBranch(int node, ostream& out, map<string, string> names) {
 try {
 
 // you are not a leaf
+               if (tree[node].getLChild() != -1) {
+                       out << "(";
+                       printBranch(tree[node].getLChild(), out, names);
+                       out << ",";
+                       printBranch(tree[node].getRChild(), out, names);
+                       out << ")";
+                       
+            //if there is a branch length then print it
+            if (tree[node].getBranchLength() != -1) {
+                out << ":" << tree[node].getBranchLength();
+            }
+                       
+               }else { //you are a leaf
+            map<string, string>::iterator itNames = names.find(tree[node].getName());
+            
+            string outputString = "";
+            if (itNames != names.end()) { 
+                
+                vector<string> dupNames;
+                m->splitAtComma((itNames->second), dupNames);
+                
+                if (dupNames.size() == 1) {
+                    outputString += tree[node].getName();
+                    if (tree[node].getBranchLength() != -1) {
+                        outputString += ":" + toString(tree[node].getBranchLength());
+                    }
+                }else {
+                    outputString += "(";
+                    
+                    for (int u = 0; u < dupNames.size()-1; u++) {
+                        outputString += dupNames[u];
+                        
+                        if (tree[node].getBranchLength() != -1) {
+                            outputString += ":" + toString(0.0);
+                        }
+                        outputString += ",";
+                    }
+                    
+                    outputString += dupNames[dupNames.size()-1];
+                    if (tree[node].getBranchLength() != -1) {
+                        outputString += ":" + toString(0.0);
+                    }
+                    
+                    outputString += ")";
+                    if (tree[node].getBranchLength() != -1) {
+                        outputString += ":" + toString(tree[node].getBranchLength());
+                    }
+                }
+            }else { 
+                outputString = tree[node].getName();
+                //if there is a branch length then print it
+                if (tree[node].getBranchLength() != -1) {
+                    outputString += ":" + toString(tree[node].getBranchLength());
+                }
+                
+                m->mothurOut("[ERROR]: " + tree[node].getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); 
+            }
+                
+            out << outputString;
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Tree", "printBranch");
+               exit(1);
+       }
+}
+/*****************************************************************/
+void Tree::printBranch(int node, ostream& out, string mode) {
+    try {
+        
+        // you are not a leaf
                if (tree[node].getLChild() != -1) {
                        out << "(";
                        printBranch(tree[node].getLChild(), out, mode);
@@ -987,11 +1192,6 @@ try {
                                if (tree[node].getBranchLength() != -1) {
                                        out << ":" << tree[node].getBranchLength();
                                }
-                       }else if (mode == "deunique") {
-                               //if there is a branch length then print it
-                               if (tree[node].getBranchLength() != -1) {
-                                       out << ":" << tree[node].getBranchLength();
-                               }
                        }
                }else { //you are a leaf
                        string leafGroup = tmap->getGroup(tree[node].getName());
@@ -1017,53 +1217,6 @@ try {
                                if (tree[node].getBranchLength() != -1) {
                                        out << ":" << tree[node].getBranchLength();
                                }
-                       }else if (mode == "deunique") {
-                               map<string, string>::iterator itNames = m->names.find(tree[node].getName());
-                               
-                               string outputString = "";
-                               if (itNames != m->names.end()) { 
-                                       
-                                       vector<string> dupNames;
-                                       m->splitAtComma((itNames->second), dupNames);
-                                       
-                                       if (dupNames.size() == 1) {
-                                               outputString += tree[node].getName();
-                                               if (tree[node].getBranchLength() != -1) {
-                                                       outputString += ":" + toString(tree[node].getBranchLength());
-                                               }
-                                       }else {
-                                               outputString += "(";
-                                               
-                                               for (int u = 0; u < dupNames.size()-1; u++) {
-                                                       outputString += dupNames[u];
-                                                       
-                                                       if (tree[node].getBranchLength() != -1) {
-                                                               outputString += ":" + toString(0.0);
-                                                       }
-                                                       outputString += ",";
-                                               }
-                                               
-                                               outputString += dupNames[dupNames.size()-1];
-                                               if (tree[node].getBranchLength() != -1) {
-                                                       outputString += ":" + toString(0.0);
-                                               }
-                                               
-                                               outputString += ")";
-                                               if (tree[node].getBranchLength() != -1) {
-                                                       outputString += ":" + toString(tree[node].getBranchLength());
-                                               }
-                                       }
-                               }else { 
-                                       outputString = tree[node].getName();
-                                       //if there is a branch length then print it
-                                       if (tree[node].getBranchLength() != -1) {
-                                               outputString += ":" + toString(tree[node].getBranchLength());
-                                       }
-                                       
-                                       m->mothurOut("[ERROR]: " + tree[node].getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); 
-                               }
-                                       
-                               out << outputString;
                        }
                }