]> git.donarmstrong.com Git - mothur.git/blobdiff - tree.cpp
added subsample and consensus parameters to unifrac.weighted command
[mothur.git] / tree.cpp
index 08ee850f75a485e6f10830e1c244cc26bd4107f5..432811ecaabf1e50161061e291985ca080ed7773 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -10,9 +10,8 @@
 #include "tree.h"
 
 /*****************************************************************/
-Tree::Tree(int num) {
+Tree::Tree(int num, TreeMap* t) : tmap(t) {
        try {
-               globaldata = GlobalData::getInstance();
                m = MothurOut::getInstance();
                
                numLeaves = num;  
@@ -26,12 +25,13 @@ Tree::Tree(int num) {
        }
 }
 /*****************************************************************/
-Tree::Tree(string g) {
+Tree::Tree(string g) { //Do not use the tree generated by this constructor; it exists only to extract the tree names. This is a chicken-and-egg problem that needs to be revisited.
        try {
-               globaldata = GlobalData::getInstance();
                m = MothurOut::getInstance();
                
-               parseTreeFile();  globaldata->runParse = false;  
+               tmap = NULL;
+               
+               parseTreeFile();  m->runParse = false;  
        }
        catch(exception& e) {
                m->errorOut(e, "Tree", "Tree - just parse");
@@ -39,31 +39,31 @@ Tree::Tree(string g) {
        }
 }
 /*****************************************************************/
-Tree::Tree() {
+Tree::Tree(TreeMap* t) : tmap(t) {
        try {
-               globaldata = GlobalData::getInstance();
                m = MothurOut::getInstance();
                
-               if (globaldata->runParse == true) {  parseTreeFile();  globaldata->runParse = false;  }
+               if (m->runParse == true) {  parseTreeFile();  m->runParse = false;  }
 //for(int i = 0; i <   globaldata->Treenames.size(); i++) { cout << i << '\t' << globaldata->Treenames[i] << endl;  }  
-               numLeaves = globaldata->Treenames.size();
+               numLeaves = m->Treenames.size();
                numNodes = 2*numLeaves - 1;
                
                tree.resize(numNodes);
-               
+                       
                //initialize groupNodeInfo
-               for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) {
-                       groupNodeInfo[globaldata->gTreemap->namesOfGroups[i]].resize(0);
+               for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
+                       groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0);
                }
-
+               
                //initialize tree with correct number of nodes, name and group info.
                for (int i = 0; i < numNodes; i++) {
                        //initialize leaf nodes
                        if (i <= (numLeaves-1)) {
-                               tree[i].setName(globaldata->Treenames[i]);
+                               tree[i].setName(m->Treenames[i]);
                                
                                //save group info
-                               string group = globaldata->gTreemap->getGroup(globaldata->Treenames[i]);
+                               string group = tmap->getGroup(m->Treenames[i]);
+                               
                                vector<string> tempGroups; tempGroups.push_back(group);
                                tree[i].setGroup(tempGroups);
                                groupNodeInfo[group].push_back(i); 
@@ -73,7 +73,7 @@ Tree::Tree() {
                                tree[i].pGroups[group] = 1;
                                
                                //Treemap knows name, group and index to speed up search
-                               globaldata->gTreemap->setIndex(globaldata->Treenames[i], i);
+                               tmap->setIndex(m->Treenames[i], i);
        
                        //intialize non leaf nodes
                        }else if (i > (numLeaves-1)) {
@@ -82,17 +82,130 @@ Tree::Tree() {
                                tree[i].setGroup(tempGroups);
                        }
                }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Tree", "Tree");
+               exit(1);
+       }
+}
+/*****************************************************************/
+Tree::Tree(TreeMap* t, vector< vector<double> >& sims) : tmap(t) { //sets up the nodes from m->Treenames and tmap, then builds the tree structure by repeatedly merging the pair with the largest value in sims; sims is overwritten in the process
+       try {
+               m = MothurOut::getInstance();
+               
+               if (m->runParse == true) {  parseTreeFile();  m->runParse = false;  }
+        //for(int i = 0; i <   globaldata->Treenames.size(); i++) { cout << i << '\t' << globaldata->Treenames[i] << endl;  }  
+               numLeaves = m->Treenames.size();
+               numNodes = 2*numLeaves - 1;
+               
+               tree.resize(numNodes);
+        
+               //start every group named by tmap with an empty node list in groupNodeInfo
+               for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
+                       groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0);
+               }
+               
+               //fill in name and group info for all numNodes nodes; indexes 0..numLeaves-1 are the leaves.
+               for (int i = 0; i < numNodes; i++) {
+                       //initialize leaf nodes
+                       if (i <= (numLeaves-1)) {
+                               tree[i].setName(m->Treenames[i]);
+                               
+                               //look up this leaf's group in tmap and record the leaf index under that group
+                               string group = tmap->getGroup(m->Treenames[i]);
+                               
+                               vector<string> tempGroups; tempGroups.push_back(group);
+                               tree[i].setGroup(tempGroups);
+                               groupNodeInfo[group].push_back(i); 
+                               
+                               //set pcount and pGroups for this leaf's group to 1.
+                               tree[i].pcount[group] = 1;
+                               tree[i].pGroups[group] = 1;
+                               
+                               //Treemap knows name, group and index to speed up search
+                               tmap->setIndex(m->Treenames[i], i);
+                
+                //initialize non leaf nodes with an empty name and an empty group list
+                       }else if (i > (numLeaves-1)) {
+                               tree[i].setName("");
+                               vector<string> tempGroups;
+                               tree[i].setGroup(tempGroups);
+                       }
+               }
+        
+        //build tree from the sims matrix
+        //initialize indexes so that row g of sims starts out represented by tree node g
+        map<int, int> indexes;  //maps a row of sims to the tree index that currently represents it
+        int numGroups = (tmap->getNamesOfGroups()).size();
+        for (int g = 0; g < numGroups; g++) {  indexes[g] = g; }
+               
+               //do merges and create tree structure by setting parents and children
+               //there are numGroups - 1 merges to do; merge i creates node numGroups + i. NOTE(review): this presumably relies on numGroups == numLeaves == sims.size() - confirm against callers
+               for (int i = 0; i < (numGroups - 1); i++) {
+                       float largest = -1000.0;
+                       
+                       if (m->control_pressed) { break; }
+                       
+                       int row, column; //NOTE(review): both stay uninitialized if no entry in the lower triangle of sims exceeds -1000.0
+                       //find largest value in sims matrix by searching lower triangle
+                       for (int j = 1; j < sims.size(); j++) {
+                               for (int k = 0; k < j; k++) {
+                                       if (sims[j][k] > largest) {  largest = sims[j][k]; row = j; column = k;  }
+                               }
+                       }
+            
+                       //set non-leaf node info and update the merged nodes to know their parent
+                       //non-leaf
+                       tree[numGroups + i].setChildren(indexes[row], indexes[column]);
+                       
+                       //parents
+                       tree[indexes[row]].setParent(numGroups + i);
+                       tree[indexes[column]].setParent(numGroups + i);
+                       
+                       //blength = distance / 2, where distance is taken as 1 - largest similarity
+                       float blength = ((1.0 - largest) / 2);
+                       
+                       //branchlengths: subtract what each child already has below it
+                       tree[indexes[row]].setBranchLength(blength - tree[indexes[row]].getLengthToLeaves());
+                       tree[indexes[column]].setBranchLength(blength - tree[indexes[column]].getLengthToLeaves());
+                       
+                       //set the new node's length to leaves to its child's length to leaves plus that child's branch length
+                       tree[numGroups + i].setLengthToLeaves(tree[indexes[row]].getLengthToLeaves() + tree[indexes[row]].getBranchLength());
+                       
+                       
+                       //point both merged rows at the newly created node
+                       indexes[row] = numGroups+i;
+                       indexes[column] = numGroups+i;
+                       
+                       //remove highest value that caused the merge.
+                       sims[row][column] = -1000.0;
+                       sims[column][row] = -1000.0;
+                       
+                       //merge values in the sims matrix
+                       for (int n = 0; n < sims.size(); n++)   {
+                               //row becomes the average of the two merged groups
+                               sims[row][n] = (sims[row][n] + sims[column][n]) / 2;
+                               sims[n][row] = sims[row][n];
+                               //retire column by overwriting it with the -1000.0 sentinel
+                               sims[column][n] = -1000.0;
+                               sims[n][column] = -1000.0;
+                       }
+               }
+               
+               //adjust the root's branch length so that root to tip length is .5
+               int root = findRoot();
+               tree[root].setBranchLength((0.5 - tree[root].getLengthToLeaves()));
        }
        catch(exception& e) {
                m->errorOut(e, "Tree", "Tree");
                exit(1);
        }
 }
-
 /*****************************************************************/
 Tree::~Tree() {}
 /*****************************************************************/
-void Tree::addNamesToCounts() {
+void Tree::addNamesToCounts(map<string, string> nameMap) {
        try {
                //ex. seq1      seq2,seq3,se4
                //              seq1 = pasture
@@ -115,19 +228,19 @@ void Tree::addNamesToCounts() {
 
                        string name = tree[i].getName();
                
-                       map<string, string>::iterator itNames = globaldata->names.find(name);
+                       map<string, string>::iterator itNames = nameMap.find(name);
                
-                       if (itNames == globaldata->names.end()) { m->mothurOut(name + " is not in your name file, please correct."); m->mothurOutEndLine(); exit(1);  }
+                       if (itNames == nameMap.end()) { m->mothurOut(name + " is not in your name file, please correct."); m->mothurOutEndLine(); exit(1);  }
                        else {
                                vector<string> dupNames;
-                               m->splitAtComma(globaldata->names[name], dupNames);
+                               m->splitAtComma(nameMap[name], dupNames);
                                
                                map<string, int>::iterator itCounts;
                                int maxPars = 1;
                                set<string> groupsAddedForThisNode;
                                for (int j = 0; j < dupNames.size(); j++) {
                                        
-                                       string group = globaldata->gTreemap->getGroup(dupNames[j]);
+                                       string group = tmap->getGroup(dupNames[j]);
                                        
                                        if (dupNames[j] != name) {//you already added yourself in the constructor
                                
@@ -194,7 +307,7 @@ int Tree::getIndex(string searchName) {
        try {
                //Treemap knows name, group and index to speed up search
                // getIndex function will return the vector index or -1 if seq is not found.
-               int index = globaldata->gTreemap->getIndex(searchName);
+               int index = tmap->getIndex(searchName);
                return index;
                
        }
@@ -208,7 +321,7 @@ int Tree::getIndex(string searchName) {
 void Tree::setIndex(string searchName, int index) {
        try {
                //set index in treemap
-               globaldata->gTreemap->setIndex(searchName, index);
+               tmap->setIndex(searchName, index);
        }
        catch(exception& e) {
                m->errorOut(e, "Tree", "setIndex");
@@ -221,7 +334,7 @@ int Tree::assembleTree() {
                //float A = clock();
 
                //if user has given a names file we want to include that info in the pgroups and pcount info.
-               if(globaldata->names.size() != 0) {  addNamesToCounts();  }
+               if(m->names.size() != 0) {  addNamesToCounts(m->names);  }
                
                //build the pGroups in non leaf nodes to be used in the parsimony calcs.
                for (int i = numLeaves; i < numNodes; i++) {
@@ -260,9 +373,15 @@ int Tree::assembleTree(string n) {
        }
 }
 /*****************************************************************/
-void Tree::getSubTree(Tree* copy, vector<string> Groups) {
+//Assumes leaf node names are in groups and that there is no names file. Used by the indicator command.
+void Tree::getSubTree(Tree* Ctree, vector<string> Groups) {
        try {
-                       
+        
+        //copy Tree since we are going to destroy it
+        Tree* copy = new Tree(tmap);
+        copy->getCopy(Ctree);
+        copy->assembleTree("nonames");
+        
                //we want to select some of the leaf nodes to create the output tree
                //go through the input Tree starting at parents of leaves
                for (int i = 0; i < numNodes; i++) {
@@ -272,7 +391,7 @@ void Tree::getSubTree(Tree* copy, vector<string> Groups) {
                                tree[i].setName(Groups[i]);
                                
                                //save group info
-                               string group = globaldata->gTreemap->getGroup(Groups[i]);
+                               string group = tmap->getGroup(Groups[i]);
                                vector<string> tempGroups; tempGroups.push_back(group);
                                tree[i].setGroup(tempGroups);
                                groupNodeInfo[group].push_back(i); 
@@ -282,7 +401,7 @@ void Tree::getSubTree(Tree* copy, vector<string> Groups) {
                                tree[i].pGroups[group] = 1;
                                
                                //Treemap knows name, group and index to speed up search
-                               globaldata->gTreemap->setIndex(Groups[i], i);
+                               tmap->setIndex(Groups[i], i);
                                
                                //intialize non leaf nodes
                        }else if (i > (numLeaves-1)) {
@@ -328,7 +447,9 @@ void Tree::getSubTree(Tree* copy, vector<string> Groups) {
                                                
                                                copy->tree[i].setParent(grandparent);
                                                copy->tree[i].setBranchLength((copy->tree[i].getBranchLength()+copy->tree[parent].getBranchLength()));
-                                               copy->tree[grandparent].setChildren(grandparentLC, grandparentRC);
+                                               if (grandparent != -1) {
+                                                       copy->tree[grandparent].setChildren(grandparentLC, grandparentRC);
+                                               }
                                                removedLeaves.insert(sibIndex);
                                        }
                                }else{
@@ -355,7 +476,9 @@ void Tree::getSubTree(Tree* copy, vector<string> Groups) {
                                                
                                                copy->tree[sibIndex].setParent(grandparent);
                                                copy->tree[sibIndex].setBranchLength((copy->tree[sibIndex].getBranchLength()+copy->tree[parent].getBranchLength()));
-                                               copy->tree[grandparent].setChildren(grandparentLC, grandparentRC);
+                                               if (grandparent != -1) {
+                                                       copy->tree[grandparent].setChildren(grandparentLC, grandparentRC);
+                                               }
                                                removedLeaves.insert(i);
                                        }else{
                                                //neither of us are, so we want to eliminate ourselves and our parent
@@ -365,8 +488,11 @@ void Tree::getSubTree(Tree* copy, vector<string> Groups) {
                                                int parentsSibIndex;
                                                if (grandparent != -1) {
                                                        int greatgrandparent = copy->tree[grandparent].getParent();
-                                                       int greatgrandparentLC = copy->tree[greatgrandparent].getLChild();
-                                                       int greatgrandparentRC = copy->tree[greatgrandparent].getRChild();
+                                                       int greatgrandparentLC, greatgrandparentRC;
+                                                       if (greatgrandparent != -1) {
+                                                               greatgrandparentLC = copy->tree[greatgrandparent].getLChild();
+                                                               greatgrandparentRC = copy->tree[greatgrandparent].getRChild();
+                                                       }
                                                        
                                                        int grandparentLC = copy->tree[grandparent].getLChild();
                                                        int grandparentRC = copy->tree[grandparent].getRChild();
@@ -380,10 +506,12 @@ void Tree::getSubTree(Tree* copy, vector<string> Groups) {
                                                        
                                                        copy->tree[parentsSibIndex].setParent(greatgrandparent);
                                                        copy->tree[parentsSibIndex].setBranchLength((copy->tree[parentsSibIndex].getBranchLength()+copy->tree[grandparent].getBranchLength()));
-                                                       copy->tree[greatgrandparent].setChildren(greatgrandparentLC, greatgrandparentRC);
+                                                       if (greatgrandparent != -1) {
+                                                               copy->tree[greatgrandparent].setChildren(greatgrandparentLC, greatgrandparentRC);
+                                                       }
                                                }else{
-                                                       copy->tree[parent].setChildren(-1, -1);
-                                                       cout << "issues with making subtree" << endl;
+                                                       copy->tree[parent].setParent(-1);
+                                                       //cout << "issues with making subtree" << endl;
                                                }
                                                removedLeaves.insert(sibIndex);
                                                removedLeaves.insert(i);
@@ -398,13 +526,40 @@ void Tree::getSubTree(Tree* copy, vector<string> Groups) {
                        //you found the root
                        if (copy->tree[i].getParent() == -1) { root = i; break; }
                }
-               
+        
                int nextSpot = numLeaves;
                populateNewTree(copy->tree, root, nextSpot);
-                               
+        
+        delete copy;
        }
        catch(exception& e) {
-               m->errorOut(e, "Tree", "getCopy");
+               m->errorOut(e, "Tree", "getSubTree");
+               exit(1);
+       }
+}
+/*****************************************************************/
+//Calls getSubTree(copy, seqsToInclude), applies addNamesToCounts(nameMap) when nameMap is non-empty, then rebuilds pGroups and pcount for the non leaf nodes. Assumes nameMap contains unique names as key or is empty. 
+//Assumes numLeaves defined in tree constructor equals size of seqsToInclude and seqsToInclude only contains unique seqs. Returns 0 normally (also after the size-mismatch error below) and 1 if m->control_pressed is seen while rebuilding.
+int Tree::getSubTree(Tree* copy, vector<string> seqsToInclude, map<string, string> nameMap) {
+       try {
+        
+        if (numLeaves != seqsToInclude.size()) { m->mothurOut("[ERROR]: numLeaves does not equal numUniques, cannot create subtree.\n"); m->control_pressed = true; return 0; } //size mismatch: report it, set control_pressed and return without building anything
+        
+        getSubTree(copy, seqsToInclude);
+        if (nameMap.size() != 0) {  addNamesToCounts(nameMap);  }
+        
+        //build the pGroups and pcount in non leaf nodes (indexes numLeaves..numNodes-1) to be used in the parsimony calcs.
+               for (int i = numLeaves; i < numNodes; i++) {
+                       if (m->control_pressed) { return 1; } //stop early; nodes from i onward are not rebuilt
+            
+                       tree[i].pGroups = (mergeGroups(i));
+                       tree[i].pcount = (mergeGcounts(i));
+               }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "Tree", "getSubTree");
                exit(1);
        }
 }
@@ -426,7 +581,7 @@ int Tree::populateNewTree(vector<Node>& oldtree, int node, int& index) {
                        
                        return (index++);
                }else { //you are a leaf
-                       int indexInNewTree = globaldata->gTreemap->getIndex(oldtree[node].getName());
+                       int indexInNewTree = tmap->getIndex(oldtree[node].getName());
                        return indexInNewTree;
                }
        }
@@ -618,13 +773,12 @@ map<string,int> Tree::mergeGcounts(int position) {
        }
 }
 /**************************************************************************************************/
-
 void Tree::randomLabels(vector<string> g) {
        try {
        
                //initialize groupNodeInfo
-               for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) {
-                       groupNodeInfo[globaldata->gTreemap->namesOfGroups[i]].resize(0);
+               for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
+                       groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0);
                }
                
                for(int i = 0; i < numLeaves; i++){
@@ -667,36 +821,6 @@ void Tree::randomLabels(vector<string> g) {
                exit(1);
        }
 }
-/**************************************************************************************************
-
-void Tree::randomLabels(string groupA, string groupB) {
-       try {
-               int numSeqsA = globaldata->gTreemap->seqsPerGroup[groupA];
-               int numSeqsB = globaldata->gTreemap->seqsPerGroup[groupB];
-
-               vector<string> randomGroups(numSeqsA+numSeqsB, groupA);
-               for(int i=numSeqsA;i<randomGroups.size();i++){
-                       randomGroups[i] = groupB;
-               }
-               random_shuffle(randomGroups.begin(), randomGroups.end());
-                               
-               int randomCounter = 0;                          
-               for(int i=0;i<numLeaves;i++){
-                       if(tree[i].getGroup() == groupA || tree[i].getGroup() == groupB){
-                               tree[i].setGroup(randomGroups[randomCounter]);
-                               tree[i].pcount.clear();
-                               tree[i].pcount[randomGroups[randomCounter]] = 1;
-                               tree[i].pGroups.clear();
-                               tree[i].pGroups[randomGroups[randomCounter]] = 1;
-                               randomCounter++;
-                       }
-               }
-       }               
-       catch(exception& e) {
-               m->errorOut(e, "Tree", "randomLabels");
-               exit(1);
-       }
-}
 /**************************************************************************************************/
 void Tree::randomBlengths()  {
        try {
@@ -799,7 +923,7 @@ void Tree::print(ostream& out, string mode) {
 void Tree::createNewickFile(string f) {
        try {
                int root = findRoot();
-               //filename = m->getRootName(globaldata->getTreeFile()) + "newick";
+       
                filename = f;
 
                m->openOutputFile(filename, out);
@@ -863,9 +987,14 @@ try {
                                if (tree[node].getBranchLength() != -1) {
                                        out << ":" << tree[node].getBranchLength();
                                }
+                       }else if (mode == "deunique") {
+                               //if there is a branch length then print it
+                               if (tree[node].getBranchLength() != -1) {
+                                       out << ":" << tree[node].getBranchLength();
+                               }
                        }
                }else { //you are a leaf
-                       string leafGroup = globaldata->gTreemap->getGroup(tree[node].getName());
+                       string leafGroup = tmap->getGroup(tree[node].getName());
                        
                        if (mode == "branch") {
                                out << leafGroup; 
@@ -888,6 +1017,53 @@ try {
                                if (tree[node].getBranchLength() != -1) {
                                        out << ":" << tree[node].getBranchLength();
                                }
+                       }else if (mode == "deunique") {
+                               map<string, string>::iterator itNames = m->names.find(tree[node].getName());
+                               
+                               string outputString = "";
+                               if (itNames != m->names.end()) { 
+                                       
+                                       vector<string> dupNames;
+                                       m->splitAtComma((itNames->second), dupNames);
+                                       
+                                       if (dupNames.size() == 1) {
+                                               outputString += tree[node].getName();
+                                               if (tree[node].getBranchLength() != -1) {
+                                                       outputString += ":" + toString(tree[node].getBranchLength());
+                                               }
+                                       }else {
+                                               outputString += "(";
+                                               
+                                               for (int u = 0; u < dupNames.size()-1; u++) {
+                                                       outputString += dupNames[u];
+                                                       
+                                                       if (tree[node].getBranchLength() != -1) {
+                                                               outputString += ":" + toString(0.0);
+                                                       }
+                                                       outputString += ",";
+                                               }
+                                               
+                                               outputString += dupNames[dupNames.size()-1];
+                                               if (tree[node].getBranchLength() != -1) {
+                                                       outputString += ":" + toString(0.0);
+                                               }
+                                               
+                                               outputString += ")";
+                                               if (tree[node].getBranchLength() != -1) {
+                                                       outputString += ":" + toString(tree[node].getBranchLength());
+                                               }
+                                       }
+                               }else { 
+                                       outputString = tree[node].getName();
+                                       //if there is a branch length then print it
+                                       if (tree[node].getBranchLength() != -1) {
+                                               outputString += ":" + toString(tree[node].getBranchLength());
+                                       }
+                                       
+                                       m->mothurOut("[ERROR]: " + tree[node].getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); 
+                               }
+                                       
+                               out << outputString;
                        }
                }
                
@@ -928,7 +1104,7 @@ void Tree::printBranch(int node, ostream& out, string mode, vector<Node>& theseN
                                }
                        }
                }else { //you are a leaf
-                       string leafGroup = globaldata->gTreemap->getGroup(theseNodes[node].getName());
+                       string leafGroup = tmap->getGroup(theseNodes[node].getName());
                        
                        if (mode == "branch") {
                                out << leafGroup; 
@@ -977,7 +1153,7 @@ void Tree::parseTreeFile() {
        
        //only takes names from the first tree and assumes that all trees use the same names.
        try {
-               string filename = globaldata->getTreeFile();
+               string filename = m->getTreeFile();
                ifstream filehandle;
                m->openInputFile(filename, filehandle);
                int c, comment;
@@ -1043,9 +1219,9 @@ void Tree::parseTreeFile() {
                                        //c = , until done with translation then c = ;
                                        h = name.substr(name.length()-1, name.length()); 
                                        name.erase(name.end()-1);  //erase the comma
-                                       globaldata->Treenames.push_back(number);
+                                       m->Treenames.push_back(number);
                                }
-                               if(number == ";") { globaldata->Treenames.pop_back(); }  //in case ';' from translation is on next line instead of next to last name
+                               if(number == ";") { m->Treenames.pop_back(); }  //in case ';' from translation is on next line instead of next to last name
                        }
                }
                filehandle.close();
@@ -1092,7 +1268,7 @@ int Tree::readTreeString(ifstream& filehandle)    {
                                }
                                
 //cout << "name = " << name << endl;
-                               globaldata->Treenames.push_back(name);
+                               m->Treenames.push_back(name);
                                filehandle.putback(c);
 //k = c;
 //cout << " after putback" <<  k << endl;