git.donarmstrong.com Git - mothur.git/blobdiff - tree.cpp
added get.groups and remove.groups commands
[mothur.git] / tree.cpp
index ad80b2bafe98c048fb44e8e2683aae962da08894..f8bb76fd664383f148c73eca62aa5cc2291202c8 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -9,11 +9,24 @@
 
 #include "tree.h"
 
-
+/*****************************************************************/
+Tree::Tree(string g) {
+       try {
+               globaldata = GlobalData::getInstance();
+               m = MothurOut::getInstance();
+               
+               parseTreeFile();  globaldata->runParse = false;  
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Tree", "Tree - just parse");
+               exit(1);
+       }
+}
 /*****************************************************************/
 Tree::Tree() {
        try {
                globaldata = GlobalData::getInstance();
+               m = MothurOut::getInstance();
                
                if (globaldata->runParse == true) {  parseTreeFile();  globaldata->runParse = false;  }
 //for(int i = 0; i <   globaldata->Treenames.size(); i++) { cout << i << '\t' << globaldata->Treenames[i] << endl;  }  
@@ -21,28 +34,41 @@ Tree::Tree() {
                numNodes = 2*numLeaves - 1;
                
                tree.resize(numNodes);
+               
+               //initialize groupNodeInfo
+               for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) {
+                       groupNodeInfo[globaldata->gTreemap->namesOfGroups[i]].resize(0);
+               }
 
                //initialize tree with correct number of nodes, name and group info.
                for (int i = 0; i < numNodes; i++) {
                        //initialize leaf nodes
                        if (i <= (numLeaves-1)) {
                                tree[i].setName(globaldata->Treenames[i]);
-                               tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->Treenames[i]));
+                               
+                               //save group info
+                               string group = globaldata->gTreemap->getGroup(globaldata->Treenames[i]);
+                               vector<string> tempGroups; tempGroups.push_back(group);
+                               tree[i].setGroup(tempGroups);
+                               groupNodeInfo[group].push_back(i); 
+                               
                                //set pcount and pGroup for groupname to 1.
-                               tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
-                               tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
+                               tree[i].pcount[group] = 1;
+                               tree[i].pGroups[group] = 1;
+                               
                                //Treemap knows name, group and index to speed up search
                                globaldata->gTreemap->setIndex(globaldata->Treenames[i], i);
        
                        //intialize non leaf nodes
                        }else if (i > (numLeaves-1)) {
                                tree[i].setName("");
-                               tree[i].setGroup("");
+                               vector<string> tempGroups;
+                               tree[i].setGroup(tempGroups);
                        }
                }
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "Tree");
+               m->errorOut(e, "Tree", "Tree");
                exit(1);
        }
 }
@@ -50,6 +76,104 @@ Tree::Tree() {
 /*****************************************************************/
 Tree::~Tree() {}
 /*****************************************************************/
+void Tree::addNamesToCounts() {
+       try {
+               //ex. seq1      seq2,seq3,seq4
+               //              seq1 = pasture
+               //              seq2 = forest
+               //              seq4 = pasture
+               //              seq3 = ocean
+               
+               //before this function seq1.pcount = pasture -> 1
+               //after                            seq1.pcount = pasture -> 2, forest -> 1, ocean -> 1
+               
+               //before this function seq1.pgroups = pasture -> 1
+               //after                            seq1.pgroups = pasture -> 1 since that is the dominant group
+
+                               
+               //go through each leaf and update its pcounts and pgroups
+               
+               //float A = clock();
+
+               for (int i = 0; i < numLeaves; i++) {
+
+                       string name = tree[i].getName();
+               
+                       map<string, string>::iterator itNames = globaldata->names.find(name);
+               
+                       if (itNames == globaldata->names.end()) { m->mothurOut(name + " is not in your name file, please correct."); m->mothurOutEndLine(); exit(1);  }
+                       else {
+                               vector<string> dupNames;
+                               m->splitAtComma(globaldata->names[name], dupNames);
+                               
+                               map<string, int>::iterator itCounts;
+                               int maxPars = 1;
+                               set<string> groupsAddedForThisNode;
+                               for (int j = 0; j < dupNames.size(); j++) {
+                                       
+                                       string group = globaldata->gTreemap->getGroup(dupNames[j]);
+                                       
+                                       if (dupNames[j] != name) {//you already added yourself in the constructor
+                               
+                                               if (groupsAddedForThisNode.count(group) == 0)  {  groupNodeInfo[group].push_back(i);  groupsAddedForThisNode.insert(group);  } //if you have not already added this node for this group, then add it
+                                               
+                                               //update pcounts
+                                               itCounts = tree[i].pcount.find(group);
+                                               if (itCounts == tree[i].pcount.end()) { //new group, add it
+                                                       tree[i].pcount[group] = 1;
+                                               }else {
+                                                       tree[i].pcount[group]++;
+                                               }
+                                                       
+                                               //update pgroups
+                                               itCounts = tree[i].pGroups.find(group);
+                                               if (itCounts == tree[i].pGroups.end()) { //new group, add it
+                                                       tree[i].pGroups[group] = 1;
+                                               }else {
+                                                       tree[i].pGroups[group]++;
+                                               }
+                                               
+                                               //keep highest group
+                                               if(tree[i].pGroups[group] > maxPars){
+                                                       maxPars = tree[i].pGroups[group];
+                                               }
+                                       }else {  groupsAddedForThisNode.insert(group);  } //add it so you don't add it to groupNodeInfo again
+                               }//end for
+                               
+                               if (maxPars > 1) { //then we have some more dominant groups
+                                       //erase all the groups that are less than maxPars because you found a more dominant group.
+                                       for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
+                                               if(it->second < maxPars){
+                                                       tree[i].pGroups.erase(it++);
+                                               }else { it++; }
+                                       }
+                                       //set each remaining group to 1
+                                       for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
+                                               tree[i].pGroups[it->first] = 1;
+                                       }
+                               }//end if
+                               
+                               //update groups to reflect all the groups this node represents
+                               vector<string> nodeGroups;
+                               map<string, int>::iterator itGroups;
+                               for (itGroups = tree[i].pcount.begin(); itGroups != tree[i].pcount.end(); itGroups++) {
+                                       nodeGroups.push_back(itGroups->first);
+                               }
+                               tree[i].setGroup(nodeGroups);
+                               
+                       }//end else
+               }//end for              
+               
+               //float B = clock();
+               //cout << "addNamesToCounts\t" << (B - A) / CLOCKS_PER_SEC << endl;     
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Tree", "addNamesToCounts");
+               exit(1);
+       }
+}
+/*****************************************************************/
 int Tree::getIndex(string searchName) {
        try {
                //Treemap knows name, group and index to speed up search
@@ -59,7 +183,7 @@ int Tree::getIndex(string searchName) {
                
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "getIndex");
+               m->errorOut(e, "Tree", "getIndex");
                exit(1);
        }
 }
@@ -71,24 +195,55 @@ void Tree::setIndex(string searchName, int index) {
                globaldata->gTreemap->setIndex(searchName, index);
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "setIndex");
+               m->errorOut(e, "Tree", "setIndex");
                exit(1);
        }
 }
 /*****************************************************************/
-void Tree::assembleTree() {
+int Tree::assembleTree() {
        try {
+               //float A = clock();
+
+               //if user has given a names file we want to include that info in the pgroups and pcount info.
+               if(globaldata->names.size() != 0) {  addNamesToCounts();  }
+               
                //build the pGroups in non leaf nodes to be used in the parsimony calcs.
                for (int i = numLeaves; i < numNodes; i++) {
+                       if (m->control_pressed) { return 1; }
+
                        tree[i].pGroups = (mergeGroups(i));
                        tree[i].pcount = (mergeGcounts(i));
                }
+               //float B = clock();
+               //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
+               return 0;
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "assembleTree");
+               m->errorOut(e, "Tree", "assembleTree");
                exit(1);
        }
 }
+/*****************************************************************/
+int Tree::assembleTree(string n) {
+       try {
+               
+               //build the pGroups in non leaf nodes to be used in the parsimony calcs.
+               for (int i = numLeaves; i < numNodes; i++) {
+                       if (m->control_pressed) { return 1; }
+
+                       tree[i].pGroups = (mergeGroups(i));
+                       tree[i].pcount = (mergeGcounts(i));
+               }
+               //float B = clock();
+               //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Tree", "assembleTree");
+               exit(1);
+       }
+}
+
 /*****************************************************************/
 void Tree::getCopy(Tree* copy) {
        try {
@@ -120,9 +275,12 @@ void Tree::getCopy(Tree* copy) {
                        //copy pcount
                        tree[i].pcount = copy->tree[i].pcount;
                }
+               
+               groupNodeInfo = copy->groupNodeInfo;
+               
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "getCopy");
+               m->errorOut(e, "Tree", "getCopy");
                exit(1);
        }
 }
@@ -175,7 +333,7 @@ map<string, int> Tree::mergeGroups(int i) {
                return parsimony;
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "mergeGroups");
+               m->errorOut(e, "Tree", "mergeGroups");
                exit(1);
        }
 }
@@ -193,14 +351,14 @@ map<string, int> Tree::mergeUserGroups(int i, vector<string> g) {
                
                //loop through nodes groups removing the ones the user doesn't want
                for(it=tree[lc].pGroups.begin();it!=tree[lc].pGroups.end();){
-                               if (inUsersGroups(it->first, g) != true) {
+                               if (m->inUsersGroups(it->first, g) != true) {
                                        tree[lc].pGroups.erase(it++);
                                }else { it++; }
                }
 
                //loop through nodes groups removing the ones the user doesn't want
                for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();){
-                               if (inUsersGroups(it->first, g) != true) {
+                               if (m->inUsersGroups(it->first, g) != true) {
                                        tree[rc].pGroups.erase(it++);
                                }else { it++; }
                }
@@ -241,7 +399,7 @@ map<string, int> Tree::mergeUserGroups(int i, vector<string> g) {
                return parsimony;
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "mergeUserGroups");
+               m->errorOut(e, "Tree", "mergeUserGroups");
                exit(1);
        }
 }
@@ -264,7 +422,7 @@ map<string,int> Tree::mergeGcounts(int position) {
                return sum;
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "mergeGcounts");
+               m->errorOut(e, "Tree", "mergeGcounts");
                exit(1);
        }
 }
@@ -272,6 +430,11 @@ map<string,int> Tree::mergeGcounts(int position) {
 
 void Tree::randomLabels(vector<string> g) {
        try {
+       
+               //initialize groupNodeInfo
+               for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) {
+                       groupNodeInfo[globaldata->gTreemap->namesOfGroups[i]].resize(0);
+               }
                
                for(int i = 0; i < numLeaves; i++){
                        int z;
@@ -282,8 +445,8 @@ void Tree::randomLabels(vector<string> g) {
                        //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
                        bool treez, treei;
                
-                       treez = inUsersGroups(tree[z].getGroup(), g);
-                       treei = inUsersGroups(tree[i].getGroup(), g);
+                       treez = m->inUsersGroups(tree[z].getGroup(), g);
+                       treei = m->inUsersGroups(tree[i].getGroup(), g);
                        
                        if ((treez == true) && (treei == true)) {
                                //switches node i and node z's info.
@@ -291,7 +454,7 @@ void Tree::randomLabels(vector<string> g) {
                                tree[z].pGroups = (tree[i].pGroups);
                                tree[i].pGroups = (lib_hold);
                                
-                               string zgroup = tree[z].getGroup();
+                               vector<string> zgroup = tree[z].getGroup();
                                tree[z].setGroup(tree[i].getGroup());
                                tree[i].setGroup(zgroup);
                                
@@ -303,14 +466,17 @@ void Tree::randomLabels(vector<string> g) {
                                tree[z].pcount = (tree[i].pcount);
                                tree[i].pcount = (gcount_hold);
                        }
+                       
+                       for (int k = 0; k < (tree[i].getGroup()).size(); k++) {  groupNodeInfo[(tree[i].getGroup())[k]].push_back(i); }
+                       for (int k = 0; k < (tree[z].getGroup()).size(); k++) {  groupNodeInfo[(tree[z].getGroup())[k]].push_back(z); }
                }
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "randomLabels");
+               m->errorOut(e, "Tree", "randomLabels");
                exit(1);
        }
 }
-/**************************************************************************************************/
+/**************************************************************************************************
 
 void Tree::randomLabels(string groupA, string groupB) {
        try {
@@ -336,7 +502,7 @@ void Tree::randomLabels(string groupA, string groupB) {
                }
        }               
        catch(exception& e) {
-               errorOut(e, "Tree", "randomLabels");
+               m->errorOut(e, "Tree", "randomLabels");
                exit(1);
        }
 }
@@ -352,19 +518,21 @@ void Tree::randomBlengths()  {
                }
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "randomBlengths");
+               m->errorOut(e, "Tree", "randomBlengths");
                exit(1);
        }
 }
 /*************************************************************************************************/
 void Tree::assembleRandomUnifracTree(vector<string> g) {
        randomLabels(g);
-       assembleTree();
+       assembleTree("noNameCounts");
 }
 /*************************************************************************************************/
 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
-       randomLabels(groupA, groupB);
-       assembleTree();
+
+       vector<string> temp; temp.push_back(groupA); temp.push_back(groupB);
+       randomLabels(temp);
+       assembleTree("noNameCounts");
 }
 
 /*************************************************************************************************/
@@ -407,7 +575,7 @@ void Tree::randomTopology() {
                }
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "randomTopology");
+               m->errorOut(e, "Tree", "randomTopology");
                exit(1);
        }
 }
@@ -419,7 +587,7 @@ void Tree::print(ostream& out) {
                out << ";" << endl;
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "print");
+               m->errorOut(e, "Tree", "print");
                exit(1);
        }
 }
@@ -431,7 +599,7 @@ void Tree::printForBoot(ostream& out) {
                out << ";" << endl;
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "printForBoot");
+               m->errorOut(e, "Tree", "printForBoot");
                exit(1);
        }
 }
@@ -441,10 +609,10 @@ void Tree::printForBoot(ostream& out) {
 void Tree::createNewickFile(string f) {
        try {
                int root = findRoot();
-               //filename = getRootName(globaldata->getTreeFile()) + "newick";
+               //filename = m->getRootName(globaldata->getTreeFile()) + "newick";
                filename = f;
 
-               openOutputFile(filename, out);
+               m->openOutputFile(filename, out);
                
                printBranch(root, out, "branch");
                
@@ -453,7 +621,7 @@ void Tree::createNewickFile(string f) {
                out.close();
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "createNewickFile");
+               m->errorOut(e, "Tree", "createNewickFile");
                exit(1);
        }
 }
@@ -472,7 +640,7 @@ int Tree::findRoot() {
                return -1;
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "findRoot");
+               m->errorOut(e, "Tree", "findRoot");
                exit(1);
        }
 }
@@ -500,7 +668,9 @@ void Tree::printBranch(int node, ostream& out, string mode) {
                                }
                        }
                }else { //you are a leaf
-                       out << tree[node].getGroup(); 
+                       string leafGroup = globaldata->gTreemap->getGroup(tree[node].getName());
+                       
+                       out << leafGroup; 
                        if (mode == "branch") {
                                //if there is a branch length then print it
                                if (tree[node].getBranchLength() != -1) {
@@ -516,7 +686,7 @@ void Tree::printBranch(int node, ostream& out, string mode) {
                
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "printBranch");
+               m->errorOut(e, "Tree", "printBranch");
                exit(1);
        }
 }
@@ -540,7 +710,7 @@ void Tree::parseTreeFile() {
        try {
                string filename = globaldata->getTreeFile();
                ifstream filehandle;
-               openInputFile(filename, filehandle);
+               m->openInputFile(filename, filehandle);
                int c, comment;
                comment = 0;
                int done = 1;
@@ -610,9 +780,13 @@ void Tree::parseTreeFile() {
                        }
                }
                filehandle.close();
+               
+               //for (int i = 0; i < globaldata->Treenames.size(); i++) {
+//cout << globaldata->Treenames[i] << endl; }
+//cout << globaldata->Treenames.size() << endl;
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "parseTreeFile");
+               m->errorOut(e, "Tree", "parseTreeFile");
                exit(1);
        }
 }
@@ -676,7 +850,7 @@ int Tree::readTreeString(ifstream& filehandle)      {
                return 0;
        }
        catch(exception& e) {
-               errorOut(e, "Tree", "readTreeString");
+               m->errorOut(e, "Tree", "readTreeString");
                exit(1);
        }
 }