#include "tree.h"
/*****************************************************************/
-Tree::Tree(int num, TreeMap* t) : tmap(t) {
+Tree::Tree(int num, CountTable* t) : ct(t) {
try {
m = MothurOut::getInstance();
}
}
/*****************************************************************/
-Tree::Tree(TreeMap* t) : tmap(t) {
+Tree::Tree(CountTable* t) : ct(t) {
try {
m = MothurOut::getInstance();
if (m->runParse == true) { parseTreeFile(); m->runParse = false; }
-//for(int i = 0; i < globaldata->Treenames.size(); i++) { cout << i << '\t' << globaldata->Treenames[i] << endl; }
+
numLeaves = m->Treenames.size();
numNodes = 2*numLeaves - 1;
tree.resize(numNodes);
//initialize groupNodeInfo
- for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
- groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0);
- }
+ vector<string> namesOfGroups = ct->getNamesOfGroups();
+ for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); }
//initialize tree with correct number of nodes, name and group info.
for (int i = 0; i < numNodes; i++) {
tree[i].setName(m->Treenames[i]);
//save group info
- string group = tmap->getGroup(m->Treenames[i]);
-
- vector<string> tempGroups; tempGroups.push_back(group);
- tree[i].setGroup(tempGroups);
- groupNodeInfo[group].push_back(i);
-
- //set pcount and pGroup for groupname to 1.
- tree[i].pcount[group] = 1;
- tree[i].pGroups[group] = 1;
-
- //Treemap knows name, group and index to speed up search
- tmap->setIndex(m->Treenames[i], i);
-
+ int maxPars = 1;
+ vector<string> group;
+ vector<int> counts = ct->getGroupCounts(m->Treenames[i]);
+ for (int j = 0; j < namesOfGroups.size(); j++) {
+ if (counts[j] != 0) { //you have seqs from this group
+ groupNodeInfo[namesOfGroups[j]].push_back(i);
+ group.push_back(namesOfGroups[j]);
+ tree[i].pGroups[namesOfGroups[j]] = counts[j];
+ tree[i].pcount[namesOfGroups[j]] = counts[j];
+ //keep highest group
+ if(counts[j] > maxPars){ maxPars = counts[j]; }
+ }
+ }
+ tree[i].setGroup(group);
+ setIndex(m->Treenames[i], i);
+
+ if (maxPars > 1) { //then we have some more dominant groups
+ //erase all the groups that are less than maxPars because you found a more dominant group.
+ for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
+ if(it->second < maxPars){
+ tree[i].pGroups.erase(it++);
+ }else { it++; }
+ }
+ //set the remaining groups to 1
+ for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
+ tree[i].pGroups[it->first] = 1;
+ }
+ }//end if
+
//initialize non leaf nodes
}else if (i > (numLeaves-1)) {
tree[i].setName("");
}
}
/*****************************************************************/
-Tree::Tree(TreeMap* t, vector< vector<double> >& sims) : tmap(t) {
+Tree::Tree(CountTable* t, vector< vector<double> >& sims) : ct(t) {
try {
m = MothurOut::getInstance();
tree.resize(numNodes);
//initialize groupNodeInfo
- for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
- groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0);
- }
+ vector<string> namesOfGroups = ct->getNamesOfGroups();
+ for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); }
//initialize tree with correct number of nodes, name and group info.
for (int i = 0; i < numNodes; i++) {
tree[i].setName(m->Treenames[i]);
//save group info
- string group = tmap->getGroup(m->Treenames[i]);
-
- vector<string> tempGroups; tempGroups.push_back(group);
- tree[i].setGroup(tempGroups);
- groupNodeInfo[group].push_back(i);
-
- //set pcount and pGroup for groupname to 1.
- tree[i].pcount[group] = 1;
- tree[i].pGroups[group] = 1;
-
- //Treemap knows name, group and index to speed up search
- tmap->setIndex(m->Treenames[i], i);
+ int maxPars = 1;
+ vector<string> group;
+ vector<int> counts = ct->getGroupCounts(m->Treenames[i]);
+ for (int j = 0; j < namesOfGroups.size(); j++) {
+ if (counts[j] != 0) { //you have seqs from this group
+ groupNodeInfo[namesOfGroups[j]].push_back(i);
+ group.push_back(namesOfGroups[j]);
+ tree[i].pGroups[namesOfGroups[j]] = counts[j];
+ tree[i].pcount[namesOfGroups[j]] = counts[j];
+ //keep highest group
+ if(counts[j] > maxPars){ maxPars = counts[j]; }
+ }
+ }
+ tree[i].setGroup(group);
+ setIndex(m->Treenames[i], i);
+
+ if (maxPars > 1) { //then we have some more dominant groups
+ //erase all the groups that are less than maxPars because you found a more dominant group.
+ for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
+ if(it->second < maxPars){
+ tree[i].pGroups.erase(it++);
+ }else { it++; }
+ }
+ //set the remaining groups to 1
+ for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
+ tree[i].pGroups[it->first] = 1;
+ }
+ }//end if
//initialize non leaf nodes
}else if (i > (numLeaves-1)) {
tree[i].setGroup(tempGroups);
}
}
+
//build tree from matrix
//initialize indexes
- map<int, int> indexes; //maps row in simMatrix to vector index in the tree
- for (int g = 0; g < numLeaves; g++) { indexes[g] = g; }
+ map<int, int> thisIndexes; //maps row in simMatrix to vector index in the tree
+ for (int g = 0; g < numLeaves; g++) { thisIndexes[g] = g; }
//do merges and create tree structure by setting parents and children
//there are numGroups - 1 merges to do
//set non-leaf node info and update leaves to know their parents
//non-leaf
- tree[numLeaves + i].setChildren(indexes[row], indexes[column]);
+ tree[numLeaves + i].setChildren(thisIndexes[row], thisIndexes[column]);
//parents
- tree[indexes[row]].setParent(numLeaves + i);
- tree[indexes[column]].setParent(numLeaves + i);
+ tree[thisIndexes[row]].setParent(numLeaves + i);
+ tree[thisIndexes[column]].setParent(numLeaves + i);
//blength = distance / 2;
float blength = ((1.0 - largest) / 2);
//branchlengths
- tree[indexes[row]].setBranchLength(blength - tree[indexes[row]].getLengthToLeaves());
- tree[indexes[column]].setBranchLength(blength - tree[indexes[column]].getLengthToLeaves());
+ tree[thisIndexes[row]].setBranchLength(blength - tree[thisIndexes[row]].getLengthToLeaves());
+ tree[thisIndexes[column]].setBranchLength(blength - tree[thisIndexes[column]].getLengthToLeaves());
//set your length to leaves to your childs length plus branchlength
- tree[numLeaves + i].setLengthToLeaves(tree[indexes[row]].getLengthToLeaves() + tree[indexes[row]].getBranchLength());
+ tree[numLeaves + i].setLengthToLeaves(tree[thisIndexes[row]].getLengthToLeaves() + tree[thisIndexes[row]].getBranchLength());
//update index
- indexes[row] = numLeaves+i;
- indexes[column] = numLeaves+i;
+ thisIndexes[row] = numLeaves+i;
+ thisIndexes[column] = numLeaves+i;
//remove highest value that caused the merge.
sims[row][column] = -1000.0;
}
/*****************************************************************/
//Destructor: the body is empty, so no explicit cleanup is performed here.
Tree::~Tree() {}
-/*****************************************************************/
+/*****************************************************************
void Tree::addNamesToCounts(map<string, string> nameMap) {
try {
//ex. seq1 seq2,seq3,se4
m->errorOut(e, "Tree", "addNamesToCounts");
exit(1);
}
-}
+}*/
/*****************************************************************/
//Returns the position recorded for searchName in the indexes map,
//or -1 when the name has no entry there.
int Tree::getIndex(string searchName) {
try {
- //Treemap knows name, group and index to speed up search
- // getIndex function will return the vector index or -1 if seq is not found.
- int index = tmap->getIndex(searchName);
- return index;
-
+ map<string, int>::iterator itIndex = indexes.find(searchName); //replaces the TreeMap lookup removed above
+ if (itIndex != indexes.end()) {
+ return itIndex->second;
+ }
+ return -1; //sentinel: name was not found in indexes
}
catch(exception& e) {
m->errorOut(e, "Tree", "getIndex");
//Records index for searchName in the indexes map, but only when the name
//has no entry yet; an existing entry is left unchanged.
//NOTE(review): a name that is already indexed is never updated by this call -
//confirm no caller relies on re-indexing the way the removed tmap->setIndex may have allowed.
void Tree::setIndex(string searchName, int index) {
try {
- //set index in treemap
- tmap->setIndex(searchName, index);
+ map<string, int>::iterator itIndex = indexes.find(searchName);
+ if (itIndex == indexes.end()) { //first registration for this name
+ indexes[searchName] = index;
+ }
}
catch(exception& e) {
m->errorOut(e, "Tree", "setIndex");
}
}
/*****************************************************************/
-int Tree::assembleTree(map<string, string> nameMap) {
- try {
- //save for later
- names = nameMap;
-
- //if user has given a names file we want to include that info in the pgroups and pcount info.
- if(nameMap.size() != 0) { addNamesToCounts(nameMap); }
-
+int Tree::assembleTree() {
+ try {
//build the pGroups in non leaf nodes to be used in the parsimony calcs.
for (int i = numLeaves; i < numNodes; i++) {
if (m->control_pressed) { return 1; }
exit(1);
}
}
-/*****************************************************************
-int Tree::assembleTree(string n) {
- try {
-
- //build the pGroups in non leaf nodes to be used in the parsimony calcs.
- for (int i = numLeaves; i < numNodes; i++) {
- if (m->control_pressed) { return 1; }
-
- tree[i].pGroups = (mergeGroups(i));
- tree[i].pcount = (mergeGcounts(i));
- }
- //float B = clock();
- //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "Tree", "assembleTree");
- exit(1);
- }
-}
/*****************************************************************/
//assumes leaf node names are in groups and no names file - used by indicator command
void Tree::getSubTree(Tree* Ctree, vector<string> Groups) {
try {
//copy Tree since we are going to destroy it
- Tree* copy = new Tree(tmap);
+ Tree* copy = new Tree(ct);
copy->getCopy(Ctree);
- map<string, string> empty;
- copy->assembleTree(empty);
+ copy->assembleTree();
//we want to select some of the leaf nodes to create the output tree
//go through the input Tree starting at parents of leaves
+ //initialize groupNodeInfo
+ vector<string> namesOfGroups = ct->getNamesOfGroups();
+ for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); }
+
+ //initialize tree with correct number of nodes, name and group info.
for (int i = 0; i < numNodes; i++) {
-
//initialize leaf nodes
if (i <= (numLeaves-1)) {
tree[i].setName(Groups[i]);
//save group info
- string group = tmap->getGroup(Groups[i]);
- vector<string> tempGroups; tempGroups.push_back(group);
- tree[i].setGroup(tempGroups);
- groupNodeInfo[group].push_back(i);
-
- //set pcount and pGroup for groupname to 1.
- tree[i].pcount[group] = 1;
- tree[i].pGroups[group] = 1;
-
- //Treemap knows name, group and index to speed up search
- tmap->setIndex(Groups[i], i);
-
- //intialize non leaf nodes
+ int maxPars = 1;
+ vector<string> group;
+ vector<int> counts = ct->getGroupCounts(Groups[i]);
+ for (int j = 0; j < namesOfGroups.size(); j++) {
+ if (counts[j] != 0) { //you have seqs from this group
+ groupNodeInfo[namesOfGroups[j]].push_back(i);
+ group.push_back(namesOfGroups[j]);
+ tree[i].pGroups[namesOfGroups[j]] = counts[j];
+ tree[i].pcount[namesOfGroups[j]] = counts[j];
+ //keep highest group
+ if(counts[j] > maxPars){ maxPars = counts[j]; }
+ }
+ }
+ tree[i].setGroup(group);
+ setIndex(Groups[i], i);
+
+ if (maxPars > 1) { //then we have some more dominant groups
+ //erase all the groups that are less than maxPars because you found a more dominant group.
+ for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
+ if(it->second < maxPars){
+ tree[i].pGroups.erase(it++);
+ }else { it++; }
+ }
+ //set the remaining groups to 1
+ for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
+ tree[i].pGroups[it->first] = 1;
+ }
+ }//end if
+
+ //initialize non leaf nodes
}else if (i > (numLeaves-1)) {
tree[i].setName("");
vector<string> tempGroups;
tree[i].setGroup(tempGroups);
}
}
-
+
set<int> removedLeaves;
for (int i = 0; i < copy->getNumLeaves(); i++) {
exit(1);
}
}
-/*****************************************************************/
+/*****************************************************************
//assumes nameMap contains unique names as key or is empty.
//assumes numLeaves defined in tree constructor equals size of seqsToInclude and seqsToInclude only contains unique seqs.
int Tree::getSubTree(Tree* copy, vector<string> seqsToInclude, map<string, string> nameMap) {
return (index++);
}else { //you are a leaf
- int indexInNewTree = tmap->getIndex(oldtree[node].getName());
+ int indexInNewTree = getIndex(oldtree[node].getName());
return indexInNewTree;
}
}
}
}
/*****************************************************************/
-void Tree::getCopy(Tree* copy, map<string, string> nameMap, vector<string> namesToInclude) {
+void Tree::getCopy(Tree* copy, bool subsample) {
try {
//for each node in the tree copy its info
for (int i = 0; i < numNodes; i++) {
- //copy name
- tree[i].setName(copy->tree[i].getName());
-
- //copy group
- vector<string> temp;
- tree[i].setGroup(temp);
-
//copy branch length
tree[i].setBranchLength(copy->tree[i].getBranchLength());
//copy children
tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
-
- //copy index in node and tmap
- tree[i].setIndex(copy->tree[i].getIndex());
- setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
-
- //copy pGroups
- tree[i].pGroups.clear();
-
- //copy pcount
- tree[i].pcount.clear();
- }
-
- groupNodeInfo.clear();
-
- //now lets change prune the seqs not in namesToInclude by setting their group to "doNotIncludeMe"
- for (int i = 0; i < numLeaves; i++) {
-
- if (m->control_pressed) { break; }
-
- string name = tree[i].getName();
-
- map<string, string>::iterator itNames = nameMap.find(name);
-
- if (itNames == nameMap.end()) { m->mothurOut(name + " is not in your name file, please correct."); m->mothurOutEndLine(); exit(1); }
- else {
- vector<string> dupNames;
- m->splitAtComma(nameMap[name], dupNames);
-
- map<string, int>::iterator itCounts;
- int maxPars = 1;
- set<string> groupsAddedForThisNode;
- for (int j = 0; j < dupNames.size(); j++) {
-
- string group = tmap->getGroup(dupNames[j]);
- bool includeMe = m->inUsersGroups(dupNames[j], namesToInclude);
-
- if (!includeMe && (group != "doNotIncludeMe")) { m->mothurOut("[ERROR] : creating subtree in copy.\n"); m->control_pressed = true; }
- else if (!includeMe) {
- if (groupsAddedForThisNode.count(group) == 0) { groupNodeInfo[group].push_back(i); groupsAddedForThisNode.insert(group); } //if you have not already added this node for this group, then add it
-
- //update pcounts
- itCounts = tree[i].pcount.find(group);
- if (itCounts == tree[i].pcount.end()) { //new group, add it
- tree[i].pcount[group] = 1;
- }else {
- tree[i].pcount[group]++;
- }
-
- //update pgroups
- itCounts = tree[i].pGroups.find(group);
- if (itCounts == tree[i].pGroups.end()) { //new group, add it
- tree[i].pGroups[group] = 1;
- }else{
- tree[i].pGroups[group]++;
- }
-
- //keep highest group
- if(tree[i].pGroups[group] > maxPars){
- maxPars = tree[i].pGroups[group];
- }
- }
- }//end for
-
- if (maxPars > 1) { //then we have some more dominant groups
- //erase all the groups that are less than maxPars because you found a more dominant group.
- for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
- if(it->second < maxPars){
- tree[i].pGroups.erase(it++);
- }else { it++; }
- }
- //set one remaining groups to 1
- for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
- tree[i].pGroups[it->first] = 1;
- }
- }//end if
-
- //update groups to reflect all the groups this node represents
- vector<string> nodeGroups;
- map<string, int>::iterator itGroups;
- for (itGroups = tree[i].pcount.begin(); itGroups != tree[i].pcount.end(); itGroups++) {
- nodeGroups.push_back(itGroups->first);
- }
- tree[i].setGroup(nodeGroups);
-
- }//end else
- }//end for
-
+ }
//build the pGroups in non leaf nodes to be used in the parsimony calcs.
for (int i = numLeaves; i < numNodes; i++) {
tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
//copy index in node and tmap
+ setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
tree[i].setIndex(copy->tree[i].getIndex());
- setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
//copy pGroups
tree[i].pGroups = copy->tree[i].pGroups;
try {
//initialize groupNodeInfo
- for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
- groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0);
+ for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) {
+ groupNodeInfo[(ct->getNamesOfGroups())[i]].resize(0);
}
for(int i = 0; i < numLeaves; i++){
/*************************************************************************************************/
//Calls randomLabels(g) and then assembleTree().
//g is forwarded to randomLabels() unchanged.
void Tree::assembleRandomUnifracTree(vector<string> g) {
randomLabels(g);
- map<string, string> empty;
- assembleTree(empty);
+ assembleTree(); //the empty name map argument is no longer passed
}
/*************************************************************************************************/
//Two-group overload: packs groupA and groupB (in that order) into a vector,
//passes it to randomLabels(), then calls assembleTree().
void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
vector<string> temp; temp.push_back(groupA); temp.push_back(groupB);
randomLabels(temp);
- map<string, string> empty;
- assembleTree(empty);
+ assembleTree(); //the empty name map argument is no longer passed
}
/*************************************************************************************************/
//for now it's just a random topology, but it may become random labels as well later; that's why this is such a simple function now...
//Calls randomTopology() and then assembleTree().
void Tree::assembleRandomTree() {
randomTopology();
- map<string, string> empty;
- assembleTree(empty);
+ assembleTree(); //the empty name map argument is no longer passed
}
/**************************************************************************************************/
}
}
}else { //you are a leaf
- string leafGroup = tmap->getGroup(tree[node].getName());
+ vector<string> leafGroup = ct->getGroups(tree[node].getName());
if (mode == "branch") {
- out << leafGroup;
+ out << leafGroup[0];
//if there is a branch length then print it
if (tree[node].getBranchLength() != -1) {
out << ":" << tree[node].getBranchLength();
}
}else if (mode == "boot") {
- out << leafGroup;
+ out << leafGroup[0];
//if there is a label then print it
if (tree[node].getLabel() != -1) {
out << tree[node].getLabel();
}
}
}else { //you are a leaf
- string leafGroup = tmap->getGroup(theseNodes[node].getName());
+ vector<string> leafGroup = ct->getGroups(theseNodes[node].getName());
if (mode == "branch") {
- out << leafGroup;
+ out << leafGroup[0];
//if there is a branch length then print it
if (theseNodes[node].getBranchLength() != -1) {
out << ":" << theseNodes[node].getBranchLength();
}
}else if (mode == "boot") {
- out << leafGroup;
+ out << leafGroup[0];
//if there is a label then print it
if (theseNodes[node].getLabel() != -1) {
out << theseNodes[node].getLabel();
c = filehandle.get();
//k = c;
//cout << k << endl;
- while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) {
+ while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) {
name += c;
c = filehandle.get();
//k = c;
}
//cout << "name = " << name << endl;
- m->Treenames.push_back(name);
+ if (name != "\r" ) {
+ m->Treenames.push_back(name); } //cout << m->Treenames.size() << '\t' << name << endl;
+
filehandle.putback(c);
//k = c;
//cout << " after putback" << k << endl;