X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=tree.cpp;h=745893471e065abee12e7752b91c71bb37c1a635;hp=d9b4a9c1e8f4e31475aa5127adb38d339d1a8631;hb=d1c97b8c04bb75faca1e76ffad60b37a4d789d3d;hpb=d2390ede25cc211f95f99e377d5654bad7e6950e diff --git a/tree.cpp b/tree.cpp index d9b4a9c..7458934 100644 --- a/tree.cpp +++ b/tree.cpp @@ -10,7 +10,7 @@ #include "tree.h" /*****************************************************************/ -Tree::Tree(int num, TreeMap* t) : tmap(t) { +Tree::Tree(int num, CountTable* t) : ct(t) { try { m = MothurOut::getInstance(); @@ -36,21 +36,20 @@ Tree::Tree(string g) { //do not use tree generated by this its just to extract t } } /*****************************************************************/ -Tree::Tree(TreeMap* t) : tmap(t) { +Tree::Tree(CountTable* t) : ct(t) { try { m = MothurOut::getInstance(); if (m->runParse == true) { parseTreeFile(); m->runParse = false; } -//for(int i = 0; i < globaldata->Treenames.size(); i++) { cout << i << '\t' << globaldata->Treenames[i] << endl; } + numLeaves = m->Treenames.size(); numNodes = 2*numLeaves - 1; tree.resize(numNodes); //initialize groupNodeInfo - for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) { - groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0); - } + vector namesOfGroups = ct->getNamesOfGroups(); + for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); } //initialize tree with correct number of nodes, name and group info. for (int i = 0; i < numNodes; i++) { @@ -59,19 +58,35 @@ Tree::Tree(TreeMap* t) : tmap(t) { tree[i].setName(m->Treenames[i]); //save group info - string group = tmap->getGroup(m->Treenames[i]); - - vector tempGroups; tempGroups.push_back(group); - tree[i].setGroup(tempGroups); - groupNodeInfo[group].push_back(i); - - //set pcount and pGroup for groupname to 1. - tree[i].pcount[group] = 1; - tree[i].pGroups[group] = 1; - - //Treemap knows name, group and index to speed up search - tmap->setIndex(m->Treenames[i], i); - + int maxPars = 1; + vector group; + vector counts = ct->getGroupCounts(m->Treenames[i]); + for (int j = 0; j < namesOfGroups.size(); j++) { + if (counts[j] != 0) { //you have seqs from this group + groupNodeInfo[namesOfGroups[j]].push_back(i); + group.push_back(namesOfGroups[j]); + tree[i].pGroups[namesOfGroups[j]] = counts[j]; + tree[i].pcount[namesOfGroups[j]] = counts[j]; + //keep highest group + if(counts[j] > maxPars){ maxPars = counts[j]; } + } + } + tree[i].setGroup(group); + setIndex(m->Treenames[i], i); + + if (maxPars > 1) { //then we have some more dominant groups + //erase all the groups that are less than maxPars because you found a more dominant group. + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){ + if(it->second < maxPars){ + tree[i].pGroups.erase(it++); + }else { it++; } + } + //set one remaining groups to 1 + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){ + tree[i].pGroups[it->first] = 1; + } + }//end if + //intialize non leaf nodes }else if (i > (numLeaves-1)) { tree[i].setName(""); @@ -87,7 +102,7 @@ Tree::Tree(TreeMap* t) : tmap(t) { } } /*****************************************************************/ -Tree::Tree(TreeMap* t, vector< vector >& sims) : tmap(t) { +Tree::Tree(CountTable* t, vector< vector >& sims) : ct(t) { try { m = MothurOut::getInstance(); @@ -98,9 +113,8 @@ Tree::Tree(TreeMap* t, vector< vector >& sims) : tmap(t) { tree.resize(numNodes); //initialize groupNodeInfo - for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) { - groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0); - } + vector namesOfGroups = ct->getNamesOfGroups(); + for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); } //initialize tree with correct number of nodes, name and group info. for (int i = 0; i < numNodes; i++) { @@ -109,18 +123,34 @@ Tree::Tree(TreeMap* t, vector< vector >& sims) : tmap(t) { tree[i].setName(m->Treenames[i]); //save group info - string group = tmap->getGroup(m->Treenames[i]); - - vector tempGroups; tempGroups.push_back(group); - tree[i].setGroup(tempGroups); - groupNodeInfo[group].push_back(i); - - //set pcount and pGroup for groupname to 1. - tree[i].pcount[group] = 1; - tree[i].pGroups[group] = 1; - - //Treemap knows name, group and index to speed up search - tmap->setIndex(m->Treenames[i], i); + int maxPars = 1; + vector group; + vector counts = ct->getGroupCounts(m->Treenames[i]); + for (int j = 0; j < namesOfGroups.size(); j++) { + if (counts[j] != 0) { //you have seqs from this group + groupNodeInfo[namesOfGroups[j]].push_back(i); + group.push_back(namesOfGroups[j]); + tree[i].pGroups[namesOfGroups[j]] = counts[j]; + tree[i].pcount[namesOfGroups[j]] = counts[j]; + //keep highest group + if(counts[j] > maxPars){ maxPars = counts[j]; } + } + } + tree[i].setGroup(group); + setIndex(m->Treenames[i], i); + + if (maxPars > 1) { //then we have some more dominant groups + //erase all the groups that are less than maxPars because you found a more dominant group. + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){ + if(it->second < maxPars){ + tree[i].pGroups.erase(it++); + }else { it++; } + } + //set one remaining groups to 1 + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){ + tree[i].pGroups[it->first] = 1; + } + }//end if //intialize non leaf nodes }else if (i > (numLeaves-1)) { @@ -129,11 +159,12 @@ Tree::Tree(TreeMap* t, vector< vector >& sims) : tmap(t) { tree[i].setGroup(tempGroups); } } + //build tree from matrix //initialize indexes - map indexes; //maps row in simMatrix to vector index in the tree - for (int g = 0; g < numLeaves; g++) { indexes[g] = g; } + map thisIndexes; //maps row in simMatrix to vector index in the tree + for (int g = 0; g < numLeaves; g++) { thisIndexes[g] = g; } //do merges and create tree structure by setting parents and children //there are numGroups - 1 merges to do @@ -152,26 +183,26 @@ Tree::Tree(TreeMap* t, vector< vector >& sims) : tmap(t) { //set non-leaf node info and update leaves to know their parents //non-leaf - tree[numLeaves + i].setChildren(indexes[row], indexes[column]); + tree[numLeaves + i].setChildren(thisIndexes[row], thisIndexes[column]); //parents - tree[indexes[row]].setParent(numLeaves + i); - tree[indexes[column]].setParent(numLeaves + i); + tree[thisIndexes[row]].setParent(numLeaves + i); + tree[thisIndexes[column]].setParent(numLeaves + i); //blength = distance / 2; float blength = ((1.0 - largest) / 2); //branchlengths - tree[indexes[row]].setBranchLength(blength - tree[indexes[row]].getLengthToLeaves()); - tree[indexes[column]].setBranchLength(blength - tree[indexes[column]].getLengthToLeaves()); + tree[thisIndexes[row]].setBranchLength(blength - tree[thisIndexes[row]].getLengthToLeaves()); + tree[thisIndexes[column]].setBranchLength(blength - tree[thisIndexes[column]].getLengthToLeaves()); //set your length to leaves to your childs length plus branchlength - tree[numLeaves + i].setLengthToLeaves(tree[indexes[row]].getLengthToLeaves() + tree[indexes[row]].getBranchLength()); + tree[numLeaves + i].setLengthToLeaves(tree[thisIndexes[row]].getLengthToLeaves() + tree[thisIndexes[row]].getBranchLength()); //update index - indexes[row] = numLeaves+i; - indexes[column] = numLeaves+i; + thisIndexes[row] = numLeaves+i; + thisIndexes[column] = numLeaves+i; //remove highest value that caused the merge. sims[row][column] = -1000.0; @@ -200,7 +231,7 @@ Tree::Tree(TreeMap* t, vector< vector >& sims) : tmap(t) { } /*****************************************************************/ Tree::~Tree() {} -/*****************************************************************/ +/***************************************************************** void Tree::addNamesToCounts(map nameMap) { try { //ex. seq1 seq2,seq3,se4 @@ -297,15 +328,15 @@ void Tree::addNamesToCounts(map nameMap) { m->errorOut(e, "Tree", "addNamesToCounts"); exit(1); } -} +}*/ /*****************************************************************/ int Tree::getIndex(string searchName) { try { - //Treemap knows name, group and index to speed up search - // getIndex function will return the vector index or -1 if seq is not found. - int index = tmap->getIndex(searchName); - return index; - + map::iterator itIndex = indexes.find(searchName); + if (itIndex != indexes.end()) { + return itIndex->second; + } + return -1; } catch(exception& e) { m->errorOut(e, "Tree", "getIndex"); @@ -316,8 +347,10 @@ int Tree::getIndex(string searchName) { void Tree::setIndex(string searchName, int index) { try { - //set index in treemap - tmap->setIndex(searchName, index); + map::iterator itIndex = indexes.find(searchName); + if (itIndex == indexes.end()) { + indexes[searchName] = index; + } } catch(exception& e) { m->errorOut(e, "Tree", "setIndex"); @@ -325,14 +358,8 @@ void Tree::setIndex(string searchName, int index) { } } /*****************************************************************/ -int Tree::assembleTree(map nameMap) { - try { - //save for later - names = nameMap; - - //if user has given a names file we want to include that info in the pgroups and pcount info. - if(nameMap.size() != 0) { addNamesToCounts(nameMap); } - +int Tree::assembleTree() { + try { //build the pGroups in non leaf nodes to be used in the parsimony calcs. for (int i = numLeaves; i < numNodes; i++) { if (m->control_pressed) { return 1; } @@ -348,66 +375,66 @@ int Tree::assembleTree(map nameMap) { exit(1); } } -/***************************************************************** -int Tree::assembleTree(string n) { - try { - - //build the pGroups in non leaf nodes to be used in the parsimony calcs. - for (int i = numLeaves; i < numNodes; i++) { - if (m->control_pressed) { return 1; } - - tree[i].pGroups = (mergeGroups(i)); - tree[i].pcount = (mergeGcounts(i)); - } - //float B = clock(); - //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl; - return 0; - } - catch(exception& e) { - m->errorOut(e, "Tree", "assembleTree"); - exit(1); - } -} /*****************************************************************/ //assumes leaf node names are in groups and no names file - used by indicator command void Tree::getSubTree(Tree* Ctree, vector Groups) { try { //copy Tree since we are going to destroy it - Tree* copy = new Tree(tmap); + Tree* copy = new Tree(ct); copy->getCopy(Ctree); - map empty; - copy->assembleTree(empty); + copy->assembleTree(); //we want to select some of the leaf nodes to create the output tree //go through the input Tree starting at parents of leaves + //initialize groupNodeInfo + vector namesOfGroups = ct->getNamesOfGroups(); + for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); } + + //initialize tree with correct number of nodes, name and group info. for (int i = 0; i < numNodes; i++) { - //initialize leaf nodes if (i <= (numLeaves-1)) { tree[i].setName(Groups[i]); //save group info - string group = tmap->getGroup(Groups[i]); - vector tempGroups; tempGroups.push_back(group); - tree[i].setGroup(tempGroups); - groupNodeInfo[group].push_back(i); - - //set pcount and pGroup for groupname to 1. - tree[i].pcount[group] = 1; - tree[i].pGroups[group] = 1; - - //Treemap knows name, group and index to speed up search - tmap->setIndex(Groups[i], i); - - //intialize non leaf nodes + int maxPars = 1; + vector group; + vector counts = ct->getGroupCounts(Groups[i]); + for (int j = 0; j < namesOfGroups.size(); j++) { + if (counts[j] != 0) { //you have seqs from this group + groupNodeInfo[namesOfGroups[j]].push_back(i); + group.push_back(namesOfGroups[j]); + tree[i].pGroups[namesOfGroups[j]] = counts[j]; + tree[i].pcount[namesOfGroups[j]] = counts[j]; + //keep highest group + if(counts[j] > maxPars){ maxPars = counts[j]; } + } + } + tree[i].setGroup(group); + setIndex(Groups[i], i); + + if (maxPars > 1) { //then we have some more dominant groups + //erase all the groups that are less than maxPars because you found a more dominant group. + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){ + if(it->second < maxPars){ + tree[i].pGroups.erase(it++); + }else { it++; } + } + //set one remaining groups to 1 + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){ + tree[i].pGroups[it->first] = 1; + } + }//end if + + //intialize non leaf nodes }else if (i > (numLeaves-1)) { tree[i].setName(""); vector tempGroups; tree[i].setGroup(tempGroups); } } - + set removedLeaves; for (int i = 0; i < copy->getNumLeaves(); i++) { @@ -534,7 +561,7 @@ void Tree::getSubTree(Tree* Ctree, vector Groups) { exit(1); } } -/*****************************************************************/ +/***************************************************************** //assumes nameMap contains unique names as key or is empty. //assumes numLeaves defined in tree constructor equals size of seqsToInclude and seqsToInclude only contains unique seqs. int Tree::getSubTree(Tree* copy, vector seqsToInclude, map nameMap) { @@ -578,7 +605,7 @@ int Tree::populateNewTree(vector& oldtree, int node, int& index) { return (index++); }else { //you are a leaf - int indexInNewTree = tmap->getIndex(oldtree[node].getName()); + int indexInNewTree = getIndex(oldtree[node].getName()); return indexInNewTree; } } @@ -588,6 +615,35 @@ int Tree::populateNewTree(vector& oldtree, int node, int& index) { } } /*****************************************************************/ +void Tree::getCopy(Tree* copy, bool subsample) { + try { + + //for each node in the tree copy its info + for (int i = 0; i < numNodes; i++) { + //copy branch length + tree[i].setBranchLength(copy->tree[i].getBranchLength()); + + //copy parent + tree[i].setParent(copy->tree[i].getParent()); + + //copy children + tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild()); + } + + //build the pGroups in non leaf nodes to be used in the parsimony calcs. + for (int i = numLeaves; i < numNodes; i++) { + if (m->control_pressed) { break; } + + tree[i].pGroups = (mergeGroups(i)); + tree[i].pcount = (mergeGcounts(i)); + } + } + catch(exception& e) { + m->errorOut(e, "Tree", "getCopy"); + exit(1); + } +} +/*****************************************************************/ void Tree::getCopy(Tree* copy) { try { @@ -609,8 +665,8 @@ void Tree::getCopy(Tree* copy) { tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild()); //copy index in node and tmap + setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName())); tree[i].setIndex(copy->tree[i].getIndex()); - setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName())); //copy pGroups tree[i].pGroups = copy->tree[i].pGroups; @@ -774,8 +830,8 @@ void Tree::randomLabels(vector g) { try { //initialize groupNodeInfo - for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) { - groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0); + for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) { + groupNodeInfo[(ct->getNamesOfGroups())[i]].resize(0); } for(int i = 0; i < numLeaves; i++){ @@ -837,23 +893,20 @@ void Tree::randomBlengths() { /*************************************************************************************************/ void Tree::assembleRandomUnifracTree(vector g) { randomLabels(g); - map empty; - assembleTree(empty); + assembleTree(); } /*************************************************************************************************/ void Tree::assembleRandomUnifracTree(string groupA, string groupB) { vector temp; temp.push_back(groupA); temp.push_back(groupB); randomLabels(temp); - map empty; - assembleTree(empty); + assembleTree(); } /*************************************************************************************************/ //for now it's just random topology but may become random labels as well later that why this is such a simple function now... void Tree::assembleRandomTree() { randomTopology(); - map empty; - assembleTree(empty); + assembleTree(); } /**************************************************************************************************/ @@ -1072,16 +1125,16 @@ void Tree::printBranch(int node, ostream& out, string mode) { } } }else { //you are a leaf - string leafGroup = tmap->getGroup(tree[node].getName()); + vector leafGroup = ct->getGroups(tree[node].getName()); if (mode == "branch") { - out << leafGroup; + out << leafGroup[0]; //if there is a branch length then print it if (tree[node].getBranchLength() != -1) { out << ":" << tree[node].getBranchLength(); } }else if (mode == "boot") { - out << leafGroup; + out << leafGroup[0]; //if there is a label then print it if (tree[node].getLabel() != -1) { out << tree[node].getLabel(); @@ -1135,16 +1188,16 @@ void Tree::printBranch(int node, ostream& out, string mode, vector& theseN } } }else { //you are a leaf - string leafGroup = tmap->getGroup(theseNodes[node].getName()); + vector leafGroup = ct->getGroups(theseNodes[node].getName()); if (mode == "branch") { - out << leafGroup; + out << leafGroup[0]; //if there is a branch length then print it if (theseNodes[node].getBranchLength() != -1) { out << ":" << theseNodes[node].getBranchLength(); } }else if (mode == "boot") { - out << leafGroup; + out << leafGroup[0]; //if there is a label then print it if (theseNodes[node].getLabel() != -1) { out << theseNodes[node].getLabel(); @@ -1180,7 +1233,7 @@ void Tree::printTree() { /*****************************************************************/ //this code is a mess and should be rethought...-slw -void Tree::parseTreeFile() { +int Tree::parseTreeFile() { //only takes names from the first tree and assumes that all trees use the same names. try { @@ -1193,8 +1246,10 @@ void Tree::parseTreeFile() { //ifyou are not a nexus file if((c = filehandle.peek()) != '#') { - while((c = filehandle.peek()) != ';') { + while((c = filehandle.peek()) != ';') { + if (m->control_pressed) { filehandle.close(); return 0; } while ((c = filehandle.peek()) != ';') { + if (m->control_pressed) { filehandle.close(); return 0; } // get past comments if(c == '[') { comment = 1; @@ -1214,7 +1269,8 @@ void Tree::parseTreeFile() { string holder = ""; // get past comments - while(holder != "translate" && holder != "Translate"){ + while(holder != "translate" && holder != "Translate"){ + if (m->control_pressed) { filehandle.close(); return 0; } if(holder == "[" || holder == "[!"){ comment = 1; } @@ -1243,7 +1299,8 @@ void Tree::parseTreeFile() { string number, name, h; h = ""; // so it enters the loop the first time - while((h != ";") && (number != ";")) { + while((h != ";") && (number != ";")) { + if (m->control_pressed) { filehandle.close(); return 0; } filehandle >> number; filehandle >> name; @@ -1256,7 +1313,7 @@ void Tree::parseTreeFile() { } } filehandle.close(); - + return 0; //for (int i = 0; i < globaldata->Treenames.size(); i++) { //cout << globaldata->Treenames[i] << endl; } //cout << globaldata->Treenames.size() << endl; @@ -1274,7 +1331,8 @@ int Tree::readTreeString(ifstream& filehandle) { int c; string name; //, k - while((c = filehandle.peek()) != ';') { + while((c = filehandle.peek()) != ';') { + if (m->control_pressed) { return 0; } //k = c; //cout << " at beginning of while " << k << endl; if(c == ')') { @@ -1291,7 +1349,7 @@ int Tree::readTreeString(ifstream& filehandle) { c = filehandle.get(); //k = c; //cout << k << endl; - while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) { + while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) { name += c; c = filehandle.get(); //k = c; @@ -1299,7 +1357,9 @@ int Tree::readTreeString(ifstream& filehandle) { } //cout << "name = " << name << endl; - m->Treenames.push_back(name); + if (name != "\r" ) { + m->Treenames.push_back(name); } //cout << m->Treenames.size() << '\t' << name << endl; + filehandle.putback(c); //k = c; //cout << " after putback" << k << endl;