X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=tree.cpp;h=44ecadd534b60d60b602d9e052259ac69709f9d3;hb=ce8794490ab1d83adcdb2b92e0302a1e43e17adf;hp=b99373510ba4d6e9d3e0d323021178227ede92e7;hpb=55386dddad84cc1140d736cabaf4dd0ae16f2e01;p=mothur.git diff --git a/tree.cpp b/tree.cpp index b993735..44ecadd 100644 --- a/tree.cpp +++ b/tree.cpp @@ -16,7 +16,7 @@ Tree::Tree(int num, TreeMap* t) : tmap(t) { numLeaves = num; numNodes = 2*numLeaves - 1; - + tree.resize(numNodes); } catch(exception& e) { @@ -28,9 +28,6 @@ Tree::Tree(int num, TreeMap* t) : tmap(t) { Tree::Tree(string g) { //do not use tree generated by this its just to extract the treenames, its a chicken before the egg thing that needs to be revisited. try { m = MothurOut::getInstance(); - - tmap = NULL; - parseTreeFile(); m->runParse = false; } catch(exception& e) { @@ -89,11 +86,122 @@ Tree::Tree(TreeMap* t) : tmap(t) { exit(1); } } - +/*****************************************************************/ +Tree::Tree(TreeMap* t, vector< vector >& sims) : tmap(t) { + try { + m = MothurOut::getInstance(); + + if (m->runParse == true) { parseTreeFile(); m->runParse = false; } + numLeaves = m->Treenames.size(); + numNodes = 2*numLeaves - 1; + + tree.resize(numNodes); + + //initialize groupNodeInfo + for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) { + groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0); + } + + //initialize tree with correct number of nodes, name and group info. + for (int i = 0; i < numNodes; i++) { + //initialize leaf nodes + if (i <= (numLeaves-1)) { + tree[i].setName(m->Treenames[i]); + + //save group info + string group = tmap->getGroup(m->Treenames[i]); + + vector tempGroups; tempGroups.push_back(group); + tree[i].setGroup(tempGroups); + groupNodeInfo[group].push_back(i); + + //set pcount and pGroup for groupname to 1. + tree[i].pcount[group] = 1; + tree[i].pGroups[group] = 1; + + //Treemap knows name, group and index to speed up search + tmap->setIndex(m->Treenames[i], i); + + //intialize non leaf nodes + }else if (i > (numLeaves-1)) { + tree[i].setName(""); + vector tempGroups; + tree[i].setGroup(tempGroups); + } + } + + //build tree from matrix + //initialize indexes + map indexes; //maps row in simMatrix to vector index in the tree + for (int g = 0; g < numLeaves; g++) { indexes[g] = g; } + + //do merges and create tree structure by setting parents and children + //there are numGroups - 1 merges to do + for (int i = 0; i < (numLeaves - 1); i++) { + float largest = -1000.0; + + if (m->control_pressed) { break; } + + int row, column; + //find largest value in sims matrix by searching lower triangle + for (int j = 1; j < sims.size(); j++) { + for (int k = 0; k < j; k++) { + if (sims[j][k] > largest) { largest = sims[j][k]; row = j; column = k; } + } + } + + //set non-leaf node info and update leaves to know their parents + //non-leaf + tree[numLeaves + i].setChildren(indexes[row], indexes[column]); + + //parents + tree[indexes[row]].setParent(numLeaves + i); + tree[indexes[column]].setParent(numLeaves + i); + + //blength = distance / 2; + float blength = ((1.0 - largest) / 2); + + //branchlengths + tree[indexes[row]].setBranchLength(blength - tree[indexes[row]].getLengthToLeaves()); + tree[indexes[column]].setBranchLength(blength - tree[indexes[column]].getLengthToLeaves()); + + //set your length to leaves to your childs length plus branchlength + tree[numLeaves + i].setLengthToLeaves(tree[indexes[row]].getLengthToLeaves() + tree[indexes[row]].getBranchLength()); + + + //update index + indexes[row] = numLeaves+i; + indexes[column] = numLeaves+i; + + //remove highest value that caused the merge. + sims[row][column] = -1000.0; + sims[column][row] = -1000.0; + + //merge values in simsMatrix + for (int n = 0; n < sims.size(); n++) { + //row becomes merge of 2 groups + sims[row][n] = (sims[row][n] + sims[column][n]) / 2; + sims[n][row] = sims[row][n]; + //delete column + sims[column][n] = -1000.0; + sims[n][column] = -1000.0; + } + } + + //adjust tree to make sure root to tip length is .5 + int root = findRoot(); + tree[root].setBranchLength((0.5 - tree[root].getLengthToLeaves())); + + } + catch(exception& e) { + m->errorOut(e, "Tree", "Tree"); + exit(1); + } +} /*****************************************************************/ Tree::~Tree() {} /*****************************************************************/ -void Tree::addNamesToCounts() { +void Tree::addNamesToCounts(map nameMap) { try { //ex. seq1 seq2,seq3,se4 // seq1 = pasture @@ -116,12 +224,12 @@ void Tree::addNamesToCounts() { string name = tree[i].getName(); - map::iterator itNames = m->names.find(name); + map::iterator itNames = nameMap.find(name); - if (itNames == m->names.end()) { m->mothurOut(name + " is not in your name file, please correct."); m->mothurOutEndLine(); exit(1); } + if (itNames == nameMap.end()) { m->mothurOut(name + " is not in your name file, please correct."); m->mothurOutEndLine(); exit(1); } else { vector dupNames; - m->splitAtComma(m->names[name], dupNames); + m->splitAtComma(nameMap[name], dupNames); map::iterator itCounts; int maxPars = 1; @@ -217,12 +325,13 @@ void Tree::setIndex(string searchName, int index) { } } /*****************************************************************/ -int Tree::assembleTree() { +int Tree::assembleTree(map nameMap) { try { - //float A = clock(); + //save for later + names = nameMap; //if user has given a names file we want to include that info in the pgroups and pcount info. - if(m->names.size() != 0) { addNamesToCounts(); } + if(nameMap.size() != 0) { addNamesToCounts(nameMap); } //build the pGroups in non leaf nodes to be used in the parsimony calcs. for (int i = numLeaves; i < numNodes; i++) { @@ -231,8 +340,7 @@ int Tree::assembleTree() { tree[i].pGroups = (mergeGroups(i)); tree[i].pcount = (mergeGcounts(i)); } - //float B = clock(); - //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl; + return 0; } catch(exception& e) { @@ -240,7 +348,7 @@ int Tree::assembleTree() { exit(1); } } -/*****************************************************************/ +/***************************************************************** int Tree::assembleTree(string n) { try { @@ -261,9 +369,16 @@ int Tree::assembleTree(string n) { } } /*****************************************************************/ -void Tree::getSubTree(Tree* copy, vector Groups) { +//assumes leaf node names are in groups and no names file - used by indicator command +void Tree::getSubTree(Tree* Ctree, vector Groups) { try { - + + //copy Tree since we are going to destroy it + Tree* copy = new Tree(tmap); + copy->getCopy(Ctree); + map empty; + copy->assembleTree(empty); + //we want to select some of the leaf nodes to create the output tree //go through the input Tree starting at parents of leaves for (int i = 0; i < numNodes; i++) { @@ -408,12 +523,40 @@ void Tree::getSubTree(Tree* copy, vector Groups) { //you found the root if (copy->tree[i].getParent() == -1) { root = i; break; } } - + int nextSpot = numLeaves; populateNewTree(copy->tree, root, nextSpot); + + delete copy; } catch(exception& e) { - m->errorOut(e, "Tree", "getCopy"); + m->errorOut(e, "Tree", "getSubTree"); + exit(1); + } +} +/*****************************************************************/ +//assumes nameMap contains unique names as key or is empty. +//assumes numLeaves defined in tree constructor equals size of seqsToInclude and seqsToInclude only contains unique seqs. +int Tree::getSubTree(Tree* copy, vector seqsToInclude, map nameMap) { + try { + + if (numLeaves != seqsToInclude.size()) { m->mothurOut("[ERROR]: numLeaves does not equal numUniques, cannot create subtree.\n"); m->control_pressed = true; return 0; } + + getSubTree(copy, seqsToInclude); + if (nameMap.size() != 0) { addNamesToCounts(nameMap); } + + //build the pGroups in non leaf nodes to be used in the parsimony calcs. + for (int i = numLeaves; i < numNodes; i++) { + if (m->control_pressed) { return 1; } + + tree[i].pGroups = (mergeGroups(i)); + tree[i].pcount = (mergeGcounts(i)); + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "Tree", "getSubTree"); exit(1); } } @@ -445,6 +588,37 @@ int Tree::populateNewTree(vector& oldtree, int node, int& index) { } } /*****************************************************************/ +void Tree::getCopy(Tree* copy, map nameMap) { + try { + + //for each node in the tree copy its info + for (int i = 0; i < numNodes; i++) { + //copy branch length + tree[i].setBranchLength(copy->tree[i].getBranchLength()); + + //copy parent + tree[i].setParent(copy->tree[i].getParent()); + + //copy children + tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild()); + } + + if (nameMap.size() != 0) { addNamesToCounts(nameMap); } + + //build the pGroups in non leaf nodes to be used in the parsimony calcs. + for (int i = numLeaves; i < numNodes; i++) { + if (m->control_pressed) { break; } + + tree[i].pGroups = (mergeGroups(i)); + tree[i].pcount = (mergeGcounts(i)); + } + } + catch(exception& e) { + m->errorOut(e, "Tree", "getCopy"); + exit(1); + } +} +/*****************************************************************/ void Tree::getCopy(Tree* copy) { try { @@ -627,7 +801,6 @@ map Tree::mergeGcounts(int position) { } } /**************************************************************************************************/ - void Tree::randomLabels(vector g) { try { @@ -676,36 +849,6 @@ void Tree::randomLabels(vector g) { exit(1); } } -/************************************************************************************************** - -void Tree::randomLabels(string groupA, string groupB) { - try { - int numSeqsA = globaldata->gTreemap->seqsPerGroup[groupA]; - int numSeqsB = globaldata->gTreemap->seqsPerGroup[groupB]; - - vector randomGroups(numSeqsA+numSeqsB, groupA); - for(int i=numSeqsA;ierrorOut(e, "Tree", "randomLabels"); - exit(1); - } -} /**************************************************************************************************/ void Tree::randomBlengths() { try { @@ -725,21 +868,23 @@ void Tree::randomBlengths() { /*************************************************************************************************/ void Tree::assembleRandomUnifracTree(vector g) { randomLabels(g); - assembleTree("noNameCounts"); + map empty; + assembleTree(empty); } /*************************************************************************************************/ void Tree::assembleRandomUnifracTree(string groupA, string groupB) { - vector temp; temp.push_back(groupA); temp.push_back(groupB); randomLabels(temp); - assembleTree("noNameCounts"); + map empty; + assembleTree(empty); } /*************************************************************************************************/ //for now it's just random topology but may become random labels as well later that why this is such a simple function now... void Tree::assembleRandomTree() { randomTopology(); - assembleTree(); + map empty; + assembleTree(empty); } /**************************************************************************************************/ @@ -792,6 +937,18 @@ void Tree::print(ostream& out) { } } /*****************************************************************/ +void Tree::print(ostream& out, map nameMap) { + try { + int root = findRoot(); + printBranch(root, out, nameMap); + out << ";" << endl; + } + catch(exception& e) { + m->errorOut(e, "Tree", "print"); + exit(1); + } +} +/*****************************************************************/ void Tree::print(ostream& out, string mode) { try { int root = findRoot(); @@ -844,10 +1001,82 @@ int Tree::findRoot() { } } /*****************************************************************/ -void Tree::printBranch(int node, ostream& out, string mode) { +void Tree::printBranch(int node, ostream& out, map names) { try { // you are not a leaf + if (tree[node].getLChild() != -1) { + out << "("; + printBranch(tree[node].getLChild(), out, names); + out << ","; + printBranch(tree[node].getRChild(), out, names); + out << ")"; + + //if there is a branch length then print it + if (tree[node].getBranchLength() != -1) { + out << ":" << tree[node].getBranchLength(); + } + + }else { //you are a leaf + map::iterator itNames = names.find(tree[node].getName()); + + string outputString = ""; + if (itNames != names.end()) { + + vector dupNames; + m->splitAtComma((itNames->second), dupNames); + + if (dupNames.size() == 1) { + outputString += tree[node].getName(); + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(tree[node].getBranchLength()); + } + }else { + outputString += "("; + + for (int u = 0; u < dupNames.size()-1; u++) { + outputString += dupNames[u]; + + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(0.0); + } + outputString += ","; + } + + outputString += dupNames[dupNames.size()-1]; + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(0.0); + } + + outputString += ")"; + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(tree[node].getBranchLength()); + } + } + }else { + outputString = tree[node].getName(); + //if there is a branch length then print it + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(tree[node].getBranchLength()); + } + + m->mothurOut("[ERROR]: " + tree[node].getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); + } + + out << outputString; + } + + } + catch(exception& e) { + m->errorOut(e, "Tree", "printBranch"); + exit(1); + } +} +/*****************************************************************/ +void Tree::printBranch(int node, ostream& out, string mode) { + try { + + // you are not a leaf if (tree[node].getLChild() != -1) { out << "("; printBranch(tree[node].getLChild(), out, mode); @@ -872,11 +1101,6 @@ try { if (tree[node].getBranchLength() != -1) { out << ":" << tree[node].getBranchLength(); } - }else if (mode == "deunique") { - //if there is a branch length then print it - if (tree[node].getBranchLength() != -1) { - out << ":" << tree[node].getBranchLength(); - } } }else { //you are a leaf string leafGroup = tmap->getGroup(tree[node].getName()); @@ -902,53 +1126,6 @@ try { if (tree[node].getBranchLength() != -1) { out << ":" << tree[node].getBranchLength(); } - }else if (mode == "deunique") { - map::iterator itNames = m->names.find(tree[node].getName()); - - string outputString = ""; - if (itNames != m->names.end()) { - - vector dupNames; - m->splitAtComma((itNames->second), dupNames); - - if (dupNames.size() == 1) { - outputString += tree[node].getName(); - if (tree[node].getBranchLength() != -1) { - outputString += ":" + toString(tree[node].getBranchLength()); - } - }else { - outputString += "("; - - for (int u = 0; u < dupNames.size()-1; u++) { - outputString += dupNames[u]; - - if (tree[node].getBranchLength() != -1) { - outputString += ":" + toString(0.0); - } - outputString += ","; - } - - outputString += dupNames[dupNames.size()-1]; - if (tree[node].getBranchLength() != -1) { - outputString += ":" + toString(0.0); - } - - outputString += ")"; - if (tree[node].getBranchLength() != -1) { - outputString += ":" + toString(tree[node].getBranchLength()); - } - } - }else { - outputString = tree[node].getName(); - //if there is a branch length then print it - if (tree[node].getBranchLength() != -1) { - outputString += ":" + toString(tree[node].getBranchLength()); - } - - m->mothurOut("[ERROR]: " + tree[node].getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); - } - - out << outputString; } }