X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=tree.cpp;h=c67f03d8f064f6699c192e62184df6763576b113;hb=3247d888e7aafc4a65ec9062a94dfd166c2c5b1d;hp=400a72ae388a88c5b94aa2b8a717471f600b33ed;hpb=2a7d1455e8cfe4f67a7173f3a7249762c5436217;p=mothur.git diff --git a/tree.cpp b/tree.cpp index 400a72a..c67f03d 100644 --- a/tree.cpp +++ b/tree.cpp @@ -9,7 +9,35 @@ #include "tree.h" - +/*****************************************************************/ +Tree::Tree(int num) { + try { + globaldata = GlobalData::getInstance(); + m = MothurOut::getInstance(); + + numLeaves = num; + numNodes = 2*numLeaves - 1; + + tree.resize(numNodes); + } + catch(exception& e) { + m->errorOut(e, "Tree", "Tree - numNodes"); + exit(1); + } +} +/*****************************************************************/ +Tree::Tree(string g) { + try { + globaldata = GlobalData::getInstance(); + m = MothurOut::getInstance(); + + parseTreeFile(); globaldata->runParse = false; + } + catch(exception& e) { + m->errorOut(e, "Tree", "Tree - just parse"); + exit(1); + } +} /*****************************************************************/ Tree::Tree() { try { @@ -22,17 +50,28 @@ Tree::Tree() { numNodes = 2*numLeaves - 1; tree.resize(numNodes); + + //initialize groupNodeInfo + for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) { + groupNodeInfo[globaldata->gTreemap->namesOfGroups[i]].resize(0); + } //initialize tree with correct number of nodes, name and group info. for (int i = 0; i < numNodes; i++) { //initialize leaf nodes if (i <= (numLeaves-1)) { tree[i].setName(globaldata->Treenames[i]); - vector tempGroups; tempGroups.push_back(globaldata->gTreemap->getGroup(globaldata->Treenames[i])); + + //save group info + string group = globaldata->gTreemap->getGroup(globaldata->Treenames[i]); + vector tempGroups; tempGroups.push_back(group); tree[i].setGroup(tempGroups); + groupNodeInfo[group].push_back(i); + //set pcount and pGroup for groupname to 1. - tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1; - tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1; + tree[i].pcount[group] = 1; + tree[i].pGroups[group] = 1; + //Treemap knows name, group and index to speed up search globaldata->gTreemap->setIndex(globaldata->Treenames[i], i); @@ -69,6 +108,9 @@ void Tree::addNamesToCounts() { //go through each leaf and update its pcounts and pgroups + + //float A = clock(); + for (int i = 0; i < numLeaves; i++) { string name = tree[i].getName(); @@ -78,14 +120,18 @@ void Tree::addNamesToCounts() { if (itNames == globaldata->names.end()) { m->mothurOut(name + " is not in your name file, please correct."); m->mothurOutEndLine(); exit(1); } else { vector dupNames; - splitAtComma(globaldata->names[name], dupNames); + m->splitAtComma(globaldata->names[name], dupNames); map::iterator itCounts; int maxPars = 1; + set groupsAddedForThisNode; for (int j = 0; j < dupNames.size(); j++) { - + + string group = globaldata->gTreemap->getGroup(dupNames[j]); + if (dupNames[j] != name) {//you already added yourself in the constructor - string group = globaldata->gTreemap->getGroup(dupNames[j]); + + if (groupsAddedForThisNode.count(group) == 0) { groupNodeInfo[group].push_back(i); groupsAddedForThisNode.insert(group); } //if you have not already added this node for this group, then add it //update pcounts itCounts = tree[i].pcount.find(group); @@ -99,7 +145,7 @@ void Tree::addNamesToCounts() { itCounts = tree[i].pGroups.find(group); if (itCounts == tree[i].pGroups.end()) { //new group, add it tree[i].pGroups[group] = 1; - }else { + }else{ tree[i].pGroups[group]++; } @@ -107,7 +153,7 @@ void Tree::addNamesToCounts() { if(tree[i].pGroups[group] > maxPars){ maxPars = tree[i].pGroups[group]; } - }//end if + }else { groupsAddedForThisNode.insert(group); } //add it so you don't add it to groupNodeInfo again }//end for if (maxPars > 1) { //then we have some more dominant groups @@ -132,7 +178,11 @@ void Tree::addNamesToCounts() { tree[i].setGroup(nodeGroups); }//end else - }//end for + }//end for + + //float B = clock(); + //cout << "addNamesToCounts\t" << (B - A) / CLOCKS_PER_SEC << endl; + } catch(exception& e) { m->errorOut(e, "Tree", "addNamesToCounts"); @@ -168,7 +218,8 @@ void Tree::setIndex(string searchName, int index) { /*****************************************************************/ int Tree::assembleTree() { try { - + //float A = clock(); + //if user has given a names file we want to include that info in the pgroups and pcount info. if(globaldata->names.size() != 0) { addNamesToCounts(); } @@ -179,7 +230,28 @@ int Tree::assembleTree() { tree[i].pGroups = (mergeGroups(i)); tree[i].pcount = (mergeGcounts(i)); } + //float B = clock(); + //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl; + return 0; + } + catch(exception& e) { + m->errorOut(e, "Tree", "assembleTree"); + exit(1); + } +} +/*****************************************************************/ +int Tree::assembleTree(string n) { + try { + //build the pGroups in non leaf nodes to be used in the parsimony calcs. + for (int i = numLeaves; i < numNodes; i++) { + if (m->control_pressed) { return 1; } + + tree[i].pGroups = (mergeGroups(i)); + tree[i].pcount = (mergeGcounts(i)); + } + //float B = clock(); + //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl; return 0; } catch(exception& e) { @@ -188,6 +260,182 @@ int Tree::assembleTree() { } } /*****************************************************************/ +void Tree::getSubTree(Tree* copy, vector Groups) { + try { + + //we want to select some of the leaf nodes to create the output tree + //go through the input Tree starting at parents of leaves + for (int i = 0; i < numNodes; i++) { + + //initialize leaf nodes + if (i <= (numLeaves-1)) { + tree[i].setName(Groups[i]); + + //save group info + string group = globaldata->gTreemap->getGroup(Groups[i]); + vector tempGroups; tempGroups.push_back(group); + tree[i].setGroup(tempGroups); + groupNodeInfo[group].push_back(i); + + //set pcount and pGroup for groupname to 1. + tree[i].pcount[group] = 1; + tree[i].pGroups[group] = 1; + + //Treemap knows name, group and index to speed up search + globaldata->gTreemap->setIndex(Groups[i], i); + + //intialize non leaf nodes + }else if (i > (numLeaves-1)) { + tree[i].setName(""); + vector tempGroups; + tree[i].setGroup(tempGroups); + } + } + + set removedLeaves; + for (int i = 0; i < copy->getNumLeaves(); i++) { + + if (removedLeaves.count(i) == 0) { + + //am I in the group + int parent = copy->tree[i].getParent(); + + if (parent != -1) { + + if (m->inUsersGroups(copy->tree[i].getName(), Groups)) { + //find my siblings name + int parentRC = copy->tree[parent].getRChild(); + int parentLC = copy->tree[parent].getLChild(); + + //if I am the right child, then my sib is the left child + int sibIndex = parentRC; + if (parentRC == i) { sibIndex = parentLC; } + + string sibsName = copy->tree[sibIndex].getName(); + + //if yes, is my sibling + if ((m->inUsersGroups(sibsName, Groups)) || (sibsName == "")) { + //we both are okay no trimming required + }else{ + //i am, my sib is not, so remove sib by setting my parent to my grandparent + int grandparent = copy->tree[parent].getParent(); + int grandparentLC = copy->tree[grandparent].getLChild(); + int grandparentRC = copy->tree[grandparent].getRChild(); + + //whichever of my granparents children was my parent now equals me + if (grandparentLC == parent) { grandparentLC = i; } + else { grandparentRC = i; } + + copy->tree[i].setParent(grandparent); + copy->tree[i].setBranchLength((copy->tree[i].getBranchLength()+copy->tree[parent].getBranchLength())); + copy->tree[grandparent].setChildren(grandparentLC, grandparentRC); + removedLeaves.insert(sibIndex); + } + }else{ + //find my siblings name + int parentRC = copy->tree[parent].getRChild(); + int parentLC = copy->tree[parent].getLChild(); + + //if I am the right child, then my sib is the left child + int sibIndex = parentRC; + if (parentRC == i) { sibIndex = parentLC; } + + string sibsName = copy->tree[sibIndex].getName(); + + //if no is my sibling + if ((m->inUsersGroups(sibsName, Groups)) || (sibsName == "")) { + //i am not, but my sib is + int grandparent = copy->tree[parent].getParent(); + int grandparentLC = copy->tree[grandparent].getLChild(); + int grandparentRC = copy->tree[grandparent].getRChild(); + + //whichever of my granparents children was my parent now equals my sib + if (grandparentLC == parent) { grandparentLC = sibIndex; } + else { grandparentRC = sibIndex; } + + copy->tree[sibIndex].setParent(grandparent); + copy->tree[sibIndex].setBranchLength((copy->tree[sibIndex].getBranchLength()+copy->tree[parent].getBranchLength())); + copy->tree[grandparent].setChildren(grandparentLC, grandparentRC); + removedLeaves.insert(i); + }else{ + //neither of us are, so we want to eliminate ourselves and our parent + //so set our parents sib to our great-grandparent + int parent = copy->tree[i].getParent(); + int grandparent = copy->tree[parent].getParent(); + int parentsSibIndex; + if (grandparent != -1) { + int greatgrandparent = copy->tree[grandparent].getParent(); + int greatgrandparentLC = copy->tree[greatgrandparent].getLChild(); + int greatgrandparentRC = copy->tree[greatgrandparent].getRChild(); + + int grandparentLC = copy->tree[grandparent].getLChild(); + int grandparentRC = copy->tree[grandparent].getRChild(); + + parentsSibIndex = grandparentLC; + if (grandparentLC == parent) { parentsSibIndex = grandparentRC; } + + //whichever of my greatgrandparents children was my grandparent + if (greatgrandparentLC == grandparent) { greatgrandparentLC = parentsSibIndex; } + else { greatgrandparentRC = parentsSibIndex; } + + copy->tree[parentsSibIndex].setParent(greatgrandparent); + copy->tree[parentsSibIndex].setBranchLength((copy->tree[parentsSibIndex].getBranchLength()+copy->tree[grandparent].getBranchLength())); + copy->tree[greatgrandparent].setChildren(greatgrandparentLC, greatgrandparentRC); + }else{ + copy->tree[parent].setChildren(-1, -1); + cout << "issues with making subtree" << endl; + } + removedLeaves.insert(sibIndex); + removedLeaves.insert(i); + } + } + } + } + } + + int root = 0; + for (int i = 0; i < copy->getNumNodes(); i++) { + //you found the root + if (copy->tree[i].getParent() == -1) { root = i; break; } + } + + int nextSpot = numLeaves; + populateNewTree(copy->tree, root, nextSpot); + + } + catch(exception& e) { + m->errorOut(e, "Tree", "getCopy"); + exit(1); + } +} +/*****************************************************************/ +int Tree::populateNewTree(vector& oldtree, int node, int& index) { + try { + + if (oldtree[node].getLChild() != -1) { + int rc = populateNewTree(oldtree, oldtree[node].getLChild(), index); + int lc = populateNewTree(oldtree, oldtree[node].getRChild(), index); + + tree[index].setChildren(lc, rc); + tree[rc].setParent(index); + tree[lc].setParent(index); + + tree[index].setBranchLength(oldtree[node].getBranchLength()); + tree[rc].setBranchLength(oldtree[oldtree[node].getLChild()].getBranchLength()); + tree[lc].setBranchLength(oldtree[oldtree[node].getRChild()].getBranchLength()); + + return (index++); + }else { //you are a leaf + int indexInNewTree = globaldata->gTreemap->getIndex(oldtree[node].getName()); + return indexInNewTree; + } + } + catch(exception& e) { + m->errorOut(e, "Tree", "populateNewTree"); + exit(1); + } +} +/*****************************************************************/ void Tree::getCopy(Tree* copy) { try { @@ -219,6 +467,8 @@ void Tree::getCopy(Tree* copy) { tree[i].pcount = copy->tree[i].pcount; } + groupNodeInfo = copy->groupNodeInfo; + } catch(exception& e) { m->errorOut(e, "Tree", "getCopy"); @@ -292,14 +542,14 @@ map Tree::mergeUserGroups(int i, vector g) { //loop through nodes groups removing the ones the user doesn't want for(it=tree[lc].pGroups.begin();it!=tree[lc].pGroups.end();){ - if (inUsersGroups(it->first, g) != true) { + if (m->inUsersGroups(it->first, g) != true) { tree[lc].pGroups.erase(it++); }else { it++; } } //loop through nodes groups removing the ones the user doesn't want for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();){ - if (inUsersGroups(it->first, g) != true) { + if (m->inUsersGroups(it->first, g) != true) { tree[rc].pGroups.erase(it++); }else { it++; } } @@ -371,6 +621,11 @@ map Tree::mergeGcounts(int position) { void Tree::randomLabels(vector g) { try { + + //initialize groupNodeInfo + for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) { + groupNodeInfo[globaldata->gTreemap->namesOfGroups[i]].resize(0); + } for(int i = 0; i < numLeaves; i++){ int z; @@ -381,8 +636,8 @@ void Tree::randomLabels(vector g) { //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them. bool treez, treei; - treez = inUsersGroups(tree[z].getGroup(), g); - treei = inUsersGroups(tree[i].getGroup(), g); + treez = m->inUsersGroups(tree[z].getGroup(), g); + treei = m->inUsersGroups(tree[i].getGroup(), g); if ((treez == true) && (treei == true)) { //switches node i and node z's info. @@ -402,6 +657,9 @@ void Tree::randomLabels(vector g) { tree[z].pcount = (tree[i].pcount); tree[i].pcount = (gcount_hold); } + + for (int k = 0; k < (tree[i].getGroup()).size(); k++) { groupNodeInfo[(tree[i].getGroup())[k]].push_back(i); } + for (int k = 0; k < (tree[z].getGroup()).size(); k++) { groupNodeInfo[(tree[z].getGroup())[k]].push_back(z); } } } catch(exception& e) { @@ -458,14 +716,14 @@ void Tree::randomBlengths() { /*************************************************************************************************/ void Tree::assembleRandomUnifracTree(vector g) { randomLabels(g); - assembleTree(); + assembleTree("noNameCounts"); } /*************************************************************************************************/ void Tree::assembleRandomUnifracTree(string groupA, string groupB) { vector temp; temp.push_back(groupA); temp.push_back(groupB); randomLabels(temp); - assembleTree(); + assembleTree("noNameCounts"); } /*************************************************************************************************/ @@ -525,27 +783,26 @@ void Tree::print(ostream& out) { } } /*****************************************************************/ -void Tree::printForBoot(ostream& out) { +void Tree::print(ostream& out, string mode) { try { int root = findRoot(); - printBranch(root, out, "boot"); + printBranch(root, out, mode); out << ";" << endl; } catch(exception& e) { - m->errorOut(e, "Tree", "printForBoot"); + m->errorOut(e, "Tree", "print"); exit(1); } } - /*****************************************************************/ // This prints out the tree in Newick form. void Tree::createNewickFile(string f) { try { int root = findRoot(); - //filename = getRootName(globaldata->getTreeFile()) + "newick"; + //filename = m->getRootName(globaldata->getTreeFile()) + "newick"; filename = f; - openOutputFile(filename, out); + m->openOutputFile(filename, out); printBranch(root, out, "branch"); @@ -577,12 +834,11 @@ int Tree::findRoot() { exit(1); } } - /*****************************************************************/ void Tree::printBranch(int node, ostream& out, string mode) { - try { - - // you are not a leaf +try { + +// you are not a leaf if (tree[node].getLChild() != -1) { out << "("; printBranch(tree[node].getLChild(), out, mode); @@ -599,21 +855,102 @@ void Tree::printBranch(int node, ostream& out, string mode) { if (tree[node].getLabel() != -1) { out << tree[node].getLabel(); } + }else if (mode == "both") { + if (tree[node].getLabel() != -1) { + out << tree[node].getLabel(); + } + //if there is a branch length then print it + if (tree[node].getBranchLength() != -1) { + out << ":" << tree[node].getBranchLength(); + } } }else { //you are a leaf string leafGroup = globaldata->gTreemap->getGroup(tree[node].getName()); - out << leafGroup; if (mode == "branch") { + out << leafGroup; //if there is a branch length then print it if (tree[node].getBranchLength() != -1) { out << ":" << tree[node].getBranchLength(); } }else if (mode == "boot") { + out << leafGroup; //if there is a label then print it if (tree[node].getLabel() != -1) { out << tree[node].getLabel(); } + }else if (mode == "both") { + out << tree[node].getName(); + if (tree[node].getLabel() != -1) { + out << tree[node].getLabel(); + } + //if there is a branch length then print it + if (tree[node].getBranchLength() != -1) { + out << ":" << tree[node].getBranchLength(); + } + } + } + + } + catch(exception& e) { + m->errorOut(e, "Tree", "printBranch"); + exit(1); + } +} +/*****************************************************************/ +void Tree::printBranch(int node, ostream& out, string mode, vector& theseNodes) { + try { + + // you are not a leaf + if (theseNodes[node].getLChild() != -1) { + out << "("; + printBranch(theseNodes[node].getLChild(), out, mode); + out << ","; + printBranch(theseNodes[node].getRChild(), out, mode); + out << ")"; + if (mode == "branch") { + //if there is a branch length then print it + if (theseNodes[node].getBranchLength() != -1) { + out << ":" << theseNodes[node].getBranchLength(); + } + }else if (mode == "boot") { + //if there is a label then print it + if (theseNodes[node].getLabel() != -1) { + out << theseNodes[node].getLabel(); + } + }else if (mode == "both") { + if (theseNodes[node].getLabel() != -1) { + out << theseNodes[node].getLabel(); + } + //if there is a branch length then print it + if (theseNodes[node].getBranchLength() != -1) { + out << ":" << theseNodes[node].getBranchLength(); + } + } + }else { //you are a leaf + string leafGroup = globaldata->gTreemap->getGroup(theseNodes[node].getName()); + + if (mode == "branch") { + out << leafGroup; + //if there is a branch length then print it + if (theseNodes[node].getBranchLength() != -1) { + out << ":" << theseNodes[node].getBranchLength(); + } + }else if (mode == "boot") { + out << leafGroup; + //if there is a label then print it + if (theseNodes[node].getLabel() != -1) { + out << theseNodes[node].getLabel(); + } + }else if (mode == "both") { + out << theseNodes[node].getName(); + if (theseNodes[node].getLabel() != -1) { + out << theseNodes[node].getLabel(); + } + //if there is a branch length then print it + if (theseNodes[node].getBranchLength() != -1) { + out << ":" << theseNodes[node].getBranchLength(); + } } } @@ -623,7 +960,6 @@ void Tree::printBranch(int node, ostream& out, string mode) { exit(1); } } - /*****************************************************************/ void Tree::printTree() { @@ -643,7 +979,7 @@ void Tree::parseTreeFile() { try { string filename = globaldata->getTreeFile(); ifstream filehandle; - openInputFile(filename, filehandle); + m->openInputFile(filename, filehandle); int c, comment; comment = 0; int done = 1; @@ -736,20 +1072,39 @@ int Tree::readTreeString(ifstream& filehandle) { //cout << " at beginning of while " << k << endl; if(c == ')') { //to pass over labels in trees - string label = readLabel(filehandle); + c=filehandle.get(); + while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); } + filehandle.putback(c); } - if(c == ';') { return 0; } if(c == -1) { return 0; } - //if you are a name if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space - name = readName(filehandle); + name = ""; + c = filehandle.get(); + //k = c; +//cout << k << endl; + while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) { + name += c; + c = filehandle.get(); + //k = c; +//cout << " in name while " << k << endl; + } + +//cout << "name = " << name << endl; globaldata->Treenames.push_back(name); + filehandle.putback(c); +//k = c; +//cout << " after putback" << k << endl; } if(c == ':') { //read until you reach the end of the branch length - string bl = readBranchLength(filehandle); + while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) { + c = filehandle.get(); + //k = c; + //cout << " in branch while " << k << endl; + } + filehandle.putback(c); } c = filehandle.get(); @@ -770,68 +1125,6 @@ int Tree::readTreeString(ifstream& filehandle) { } /*******************************************************/ -string Tree::readLabel(ifstream& filehandle) { - try { - - string label = ""; - - //to pass over labels in trees - int c=filehandle.get(); - while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ label += c; c=filehandle.get(); } - filehandle.putback(c); - - return label; - - } - catch(exception& e) { - m->errorOut(e, "Tree", "readLabel"); - exit(1); - } -} -/*******************************************************/ -string Tree::readName(ifstream& filehandle) { - try { - - string name = ""; - int c = filehandle.get(); - - while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) { - name += c; - c = filehandle.get(); - } - -//cout << "name = " << name << endl; - filehandle.putback(c); - - return name; - - } - catch(exception& e) { - m->errorOut(e, "Tree", "readName"); - exit(1); - } -} -/*******************************************************/ -string Tree::readBranchLength(ifstream& filehandle) { - try { - - string br = ""; - int c; - while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) { - br += c; - c = filehandle.get(); - } - filehandle.putback(c); - - return br; - - } - catch(exception& e) { - m->errorOut(e, "Tree", "readBranchLength"); - exit(1); - } -} /*******************************************************/ -/*******************************************************/