X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=tree.cpp;h=745893471e065abee12e7752b91c71bb37c1a635;hp=642c6658739dd5252b2d61083ce6af2b872785ec;hb=a8e2df1b96a57f5f29576b08361b86a96a8eff4f;hpb=38922fcff5a03abfedffda3e06a45fad2270a044 diff --git a/tree.cpp b/tree.cpp index 642c665..7458934 100644 --- a/tree.cpp +++ b/tree.cpp @@ -9,102 +9,639 @@ #include "tree.h" - /*****************************************************************/ -Tree::Tree() { +Tree::Tree(int num, CountTable* t) : ct(t) { try { - - globaldata = GlobalData::getInstance(); - numLeaves = globaldata->gTreemap->getNumSeqs(); - numNodes = 2*numLeaves - 1; + m = MothurOut::getInstance(); + numLeaves = num; + numNodes = 2*numLeaves - 1; + tree.resize(numNodes); + } + catch(exception& e) { + m->errorOut(e, "Tree", "Tree - numNodes"); + exit(1); + } +} +/*****************************************************************/ +Tree::Tree(string g) { //do not use tree generated by this its just to extract the treenames, its a chicken before the egg thing that needs to be revisited. + try { + m = MothurOut::getInstance(); + parseTreeFile(); m->runParse = false; + } + catch(exception& e) { + m->errorOut(e, "Tree", "Tree - just parse"); + exit(1); + } +} +/*****************************************************************/ +Tree::Tree(CountTable* t) : ct(t) { + try { + m = MothurOut::getInstance(); + + if (m->runParse == true) { parseTreeFile(); m->runParse = false; } + numLeaves = m->Treenames.size(); + numNodes = 2*numLeaves - 1; + + tree.resize(numNodes); + + //initialize groupNodeInfo + vector namesOfGroups = ct->getNamesOfGroups(); + for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); } + //initialize tree with correct number of nodes, name and group info. for (int i = 0; i < numNodes; i++) { //initialize leaf nodes if (i <= (numLeaves-1)) { - tree[i].setName(globaldata->gTreemap->namesOfSeqs[i]); - tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])); - //the node knows its index - tree[i].setIndex(i); - //set pcount and pGroup for groupname to 1. - tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1; - tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1; - //Treemap knows name, group and index to speed up search - globaldata->gTreemap->setIndex(globaldata->gTreemap->namesOfSeqs[i], i); - + tree[i].setName(m->Treenames[i]); + + //save group info + int maxPars = 1; + vector group; + vector counts = ct->getGroupCounts(m->Treenames[i]); + for (int j = 0; j < namesOfGroups.size(); j++) { + if (counts[j] != 0) { //you have seqs from this group + groupNodeInfo[namesOfGroups[j]].push_back(i); + group.push_back(namesOfGroups[j]); + tree[i].pGroups[namesOfGroups[j]] = counts[j]; + tree[i].pcount[namesOfGroups[j]] = counts[j]; + //keep highest group + if(counts[j] > maxPars){ maxPars = counts[j]; } + } + } + tree[i].setGroup(group); + setIndex(m->Treenames[i], i); + + if (maxPars > 1) { //then we have some more dominant groups + //erase all the groups that are less than maxPars because you found a more dominant group. + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){ + if(it->second < maxPars){ + tree[i].pGroups.erase(it++); + }else { it++; } + } + //set one remaining groups to 1 + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){ + tree[i].pGroups[it->first] = 1; + } + }//end if + //intialize non leaf nodes }else if (i > (numLeaves-1)) { tree[i].setName(""); - tree[i].setGroup(""); - //the node knows its index - tree[i].setIndex(i); + vector tempGroups; + tree[i].setGroup(tempGroups); } } + } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "Tree", "Tree"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Tree class function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } } - /*****************************************************************/ +Tree::Tree(CountTable* t, vector< vector >& sims) : ct(t) { + try { + m = MothurOut::getInstance(); + + if (m->runParse == true) { parseTreeFile(); m->runParse = false; } + numLeaves = m->Treenames.size(); + numNodes = 2*numLeaves - 1; + + tree.resize(numNodes); + + //initialize groupNodeInfo + vector namesOfGroups = ct->getNamesOfGroups(); + for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); } + + //initialize tree with correct number of nodes, name and group info. + for (int i = 0; i < numNodes; i++) { + //initialize leaf nodes + if (i <= (numLeaves-1)) { + tree[i].setName(m->Treenames[i]); + + //save group info + int maxPars = 1; + vector group; + vector counts = ct->getGroupCounts(m->Treenames[i]); + for (int j = 0; j < namesOfGroups.size(); j++) { + if (counts[j] != 0) { //you have seqs from this group + groupNodeInfo[namesOfGroups[j]].push_back(i); + group.push_back(namesOfGroups[j]); + tree[i].pGroups[namesOfGroups[j]] = counts[j]; + tree[i].pcount[namesOfGroups[j]] = counts[j]; + //keep highest group + if(counts[j] > maxPars){ maxPars = counts[j]; } + } + } + tree[i].setGroup(group); + setIndex(m->Treenames[i], i); + + if (maxPars > 1) { //then we have some more dominant groups + //erase all the groups that are less than maxPars because you found a more dominant group. + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){ + if(it->second < maxPars){ + tree[i].pGroups.erase(it++); + }else { it++; } + } + //set one remaining groups to 1 + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){ + tree[i].pGroups[it->first] = 1; + } + }//end if + + //intialize non leaf nodes + }else if (i > (numLeaves-1)) { + tree[i].setName(""); + vector tempGroups; + tree[i].setGroup(tempGroups); + } + } -int Tree::getIndex(string searchName) { + + //build tree from matrix + //initialize indexes + map thisIndexes; //maps row in simMatrix to vector index in the tree + for (int g = 0; g < numLeaves; g++) { thisIndexes[g] = g; } + + //do merges and create tree structure by setting parents and children + //there are numGroups - 1 merges to do + for (int i = 0; i < (numLeaves - 1); i++) { + float largest = -1000.0; + + if (m->control_pressed) { break; } + + int row, column; + //find largest value in sims matrix by searching lower triangle + for (int j = 1; j < sims.size(); j++) { + for (int k = 0; k < j; k++) { + if (sims[j][k] > largest) { largest = sims[j][k]; row = j; column = k; } + } + } + + //set non-leaf node info and update leaves to know their parents + //non-leaf + tree[numLeaves + i].setChildren(thisIndexes[row], thisIndexes[column]); + + //parents + tree[thisIndexes[row]].setParent(numLeaves + i); + tree[thisIndexes[column]].setParent(numLeaves + i); + + //blength = distance / 2; + float blength = ((1.0 - largest) / 2); + + //branchlengths + tree[thisIndexes[row]].setBranchLength(blength - tree[thisIndexes[row]].getLengthToLeaves()); + tree[thisIndexes[column]].setBranchLength(blength - tree[thisIndexes[column]].getLengthToLeaves()); + + //set your length to leaves to your childs length plus branchlength + tree[numLeaves + i].setLengthToLeaves(tree[thisIndexes[row]].getLengthToLeaves() + tree[thisIndexes[row]].getBranchLength()); + + + //update index + thisIndexes[row] = numLeaves+i; + thisIndexes[column] = numLeaves+i; + + //remove highest value that caused the merge. + sims[row][column] = -1000.0; + sims[column][row] = -1000.0; + + //merge values in simsMatrix + for (int n = 0; n < sims.size(); n++) { + //row becomes merge of 2 groups + sims[row][n] = (sims[row][n] + sims[column][n]) / 2; + sims[n][row] = sims[row][n]; + //delete column + sims[column][n] = -1000.0; + sims[n][column] = -1000.0; + } + } + + //adjust tree to make sure root to tip length is .5 + int root = findRoot(); + tree[root].setBranchLength((0.5 - tree[root].getLengthToLeaves())); + + } + catch(exception& e) { + m->errorOut(e, "Tree", "Tree"); + exit(1); + } +} +/*****************************************************************/ +Tree::~Tree() {} +/***************************************************************** +void Tree::addNamesToCounts(map nameMap) { try { - //Treemap knows name, group and index to speed up search - // getIndex function will return the vector index or -1 if seq is not found. - int index = globaldata->gTreemap->getIndex(searchName); - return index; + //ex. seq1 seq2,seq3,se4 + // seq1 = pasture + // seq2 = forest + // seq4 = pasture + // seq3 = ocean + + //before this function seq1.pcount = pasture -> 1 + //after seq1.pcount = pasture -> 2, forest -> 1, ocean -> 1 + + //before this function seq1.pgroups = pasture -> 1 + //after seq1.pgroups = pasture -> 1 since that is the dominant group + + + //go through each leaf and update its pcounts and pgroups + + //float A = clock(); + + for (int i = 0; i < numLeaves; i++) { + + string name = tree[i].getName(); + + map::iterator itNames = nameMap.find(name); + + if (itNames == nameMap.end()) { m->mothurOut(name + " is not in your name file, please correct."); m->mothurOutEndLine(); exit(1); } + else { + vector dupNames; + m->splitAtComma(nameMap[name], dupNames); + + map::iterator itCounts; + int maxPars = 1; + set groupsAddedForThisNode; + for (int j = 0; j < dupNames.size(); j++) { + + string group = tmap->getGroup(dupNames[j]); + + if (dupNames[j] != name) {//you already added yourself in the constructor + + if (groupsAddedForThisNode.count(group) == 0) { groupNodeInfo[group].push_back(i); groupsAddedForThisNode.insert(group); } //if you have not already added this node for this group, then add it + + //update pcounts + itCounts = tree[i].pcount.find(group); + if (itCounts == tree[i].pcount.end()) { //new group, add it + tree[i].pcount[group] = 1; + }else { + tree[i].pcount[group]++; + } + + //update pgroups + itCounts = tree[i].pGroups.find(group); + if (itCounts == tree[i].pGroups.end()) { //new group, add it + tree[i].pGroups[group] = 1; + }else{ + tree[i].pGroups[group]++; + } + + //keep highest group + if(tree[i].pGroups[group] > maxPars){ + maxPars = tree[i].pGroups[group]; + } + }else { groupsAddedForThisNode.insert(group); } //add it so you don't add it to groupNodeInfo again + }//end for + + if (maxPars > 1) { //then we have some more dominant groups + //erase all the groups that are less than maxPars because you found a more dominant group. + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){ + if(it->second < maxPars){ + tree[i].pGroups.erase(it++); + }else { it++; } + } + //set one remaining groups to 1 + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){ + tree[i].pGroups[it->first] = 1; + } + }//end if + + //update groups to reflect all the groups this node represents + vector nodeGroups; + map::iterator itGroups; + for (itGroups = tree[i].pcount.begin(); itGroups != tree[i].pcount.end(); itGroups++) { + nodeGroups.push_back(itGroups->first); + } + tree[i].setGroup(nodeGroups); + + }//end else + }//end for + //float B = clock(); + //cout << "addNamesToCounts\t" << (B - A) / CLOCKS_PER_SEC << endl; + } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "Tree", "addNamesToCounts"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Tree class function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; +}*/ +/*****************************************************************/ +int Tree::getIndex(string searchName) { + try { + map::iterator itIndex = indexes.find(searchName); + if (itIndex != indexes.end()) { + return itIndex->second; + } + return -1; + } + catch(exception& e) { + m->errorOut(e, "Tree", "getIndex"); exit(1); - } + } } /*****************************************************************/ void Tree::setIndex(string searchName, int index) { try { - //set index in treemap - globaldata->gTreemap->setIndex(searchName, index); + map::iterator itIndex = indexes.find(searchName); + if (itIndex == indexes.end()) { + indexes[searchName] = index; + } } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "Tree", "setIndex"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Tree class function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; +} +/*****************************************************************/ +int Tree::assembleTree() { + try { + //build the pGroups in non leaf nodes to be used in the parsimony calcs. + for (int i = numLeaves; i < numNodes; i++) { + if (m->control_pressed) { return 1; } + + tree[i].pGroups = (mergeGroups(i)); + tree[i].pcount = (mergeGcounts(i)); + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "Tree", "assembleTree"); exit(1); - } + } } /*****************************************************************/ -void Tree::assembleTree() { +//assumes leaf node names are in groups and no names file - used by indicator command +void Tree::getSubTree(Tree* Ctree, vector Groups) { try { - //build the pGroups in non leaf nodes to be used in the parsimony calcs. + + //copy Tree since we are going to destroy it + Tree* copy = new Tree(ct); + copy->getCopy(Ctree); + copy->assembleTree(); + + //we want to select some of the leaf nodes to create the output tree + //go through the input Tree starting at parents of leaves + //initialize groupNodeInfo + vector namesOfGroups = ct->getNamesOfGroups(); + for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); } + + //initialize tree with correct number of nodes, name and group info. + for (int i = 0; i < numNodes; i++) { + //initialize leaf nodes + if (i <= (numLeaves-1)) { + tree[i].setName(Groups[i]); + + //save group info + int maxPars = 1; + vector group; + vector counts = ct->getGroupCounts(Groups[i]); + for (int j = 0; j < namesOfGroups.size(); j++) { + if (counts[j] != 0) { //you have seqs from this group + groupNodeInfo[namesOfGroups[j]].push_back(i); + group.push_back(namesOfGroups[j]); + tree[i].pGroups[namesOfGroups[j]] = counts[j]; + tree[i].pcount[namesOfGroups[j]] = counts[j]; + //keep highest group + if(counts[j] > maxPars){ maxPars = counts[j]; } + } + } + tree[i].setGroup(group); + setIndex(Groups[i], i); + + if (maxPars > 1) { //then we have some more dominant groups + //erase all the groups that are less than maxPars because you found a more dominant group. + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){ + if(it->second < maxPars){ + tree[i].pGroups.erase(it++); + }else { it++; } + } + //set one remaining groups to 1 + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){ + tree[i].pGroups[it->first] = 1; + } + }//end if + + //intialize non leaf nodes + }else if (i > (numLeaves-1)) { + tree[i].setName(""); + vector tempGroups; + tree[i].setGroup(tempGroups); + } + } + + set removedLeaves; + for (int i = 0; i < copy->getNumLeaves(); i++) { + + if (removedLeaves.count(i) == 0) { + + //am I in the group + int parent = copy->tree[i].getParent(); + + if (parent != -1) { + + if (m->inUsersGroups(copy->tree[i].getName(), Groups)) { + //find my siblings name + int parentRC = copy->tree[parent].getRChild(); + int parentLC = copy->tree[parent].getLChild(); + + //if I am the right child, then my sib is the left child + int sibIndex = parentRC; + if (parentRC == i) { sibIndex = parentLC; } + + string sibsName = copy->tree[sibIndex].getName(); + + //if yes, is my sibling + if ((m->inUsersGroups(sibsName, Groups)) || (sibsName == "")) { + //we both are okay no trimming required + }else{ + //i am, my sib is not, so remove sib by setting my parent to my grandparent + int grandparent = copy->tree[parent].getParent(); + int grandparentLC = copy->tree[grandparent].getLChild(); + int grandparentRC = copy->tree[grandparent].getRChild(); + + //whichever of my granparents children was my parent now equals me + if (grandparentLC == parent) { grandparentLC = i; } + else { grandparentRC = i; } + + copy->tree[i].setParent(grandparent); + copy->tree[i].setBranchLength((copy->tree[i].getBranchLength()+copy->tree[parent].getBranchLength())); + if (grandparent != -1) { + copy->tree[grandparent].setChildren(grandparentLC, grandparentRC); + } + removedLeaves.insert(sibIndex); + } + }else{ + //find my siblings name + int parentRC = copy->tree[parent].getRChild(); + int parentLC = copy->tree[parent].getLChild(); + + //if I am the right child, then my sib is the left child + int sibIndex = parentRC; + if (parentRC == i) { sibIndex = parentLC; } + + string sibsName = copy->tree[sibIndex].getName(); + + //if no is my sibling + if ((m->inUsersGroups(sibsName, Groups)) || (sibsName == "")) { + //i am not, but my sib is + int grandparent = copy->tree[parent].getParent(); + int grandparentLC = copy->tree[grandparent].getLChild(); + int grandparentRC = copy->tree[grandparent].getRChild(); + + //whichever of my granparents children was my parent now equals my sib + if (grandparentLC == parent) { grandparentLC = sibIndex; } + else { grandparentRC = sibIndex; } + + copy->tree[sibIndex].setParent(grandparent); + copy->tree[sibIndex].setBranchLength((copy->tree[sibIndex].getBranchLength()+copy->tree[parent].getBranchLength())); + if (grandparent != -1) { + copy->tree[grandparent].setChildren(grandparentLC, grandparentRC); + } + removedLeaves.insert(i); + }else{ + //neither of us are, so we want to eliminate ourselves and our parent + //so set our parents sib to our great-grandparent + int parent = copy->tree[i].getParent(); + int grandparent = copy->tree[parent].getParent(); + int parentsSibIndex; + if (grandparent != -1) { + int greatgrandparent = copy->tree[grandparent].getParent(); + int greatgrandparentLC, greatgrandparentRC; + if (greatgrandparent != -1) { + greatgrandparentLC = copy->tree[greatgrandparent].getLChild(); + greatgrandparentRC = copy->tree[greatgrandparent].getRChild(); + } + + int grandparentLC = copy->tree[grandparent].getLChild(); + int grandparentRC = copy->tree[grandparent].getRChild(); + + parentsSibIndex = grandparentLC; + if (grandparentLC == parent) { parentsSibIndex = grandparentRC; } + + //whichever of my greatgrandparents children was my grandparent + if (greatgrandparentLC == grandparent) { greatgrandparentLC = parentsSibIndex; } + else { greatgrandparentRC = parentsSibIndex; } + + copy->tree[parentsSibIndex].setParent(greatgrandparent); + copy->tree[parentsSibIndex].setBranchLength((copy->tree[parentsSibIndex].getBranchLength()+copy->tree[grandparent].getBranchLength())); + if (greatgrandparent != -1) { + copy->tree[greatgrandparent].setChildren(greatgrandparentLC, greatgrandparentRC); + } + }else{ + copy->tree[parent].setParent(-1); + //cout << "issues with making subtree" << endl; + } + removedLeaves.insert(sibIndex); + removedLeaves.insert(i); + } + } + } + } + } + + int root = 0; + for (int i = 0; i < copy->getNumNodes(); i++) { + //you found the root + if (copy->tree[i].getParent() == -1) { root = i; break; } + } + + int nextSpot = numLeaves; + populateNewTree(copy->tree, root, nextSpot); + + delete copy; + } + catch(exception& e) { + m->errorOut(e, "Tree", "getSubTree"); + exit(1); + } +} +/***************************************************************** +//assumes nameMap contains unique names as key or is empty. +//assumes numLeaves defined in tree constructor equals size of seqsToInclude and seqsToInclude only contains unique seqs. +int Tree::getSubTree(Tree* copy, vector seqsToInclude, map nameMap) { + try { + + if (numLeaves != seqsToInclude.size()) { m->mothurOut("[ERROR]: numLeaves does not equal numUniques, cannot create subtree.\n"); m->control_pressed = true; return 0; } + + getSubTree(copy, seqsToInclude); + if (nameMap.size() != 0) { addNamesToCounts(nameMap); } + + //build the pGroups in non leaf nodes to be used in the parsimony calcs. for (int i = numLeaves; i < numNodes; i++) { + if (m->control_pressed) { return 1; } + tree[i].pGroups = (mergeGroups(i)); tree[i].pcount = (mergeGcounts(i)); } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "Tree", "getSubTree"); + exit(1); + } +} +/*****************************************************************/ +int Tree::populateNewTree(vector& oldtree, int node, int& index) { + try { + + if (oldtree[node].getLChild() != -1) { + int rc = populateNewTree(oldtree, oldtree[node].getLChild(), index); + int lc = populateNewTree(oldtree, oldtree[node].getRChild(), index); + + tree[index].setChildren(lc, rc); + tree[rc].setParent(index); + tree[lc].setParent(index); + + tree[index].setBranchLength(oldtree[node].getBranchLength()); + tree[rc].setBranchLength(oldtree[oldtree[node].getLChild()].getBranchLength()); + tree[lc].setBranchLength(oldtree[oldtree[node].getRChild()].getBranchLength()); + + return (index++); + }else { //you are a leaf + int indexInNewTree = getIndex(oldtree[node].getName()); + return indexInNewTree; + } } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "Tree", "populateNewTree"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Tree class function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; +} +/*****************************************************************/ +void Tree::getCopy(Tree* copy, bool subsample) { + try { + + //for each node in the tree copy its info + for (int i = 0; i < numNodes; i++) { + //copy branch length + tree[i].setBranchLength(copy->tree[i].getBranchLength()); + + //copy parent + tree[i].setParent(copy->tree[i].getParent()); + + //copy children + tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild()); + } + + //build the pGroups in non leaf nodes to be used in the parsimony calcs. + for (int i = numLeaves; i < numNodes; i++) { + if (m->control_pressed) { break; } + + tree[i].pGroups = (mergeGroups(i)); + tree[i].pcount = (mergeGcounts(i)); + } + } + catch(exception& e) { + m->errorOut(e, "Tree", "getCopy"); exit(1); - } + } } /*****************************************************************/ void Tree::getCopy(Tree* copy) { @@ -128,8 +665,8 @@ void Tree::getCopy(Tree* copy) { tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild()); //copy index in node and tmap + setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName())); tree[i].setIndex(copy->tree[i].getIndex()); - setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName())); //copy pGroups tree[i].pGroups = copy->tree[i].pGroups; @@ -137,15 +674,14 @@ void Tree::getCopy(Tree* copy) { //copy pcount tree[i].pcount = copy->tree[i].pcount; } + + groupNodeInfo = copy->groupNodeInfo; + } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "Tree", "getCopy"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Tree class function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } } /*****************************************************************/ //returns a map with a groupname and the number of times that group was seen in the children @@ -157,7 +693,7 @@ map Tree::mergeGroups(int i) { try { int lc = tree[i].getLChild(); int rc = tree[i].getRChild(); - + //set parsimony groups to left child map parsimony = tree[lc].pGroups; @@ -180,11 +716,10 @@ map Tree::mergeGroups(int i) { // this is true if right child had a greater parsimony for a certain group if(maxPars > 1){ //erase all the groups that are only 1 because you found something with 2. - for(it=parsimony.begin();it!=parsimony.end();it++){ + for(it=parsimony.begin();it!=parsimony.end();){ if(it->second == 1){ - parsimony.erase(it->first); - it--; - } + parsimony.erase(it++); + }else { it++; } } //set one remaining groups to 1 //so with our above example p[white] = 2 would be left and it would become p[white] = 1 @@ -197,13 +732,9 @@ map Tree::mergeGroups(int i) { return parsimony; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "Tree", "mergeGroups"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } } /*****************************************************************/ //returns a map with a groupname and the number of times that group was seen in the children @@ -218,13 +749,17 @@ map Tree::mergeUserGroups(int i, vector g) { int rc = tree[i].getRChild(); //loop through nodes groups removing the ones the user doesn't want - for (it = tree[lc].pGroups.begin(); it != tree[lc].pGroups.end(); it++) { - if (inUsersGroups(it->first, g) != true) { tree[lc].pGroups.erase(it->first); } + for(it=tree[lc].pGroups.begin();it!=tree[lc].pGroups.end();){ + if (m->inUsersGroups(it->first, g) != true) { + tree[lc].pGroups.erase(it++); + }else { it++; } } - + //loop through nodes groups removing the ones the user doesn't want - for (it = tree[rc].pGroups.begin(); it != tree[rc].pGroups.end(); it++) { - if (inUsersGroups(it->first, g) != true) { tree[rc].pGroups.erase(it->first); } + for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();){ + if (m->inUsersGroups(it->first, g) != true) { + tree[rc].pGroups.erase(it++); + }else { it++; } } //set parsimony groups to left child @@ -245,34 +780,27 @@ map Tree::mergeUserGroups(int i, vector g) { maxPars = parsimony[it->first]; } } - + // this is true if right child had a greater parsimony for a certain group if(maxPars > 1){ //erase all the groups that are only 1 because you found something with 2. - for(it=parsimony.begin();it!=parsimony.end();it++){ + for(it=parsimony.begin();it!=parsimony.end();){ if(it->second == 1){ - parsimony.erase(it->first); - it--; - } + parsimony.erase(it++); + }else { it++; } } - //set one remaining groups to 1 - //so with our above example p[white] = 2 would be left and it would become p[white] = 1 + for(it=parsimony.begin();it!=parsimony.end();it++){ parsimony[it->first] = 1; } + } - } - return parsimony; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "Tree", "mergeUserGroups"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } } @@ -293,18 +821,18 @@ map Tree::mergeGcounts(int position) { return sum; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "Tree", "mergeGcounts"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Tree class function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } } /**************************************************************************************************/ - void Tree::randomLabels(vector g) { try { + + //initialize groupNodeInfo + for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) { + groupNodeInfo[(ct->getNamesOfGroups())[i]].resize(0); + } for(int i = 0; i < numLeaves; i++){ int z; @@ -315,8 +843,8 @@ void Tree::randomLabels(vector g) { //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them. bool treez, treei; - treez = inUsersGroups(tree[z].getGroup(), g); - treei = inUsersGroups(tree[i].getGroup(), g); + treez = m->inUsersGroups(tree[z].getGroup(), g); + treei = m->inUsersGroups(tree[i].getGroup(), g); if ((treez == true) && (treei == true)) { //switches node i and node z's info. @@ -324,7 +852,7 @@ void Tree::randomLabels(vector g) { tree[z].pGroups = (tree[i].pGroups); tree[i].pGroups = (lib_hold); - string zgroup = tree[z].getGroup(); + vector zgroup = tree[z].getGroup(); tree[z].setGroup(tree[i].getGroup()); tree[i].setGroup(zgroup); @@ -336,50 +864,15 @@ void Tree::randomLabels(vector g) { tree[z].pcount = (tree[i].pcount); tree[i].pcount = (gcount_hold); } + + for (int k = 0; k < (tree[i].getGroup()).size(); k++) { groupNodeInfo[(tree[i].getGroup())[k]].push_back(i); } + for (int k = 0; k < (tree[z].getGroup()).size(); k++) { groupNodeInfo[(tree[z].getGroup())[k]].push_back(z); } } } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "Tree", "randomLabels"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } -} -/**************************************************************************************************/ - -void Tree::randomLabels(string groupA, string groupB) { - try { - int numSeqsA = globaldata->gTreemap->seqsPerGroup[groupA]; - int numSeqsB = globaldata->gTreemap->seqsPerGroup[groupB]; - - vector randomGroups(numSeqsA+numSeqsB, groupA); - for(int i=numSeqsA;ierrorOut(e, "Tree", "randomBlengths"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Tree class function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } } /*************************************************************************************************/ void Tree::assembleRandomUnifracTree(vector g) { @@ -408,7 +897,8 @@ void Tree::assembleRandomUnifracTree(vector g) { } /*************************************************************************************************/ void Tree::assembleRandomUnifracTree(string groupA, string groupB) { - randomLabels(groupA, groupB); + vector temp; temp.push_back(groupA); temp.push_back(groupB); + randomLabels(temp); assembleTree(); } @@ -444,7 +934,7 @@ void Tree::randomTopology() { escape = 1; } } - + tree[i].setChildren(rnd_index1,rnd_index2); tree[i].setParent(-1); tree[rnd_index1].setParent(i); @@ -452,38 +942,66 @@ void Tree::randomTopology() { } } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "Tree", "randomTopology"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Tree class function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; +} +/*****************************************************************/ +void Tree::print(ostream& out) { + try { + int root = findRoot(); + printBranch(root, out, "branch"); + out << ";" << endl; + } + catch(exception& e) { + m->errorOut(e, "Tree", "print"); exit(1); - } + } +} +/*****************************************************************/ +void Tree::print(ostream& out, map nameMap) { + try { + int root = findRoot(); + printBranch(root, out, nameMap); + out << ";" << endl; + } + catch(exception& e) { + m->errorOut(e, "Tree", "print"); + exit(1); + } +} +/*****************************************************************/ +void Tree::print(ostream& out, string mode) { + try { + int root = findRoot(); + printBranch(root, out, mode); + out << ";" << endl; + } + catch(exception& e) { + m->errorOut(e, "Tree", "print"); + exit(1); + } } - /*****************************************************************/ // This prints out the tree in Newick form. void Tree::createNewickFile(string f) { try { int root = findRoot(); - //filename = getRootName(globaldata->getTreeFile()) + "newick"; + filename = f; - openOutputFile(filename, out); + + m->openOutputFile(filename, out); - printBranch(root); + printBranch(root, out, "branch"); // you are at the end of the tree out << ";" << endl; out.close(); } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "Tree", "createNewickFile"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Tree class function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } } /*****************************************************************/ @@ -494,45 +1012,214 @@ int Tree::findRoot() { for (int i = 0; i < numNodes; i++) { //you found the root if (tree[i].getParent() == -1) { return i; } + //cout << "i = " << i << endl; + //cout << "i's parent = " << tree[i].getParent() << endl; } return -1; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "Tree", "findRoot"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Tree class function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } } +/*****************************************************************/ +void Tree::printBranch(int node, ostream& out, map names) { +try { +// you are not a leaf + if (tree[node].getLChild() != -1) { + out << "("; + printBranch(tree[node].getLChild(), out, names); + out << ","; + printBranch(tree[node].getRChild(), out, names); + out << ")"; + + //if there is a branch length then print it + if (tree[node].getBranchLength() != -1) { + out << ":" << tree[node].getBranchLength(); + } + + }else { //you are a leaf + map::iterator itNames = names.find(tree[node].getName()); + + string outputString = ""; + if (itNames != names.end()) { + + vector dupNames; + m->splitAtComma((itNames->second), dupNames); + + if (dupNames.size() == 1) { + outputString += tree[node].getName(); + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(tree[node].getBranchLength()); + } + }else { + outputString += "("; + + for (int u = 0; u < dupNames.size()-1; u++) { + outputString += dupNames[u]; + + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(0.0); + } + outputString += ","; + } + + outputString += dupNames[dupNames.size()-1]; + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(0.0); + } + + outputString += ")"; + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(tree[node].getBranchLength()); + } + } + }else { + outputString = tree[node].getName(); + //if there is a branch length then print it + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(tree[node].getBranchLength()); + } + + m->mothurOut("[ERROR]: " + tree[node].getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); + } + + out << outputString; + } + + } + catch(exception& e) { + m->errorOut(e, "Tree", "printBranch"); + exit(1); + } +} /*****************************************************************/ -void Tree::printBranch(int node) { +void Tree::printBranch(int node, ostream& out, string mode) { + try { + + // you are not a leaf + if (tree[node].getLChild() != -1) { + out << "("; + printBranch(tree[node].getLChild(), out, mode); + out << ","; + printBranch(tree[node].getRChild(), out, mode); + out << ")"; + if (mode == "branch") { + //if there is a branch length then print it + if (tree[node].getBranchLength() != -1) { + out << ":" << tree[node].getBranchLength(); + } + }else if (mode == "boot") { + //if there is a label then print it + if (tree[node].getLabel() != -1) { + out << tree[node].getLabel(); + } + }else if (mode == "both") { + if (tree[node].getLabel() != -1) { + out << tree[node].getLabel(); + } + //if there is a branch length then print it + if (tree[node].getBranchLength() != -1) { + out << ":" << tree[node].getBranchLength(); + } + } + }else { //you are a leaf + vector leafGroup = ct->getGroups(tree[node].getName()); + + if (mode == "branch") { + out << leafGroup[0]; + //if there is a branch length then print it + if (tree[node].getBranchLength() != -1) { + out << ":" << tree[node].getBranchLength(); + } + }else if (mode == "boot") { + out << leafGroup[0]; + //if there is a label then print it + if (tree[node].getLabel() != -1) { + out << tree[node].getLabel(); + } + }else if (mode == "both") { + out << tree[node].getName(); + if (tree[node].getLabel() != -1) { + out << tree[node].getLabel(); + } + //if there is a branch length then print it + if (tree[node].getBranchLength() != -1) { + out << ":" << tree[node].getBranchLength(); + } + } + } + + } + catch(exception& e) { + m->errorOut(e, "Tree", "printBranch"); + exit(1); + } +} +/*****************************************************************/ +void Tree::printBranch(int node, ostream& out, string mode, vector& theseNodes) { try { // you are not a leaf - if (tree[node].getLChild() != -1) { + if (theseNodes[node].getLChild() != -1) { out << "("; - printBranch(tree[node].getLChild()); + printBranch(theseNodes[node].getLChild(), out, mode); out << ","; - printBranch(tree[node].getRChild()); + printBranch(theseNodes[node].getRChild(), out, mode); out << ")"; + if (mode == "branch") { + //if there is a branch length then print it + if (theseNodes[node].getBranchLength() != -1) { + out << ":" << theseNodes[node].getBranchLength(); + } + }else if (mode == "boot") { + //if there is a label then print it + if (theseNodes[node].getLabel() != -1) { + out << theseNodes[node].getLabel(); + } + }else if (mode == "both") { + if (theseNodes[node].getLabel() != -1) { + out << theseNodes[node].getLabel(); + } + //if there is a branch length then print it + if (theseNodes[node].getBranchLength() != -1) { + out << ":" << theseNodes[node].getBranchLength(); + } + } }else { //you are a leaf - out << tree[node].getGroup() << ":" << tree[node].getBranchLength(); + vector leafGroup = ct->getGroups(theseNodes[node].getName()); + + if (mode == "branch") { + out << leafGroup[0]; + //if there is a branch length then print it + if (theseNodes[node].getBranchLength() != -1) { + out << ":" << theseNodes[node].getBranchLength(); + } + }else if (mode == "boot") { + out << leafGroup[0]; + //if there is a label then print it + if (theseNodes[node].getLabel() != -1) { + out << theseNodes[node].getLabel(); + } + }else if (mode == "both") { + out << theseNodes[node].getName(); + if (theseNodes[node].getLabel() != -1) { + out << theseNodes[node].getLabel(); + } + //if there is a branch length then print it + if (theseNodes[node].getBranchLength() != -1) { + out << ":" << theseNodes[node].getBranchLength(); + } + } } } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "Tree", "printBranch"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Tree class function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } } - /*****************************************************************/ void Tree::printTree() { @@ -545,5 +1232,166 @@ void Tree::printTree() { } /*****************************************************************/ +//this code is a mess and should be rethought...-slw +int Tree::parseTreeFile() { + + //only takes names from the first tree and assumes that all trees use the same names. + try { + string filename = m->getTreeFile(); + ifstream filehandle; + m->openInputFile(filename, filehandle); + int c, comment; + comment = 0; + int done = 1; + + //ifyou are not a nexus file + if((c = filehandle.peek()) != '#') { + while((c = filehandle.peek()) != ';') { + if (m->control_pressed) { filehandle.close(); return 0; } + while ((c = filehandle.peek()) != ';') { + if (m->control_pressed) { filehandle.close(); return 0; } + // get past comments + if(c == '[') { + comment = 1; + } + if(c == ']'){ + comment = 0; + } + if((c == '(') && (comment != 1)){ break; } + filehandle.get(); + } + + done = readTreeString(filehandle); + if (done == 0) { break; } + } + //ifyou are a nexus file + }else if((c = filehandle.peek()) == '#') { + string holder = ""; + + // get past comments + while(holder != "translate" && holder != "Translate"){ + if (m->control_pressed) { filehandle.close(); return 0; } + if(holder == "[" || holder == "[!"){ + comment = 1; + } + if(holder == "]"){ + comment = 0; + } + filehandle >> holder; + + //if there is no translate then you must read tree string otherwise use translate to get names + if((holder == "tree") && (comment != 1)){ + //pass over the "tree rep.6878900 = " + while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF)) {;} + + if(c == EOF) { break; } + filehandle.putback(c); //put back first ( of tree. + done = readTreeString(filehandle); + + break; + } + + if (done == 0) { break; } + } + + //use nexus translation rather than parsing tree to save time + if((holder == "translate") || (holder == "Translate")) { + + string number, name, h; + h = ""; // so it enters the loop the first time + while((h != ";") && (number != ";")) { + if (m->control_pressed) { filehandle.close(); return 0; } + filehandle >> number; + filehandle >> name; + + //c = , until done with translation then c = ; + h = name.substr(name.length()-1, name.length()); + name.erase(name.end()-1); //erase the comma + m->Treenames.push_back(number); + } + if(number == ";") { m->Treenames.pop_back(); } //in case ';' from translation is on next line instead of next to last name + } + } + filehandle.close(); + return 0; + //for (int i = 0; i < globaldata->Treenames.size(); i++) { +//cout << globaldata->Treenames[i] << endl; } +//cout << globaldata->Treenames.size() << endl; + } + catch(exception& e) { + m->errorOut(e, "Tree", "parseTreeFile"); + exit(1); + } +} +/*******************************************************/ + +/*******************************************************/ +int Tree::readTreeString(ifstream& filehandle) { + try { + int c; + string name; //, k + + while((c = filehandle.peek()) != ';') { + if (m->control_pressed) { return 0; } +//k = c; +//cout << " at beginning of while " << k << endl; + if(c == ')') { + //to pass over labels in trees + c=filehandle.get(); + while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); } + filehandle.putback(c); + } + if(c == ';') { return 0; } + if(c == -1) { return 0; } + //if you are a name + if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space + name = ""; + c = filehandle.get(); + //k = c; +//cout << k << endl; + while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) { + name += c; + c = filehandle.get(); + //k = c; +//cout << " in name while " << k << endl; + } + +//cout << "name = " << name << endl; + if (name != "\r" ) { + m->Treenames.push_back(name); } //cout << m->Treenames.size() << '\t' << name << endl; + + filehandle.putback(c); +//k = c; +//cout << " after putback" << k << endl; + } + + if(c == ':') { //read until you reach the end of the branch length + while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) { + c = filehandle.get(); + //k = c; + //cout << " in branch while " << k << endl; + } + filehandle.putback(c); + } + + c = filehandle.get(); +//k = c; + //cout << " here after get " << k << endl; + if(c == ';') { return 0; } + if(c == ')') { filehandle.putback(c); } + //k = c; +//cout << k << endl; + + } + return 0; + } + catch(exception& e) { + m->errorOut(e, "Tree", "readTreeString"); + exit(1); + } +} + +/*******************************************************/ +/*******************************************************/