X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=tree.cpp;h=745893471e065abee12e7752b91c71bb37c1a635;hp=ed652508ef2e6b049c183777ae5eecab42100d97;hb=a8e2df1b96a57f5f29576b08361b86a96a8eff4f;hpb=e98d56be8369a799e61a411bc13d3bd1fa3451e5 diff --git a/tree.cpp b/tree.cpp index ed65250..7458934 100644 --- a/tree.cpp +++ b/tree.cpp @@ -10,7 +10,7 @@ #include "tree.h" /*****************************************************************/ -Tree::Tree(int num, TreeMap* t) : tmap(t) { +Tree::Tree(int num, CountTable* t) : ct(t) { try { m = MothurOut::getInstance(); @@ -36,21 +36,20 @@ Tree::Tree(string g) { //do not use tree generated by this its just to extract t } } /*****************************************************************/ -Tree::Tree(TreeMap* t) : tmap(t) { +Tree::Tree(CountTable* t) : ct(t) { try { m = MothurOut::getInstance(); if (m->runParse == true) { parseTreeFile(); m->runParse = false; } -//for(int i = 0; i < globaldata->Treenames.size(); i++) { cout << i << '\t' << globaldata->Treenames[i] << endl; } + numLeaves = m->Treenames.size(); numNodes = 2*numLeaves - 1; tree.resize(numNodes); //initialize groupNodeInfo - for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) { - groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0); - } + vector namesOfGroups = ct->getNamesOfGroups(); + for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); } //initialize tree with correct number of nodes, name and group info. for (int i = 0; i < numNodes; i++) { @@ -59,19 +58,35 @@ Tree::Tree(TreeMap* t) : tmap(t) { tree[i].setName(m->Treenames[i]); //save group info - string group = tmap->getGroup(m->Treenames[i]); - - vector tempGroups; tempGroups.push_back(group); - tree[i].setGroup(tempGroups); - groupNodeInfo[group].push_back(i); - - //set pcount and pGroup for groupname to 1. - tree[i].pcount[group] = 1; - tree[i].pGroups[group] = 1; - - //Treemap knows name, group and index to speed up search - tmap->setIndex(m->Treenames[i], i); - + int maxPars = 1; + vector group; + vector counts = ct->getGroupCounts(m->Treenames[i]); + for (int j = 0; j < namesOfGroups.size(); j++) { + if (counts[j] != 0) { //you have seqs from this group + groupNodeInfo[namesOfGroups[j]].push_back(i); + group.push_back(namesOfGroups[j]); + tree[i].pGroups[namesOfGroups[j]] = counts[j]; + tree[i].pcount[namesOfGroups[j]] = counts[j]; + //keep highest group + if(counts[j] > maxPars){ maxPars = counts[j]; } + } + } + tree[i].setGroup(group); + setIndex(m->Treenames[i], i); + + if (maxPars > 1) { //then we have some more dominant groups + //erase all the groups that are less than maxPars because you found a more dominant group. + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){ + if(it->second < maxPars){ + tree[i].pGroups.erase(it++); + }else { it++; } + } + //set one remaining groups to 1 + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){ + tree[i].pGroups[it->first] = 1; + } + }//end if + //intialize non leaf nodes }else if (i > (numLeaves-1)) { tree[i].setName(""); @@ -87,7 +102,7 @@ Tree::Tree(TreeMap* t) : tmap(t) { } } /*****************************************************************/ -Tree::Tree(TreeMap* t, vector< vector >& sims) : tmap(t) { +Tree::Tree(CountTable* t, vector< vector >& sims) : ct(t) { try { m = MothurOut::getInstance(); @@ -98,9 +113,8 @@ Tree::Tree(TreeMap* t, vector< vector >& sims) : tmap(t) { tree.resize(numNodes); //initialize groupNodeInfo - for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) { - groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0); - } + vector namesOfGroups = ct->getNamesOfGroups(); + for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); } //initialize tree with correct number of nodes, name and group info. for (int i = 0; i < numNodes; i++) { @@ -109,18 +123,34 @@ Tree::Tree(TreeMap* t, vector< vector >& sims) : tmap(t) { tree[i].setName(m->Treenames[i]); //save group info - string group = tmap->getGroup(m->Treenames[i]); - - vector tempGroups; tempGroups.push_back(group); - tree[i].setGroup(tempGroups); - groupNodeInfo[group].push_back(i); - - //set pcount and pGroup for groupname to 1. - tree[i].pcount[group] = 1; - tree[i].pGroups[group] = 1; - - //Treemap knows name, group and index to speed up search - tmap->setIndex(m->Treenames[i], i); + int maxPars = 1; + vector group; + vector counts = ct->getGroupCounts(m->Treenames[i]); + for (int j = 0; j < namesOfGroups.size(); j++) { + if (counts[j] != 0) { //you have seqs from this group + groupNodeInfo[namesOfGroups[j]].push_back(i); + group.push_back(namesOfGroups[j]); + tree[i].pGroups[namesOfGroups[j]] = counts[j]; + tree[i].pcount[namesOfGroups[j]] = counts[j]; + //keep highest group + if(counts[j] > maxPars){ maxPars = counts[j]; } + } + } + tree[i].setGroup(group); + setIndex(m->Treenames[i], i); + + if (maxPars > 1) { //then we have some more dominant groups + //erase all the groups that are less than maxPars because you found a more dominant group. + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){ + if(it->second < maxPars){ + tree[i].pGroups.erase(it++); + }else { it++; } + } + //set one remaining groups to 1 + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){ + tree[i].pGroups[it->first] = 1; + } + }//end if //intialize non leaf nodes }else if (i > (numLeaves-1)) { @@ -129,11 +159,12 @@ Tree::Tree(TreeMap* t, vector< vector >& sims) : tmap(t) { tree[i].setGroup(tempGroups); } } + //build tree from matrix //initialize indexes - map indexes; //maps row in simMatrix to vector index in the tree - for (int g = 0; g < numLeaves; g++) { indexes[g] = g; } + map thisIndexes; //maps row in simMatrix to vector index in the tree + for (int g = 0; g < numLeaves; g++) { thisIndexes[g] = g; } //do merges and create tree structure by setting parents and children //there are numGroups - 1 merges to do @@ -152,26 +183,26 @@ Tree::Tree(TreeMap* t, vector< vector >& sims) : tmap(t) { //set non-leaf node info and update leaves to know their parents //non-leaf - tree[numLeaves + i].setChildren(indexes[row], indexes[column]); + tree[numLeaves + i].setChildren(thisIndexes[row], thisIndexes[column]); //parents - tree[indexes[row]].setParent(numLeaves + i); - tree[indexes[column]].setParent(numLeaves + i); + tree[thisIndexes[row]].setParent(numLeaves + i); + tree[thisIndexes[column]].setParent(numLeaves + i); //blength = distance / 2; float blength = ((1.0 - largest) / 2); //branchlengths - tree[indexes[row]].setBranchLength(blength - tree[indexes[row]].getLengthToLeaves()); - tree[indexes[column]].setBranchLength(blength - tree[indexes[column]].getLengthToLeaves()); + tree[thisIndexes[row]].setBranchLength(blength - tree[thisIndexes[row]].getLengthToLeaves()); + tree[thisIndexes[column]].setBranchLength(blength - tree[thisIndexes[column]].getLengthToLeaves()); //set your length to leaves to your childs length plus branchlength - tree[numLeaves + i].setLengthToLeaves(tree[indexes[row]].getLengthToLeaves() + tree[indexes[row]].getBranchLength()); + tree[numLeaves + i].setLengthToLeaves(tree[thisIndexes[row]].getLengthToLeaves() + tree[thisIndexes[row]].getBranchLength()); //update index - indexes[row] = numLeaves+i; - indexes[column] = numLeaves+i; + thisIndexes[row] = numLeaves+i; + thisIndexes[column] = numLeaves+i; //remove highest value that caused the merge. sims[row][column] = -1000.0; @@ -200,7 +231,7 @@ Tree::Tree(TreeMap* t, vector< vector >& sims) : tmap(t) { } /*****************************************************************/ Tree::~Tree() {} -/*****************************************************************/ +/***************************************************************** void Tree::addNamesToCounts(map nameMap) { try { //ex. seq1 seq2,seq3,se4 @@ -297,15 +328,15 @@ void Tree::addNamesToCounts(map nameMap) { m->errorOut(e, "Tree", "addNamesToCounts"); exit(1); } -} +}*/ /*****************************************************************/ int Tree::getIndex(string searchName) { try { - //Treemap knows name, group and index to speed up search - // getIndex function will return the vector index or -1 if seq is not found. - int index = tmap->getIndex(searchName); - return index; - + map::iterator itIndex = indexes.find(searchName); + if (itIndex != indexes.end()) { + return itIndex->second; + } + return -1; } catch(exception& e) { m->errorOut(e, "Tree", "getIndex"); @@ -316,8 +347,10 @@ int Tree::getIndex(string searchName) { void Tree::setIndex(string searchName, int index) { try { - //set index in treemap - tmap->setIndex(searchName, index); + map::iterator itIndex = indexes.find(searchName); + if (itIndex == indexes.end()) { + indexes[searchName] = index; + } } catch(exception& e) { m->errorOut(e, "Tree", "setIndex"); @@ -325,14 +358,8 @@ void Tree::setIndex(string searchName, int index) { } } /*****************************************************************/ -int Tree::assembleTree(map nameMap) { - try { - //save for later - names = nameMap; - - //if user has given a names file we want to include that info in the pgroups and pcount info. - if(nameMap.size() != 0) { addNamesToCounts(nameMap); } - +int Tree::assembleTree() { + try { //build the pGroups in non leaf nodes to be used in the parsimony calcs. for (int i = numLeaves; i < numNodes; i++) { if (m->control_pressed) { return 1; } @@ -348,66 +375,66 @@ int Tree::assembleTree(map nameMap) { exit(1); } } -/***************************************************************** -int Tree::assembleTree(string n) { - try { - - //build the pGroups in non leaf nodes to be used in the parsimony calcs. - for (int i = numLeaves; i < numNodes; i++) { - if (m->control_pressed) { return 1; } - - tree[i].pGroups = (mergeGroups(i)); - tree[i].pcount = (mergeGcounts(i)); - } - //float B = clock(); - //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl; - return 0; - } - catch(exception& e) { - m->errorOut(e, "Tree", "assembleTree"); - exit(1); - } -} /*****************************************************************/ //assumes leaf node names are in groups and no names file - used by indicator command void Tree::getSubTree(Tree* Ctree, vector Groups) { try { //copy Tree since we are going to destroy it - Tree* copy = new Tree(tmap); + Tree* copy = new Tree(ct); copy->getCopy(Ctree); - map empty; - copy->assembleTree(empty); + copy->assembleTree(); //we want to select some of the leaf nodes to create the output tree //go through the input Tree starting at parents of leaves + //initialize groupNodeInfo + vector namesOfGroups = ct->getNamesOfGroups(); + for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); } + + //initialize tree with correct number of nodes, name and group info. for (int i = 0; i < numNodes; i++) { - //initialize leaf nodes if (i <= (numLeaves-1)) { tree[i].setName(Groups[i]); //save group info - string group = tmap->getGroup(Groups[i]); - vector tempGroups; tempGroups.push_back(group); - tree[i].setGroup(tempGroups); - groupNodeInfo[group].push_back(i); - - //set pcount and pGroup for groupname to 1. - tree[i].pcount[group] = 1; - tree[i].pGroups[group] = 1; - - //Treemap knows name, group and index to speed up search - tmap->setIndex(Groups[i], i); - - //intialize non leaf nodes + int maxPars = 1; + vector group; + vector counts = ct->getGroupCounts(Groups[i]); + for (int j = 0; j < namesOfGroups.size(); j++) { + if (counts[j] != 0) { //you have seqs from this group + groupNodeInfo[namesOfGroups[j]].push_back(i); + group.push_back(namesOfGroups[j]); + tree[i].pGroups[namesOfGroups[j]] = counts[j]; + tree[i].pcount[namesOfGroups[j]] = counts[j]; + //keep highest group + if(counts[j] > maxPars){ maxPars = counts[j]; } + } + } + tree[i].setGroup(group); + setIndex(Groups[i], i); + + if (maxPars > 1) { //then we have some more dominant groups + //erase all the groups that are less than maxPars because you found a more dominant group. + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){ + if(it->second < maxPars){ + tree[i].pGroups.erase(it++); + }else { it++; } + } + //set one remaining groups to 1 + for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){ + tree[i].pGroups[it->first] = 1; + } + }//end if + + //intialize non leaf nodes }else if (i > (numLeaves-1)) { tree[i].setName(""); vector tempGroups; tree[i].setGroup(tempGroups); } } - + set removedLeaves; for (int i = 0; i < copy->getNumLeaves(); i++) { @@ -534,7 +561,7 @@ void Tree::getSubTree(Tree* Ctree, vector Groups) { exit(1); } } -/*****************************************************************/ +/***************************************************************** //assumes nameMap contains unique names as key or is empty. //assumes numLeaves defined in tree constructor equals size of seqsToInclude and seqsToInclude only contains unique seqs. int Tree::getSubTree(Tree* copy, vector seqsToInclude, map nameMap) { @@ -578,7 +605,7 @@ int Tree::populateNewTree(vector& oldtree, int node, int& index) { return (index++); }else { //you are a leaf - int indexInNewTree = tmap->getIndex(oldtree[node].getName()); + int indexInNewTree = getIndex(oldtree[node].getName()); return indexInNewTree; } } @@ -588,18 +615,11 @@ int Tree::populateNewTree(vector& oldtree, int node, int& index) { } } /*****************************************************************/ -void Tree::getCopy(Tree* copy, map nameMap, vector namesToInclude) { +void Tree::getCopy(Tree* copy, bool subsample) { try { //for each node in the tree copy its info for (int i = 0; i < numNodes; i++) { - //copy name - tree[i].setName(copy->tree[i].getName()); - - //copy group - vector temp; - tree[i].setGroup(temp); - //copy branch length tree[i].setBranchLength(copy->tree[i].getBranchLength()); @@ -608,93 +628,7 @@ void Tree::getCopy(Tree* copy, map nameMap, vector names //copy children tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild()); - - //copy index in node and tmap - tree[i].setIndex(copy->tree[i].getIndex()); - setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName())); - - //copy pGroups - tree[i].pGroups.clear(); - - //copy pcount - tree[i].pcount.clear(); - } - - groupNodeInfo.clear(); - - //now lets change prune the seqs not in namesToInclude by setting their group to "doNotIncludeMe" - for (int i = 0; i < numLeaves; i++) { - - if (m->control_pressed) { break; } - - string name = tree[i].getName(); - - map::iterator itNames = nameMap.find(name); - - if (itNames == nameMap.end()) { m->mothurOut(name + " is not in your name file, please correct."); m->mothurOutEndLine(); exit(1); } - else { - vector dupNames; - m->splitAtComma(nameMap[name], dupNames); - - map::iterator itCounts; - int maxPars = 1; - set groupsAddedForThisNode; - for (int j = 0; j < dupNames.size(); j++) { - - string group = tmap->getGroup(dupNames[j]); - bool includeMe = m->inUsersGroups(dupNames[j], namesToInclude); - - if (!includeMe && (group != "doNotIncludeMe")) { m->mothurOut("[ERROR] : creating subtree in copy.\n"); m->control_pressed = true; } - else if (!includeMe) { - if (groupsAddedForThisNode.count(group) == 0) { groupNodeInfo[group].push_back(i); groupsAddedForThisNode.insert(group); } //if you have not already added this node for this group, then add it - - //update pcounts - itCounts = tree[i].pcount.find(group); - if (itCounts == tree[i].pcount.end()) { //new group, add it - tree[i].pcount[group] = 1; - }else { - tree[i].pcount[group]++; - } - - //update pgroups - itCounts = tree[i].pGroups.find(group); - if (itCounts == tree[i].pGroups.end()) { //new group, add it - tree[i].pGroups[group] = 1; - }else{ - tree[i].pGroups[group]++; - } - - //keep highest group - if(tree[i].pGroups[group] > maxPars){ - maxPars = tree[i].pGroups[group]; - } - } - }//end for - - if (maxPars > 1) { //then we have some more dominant groups - //erase all the groups that are less than maxPars because you found a more dominant group. - for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){ - if(it->second < maxPars){ - tree[i].pGroups.erase(it++); - }else { it++; } - } - //set one remaining groups to 1 - for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){ - tree[i].pGroups[it->first] = 1; - } - }//end if - - //update groups to reflect all the groups this node represents - vector nodeGroups; - map::iterator itGroups; - for (itGroups = tree[i].pcount.begin(); itGroups != tree[i].pcount.end(); itGroups++) { - nodeGroups.push_back(itGroups->first); - } - tree[i].setGroup(nodeGroups); - - }//end else - }//end for - + } //build the pGroups in non leaf nodes to be used in the parsimony calcs. for (int i = numLeaves; i < numNodes; i++) { @@ -731,8 +665,8 @@ void Tree::getCopy(Tree* copy) { tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild()); //copy index in node and tmap + setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName())); tree[i].setIndex(copy->tree[i].getIndex()); - setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName())); //copy pGroups tree[i].pGroups = copy->tree[i].pGroups; @@ -896,8 +830,8 @@ void Tree::randomLabels(vector g) { try { //initialize groupNodeInfo - for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) { - groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0); + for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) { + groupNodeInfo[(ct->getNamesOfGroups())[i]].resize(0); } for(int i = 0; i < numLeaves; i++){ @@ -959,23 +893,20 @@ void Tree::randomBlengths() { /*************************************************************************************************/ void Tree::assembleRandomUnifracTree(vector g) { randomLabels(g); - map empty; - assembleTree(empty); + assembleTree(); } /*************************************************************************************************/ void Tree::assembleRandomUnifracTree(string groupA, string groupB) { vector temp; temp.push_back(groupA); temp.push_back(groupB); randomLabels(temp); - map empty; - assembleTree(empty); + assembleTree(); } /*************************************************************************************************/ //for now it's just random topology but may become random labels as well later that why this is such a simple function now... void Tree::assembleRandomTree() { randomTopology(); - map empty; - assembleTree(empty); + assembleTree(); } /**************************************************************************************************/ @@ -1194,16 +1125,16 @@ void Tree::printBranch(int node, ostream& out, string mode) { } } }else { //you are a leaf - string leafGroup = tmap->getGroup(tree[node].getName()); + vector leafGroup = ct->getGroups(tree[node].getName()); if (mode == "branch") { - out << leafGroup; + out << leafGroup[0]; //if there is a branch length then print it if (tree[node].getBranchLength() != -1) { out << ":" << tree[node].getBranchLength(); } }else if (mode == "boot") { - out << leafGroup; + out << leafGroup[0]; //if there is a label then print it if (tree[node].getLabel() != -1) { out << tree[node].getLabel(); @@ -1257,16 +1188,16 @@ void Tree::printBranch(int node, ostream& out, string mode, vector& theseN } } }else { //you are a leaf - string leafGroup = tmap->getGroup(theseNodes[node].getName()); + vector leafGroup = ct->getGroups(theseNodes[node].getName()); if (mode == "branch") { - out << leafGroup; + out << leafGroup[0]; //if there is a branch length then print it if (theseNodes[node].getBranchLength() != -1) { out << ":" << theseNodes[node].getBranchLength(); } }else if (mode == "boot") { - out << leafGroup; + out << leafGroup[0]; //if there is a label then print it if (theseNodes[node].getLabel() != -1) { out << theseNodes[node].getLabel(); @@ -1302,7 +1233,7 @@ void Tree::printTree() { /*****************************************************************/ //this code is a mess and should be rethought...-slw -void Tree::parseTreeFile() { +int Tree::parseTreeFile() { //only takes names from the first tree and assumes that all trees use the same names. try { @@ -1315,8 +1246,10 @@ void Tree::parseTreeFile() { //ifyou are not a nexus file if((c = filehandle.peek()) != '#') { - while((c = filehandle.peek()) != ';') { + while((c = filehandle.peek()) != ';') { + if (m->control_pressed) { filehandle.close(); return 0; } while ((c = filehandle.peek()) != ';') { + if (m->control_pressed) { filehandle.close(); return 0; } // get past comments if(c == '[') { comment = 1; @@ -1336,7 +1269,8 @@ void Tree::parseTreeFile() { string holder = ""; // get past comments - while(holder != "translate" && holder != "Translate"){ + while(holder != "translate" && holder != "Translate"){ + if (m->control_pressed) { filehandle.close(); return 0; } if(holder == "[" || holder == "[!"){ comment = 1; } @@ -1365,7 +1299,8 @@ void Tree::parseTreeFile() { string number, name, h; h = ""; // so it enters the loop the first time - while((h != ";") && (number != ";")) { + while((h != ";") && (number != ";")) { + if (m->control_pressed) { filehandle.close(); return 0; } filehandle >> number; filehandle >> name; @@ -1378,7 +1313,7 @@ void Tree::parseTreeFile() { } } filehandle.close(); - + return 0; //for (int i = 0; i < globaldata->Treenames.size(); i++) { //cout << globaldata->Treenames[i] << endl; } //cout << globaldata->Treenames.size() << endl; @@ -1396,7 +1331,8 @@ int Tree::readTreeString(ifstream& filehandle) { int c; string name; //, k - while((c = filehandle.peek()) != ';') { + while((c = filehandle.peek()) != ';') { + if (m->control_pressed) { return 0; } //k = c; //cout << " at beginning of while " << k << endl; if(c == ')') { @@ -1413,7 +1349,7 @@ int Tree::readTreeString(ifstream& filehandle) { c = filehandle.get(); //k = c; //cout << k << endl; - while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) { + while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) { name += c; c = filehandle.get(); //k = c; @@ -1421,7 +1357,9 @@ int Tree::readTreeString(ifstream& filehandle) { } //cout << "name = " << name << endl; - m->Treenames.push_back(name); + if (name != "\r" ) { + m->Treenames.push_back(name); } //cout << m->Treenames.size() << '\t' << name << endl; + filehandle.putback(c); //k = c; //cout << " after putback" << k << endl;