From: westcott Date: Wed, 23 Feb 2011 18:34:53 +0000 (+0000) Subject: fixed bug in unifrac commands with unrooted trees X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=edad2fa8defaa6856b3606a215bf64b91340eeb4 fixed bug in unifrac commands with unrooted trees --- diff --git a/readtree.cpp b/readtree.cpp index e98524b..8158a57 100644 --- a/readtree.cpp +++ b/readtree.cpp @@ -129,7 +129,7 @@ int ReadNewickTree::read() { } //if you are a nexus file }else if ((c = filehandle.peek()) == '#') { - nexusTranslation(); //reads file through the translation and updates treemap + holder = nexusTranslation(); //reads file through the translation and updates treemap while((c = filehandle.peek()) != EOF) { // get past comments while ((c = filehandle.peek()) != EOF) { @@ -175,11 +175,11 @@ int ReadNewickTree::read() { } /**************************************************************************************************/ //This function read the file through the translation of the sequences names and updates treemap. -void ReadNewickTree::nexusTranslation() { +string ReadNewickTree::nexusTranslation() { try { holder = ""; - int numSeqs = globaldata->gTreemap->getNumSeqs(); //must save this some when we clear old names we can still know how many sequences there were + //int numSeqs = globaldata->gTreemap->getNumSeqs(); //must save this some when we clear old names we can still know how many sequences there were int comment = 0; // get past comments @@ -191,23 +191,39 @@ void ReadNewickTree::nexusTranslation() { comment = 0; } filehandle >> holder; - if(holder == "tree" && comment != 1){return;} + if(holder == "tree" && comment != 1){return holder;} } //update treemap globaldata->gTreemap->namesOfSeqs.clear(); - for(int i=0;i> number; - filehandle >> name; - name.erase(name.end()-1); //erase the comma + + char c; + string number, name; + while ((c = filehandle.peek()) != EOF) { + + filehandle >> number; + + if ((number == "tree") || (number == ";") ) { name = number; break; } + + filehandle >> name; + + char lastChar; + if (name.length() != 0) { lastChar = name[name.length()-1]; } + + if ((name == "tree") || (name == ";") ) { break; } + + if (lastChar == ',') { name.erase(name.end()-1); } //erase the comma + //insert new one with new name - globaldata->gTreemap->treemap[toString(number)].groupname = globaldata->gTreemap->treemap[name].groupname; - globaldata->gTreemap->treemap[toString(number)].vectorIndex = globaldata->gTreemap->treemap[name].vectorIndex; + string group = globaldata->gTreemap->getGroup(name); + globaldata->gTreemap->treemap[toString(number)].groupname = group; + globaldata->gTreemap->treemap[toString(number)].vectorIndex = globaldata->gTreemap->getIndex(name); //erase old one. so treemap[sarah].groupnumber is now treemap[1].groupnumber. if number is 1 and name is sarah. globaldata->gTreemap->treemap.erase(name); globaldata->gTreemap->namesOfSeqs.push_back(number); } + + return name; } catch(exception& e) { m->errorOut(e, "ReadNewickTree", "nexusTranslation"); diff --git a/readtree.h b/readtree.h index 0c36833..7cfb4b4 100644 --- a/readtree.h +++ b/readtree.h @@ -51,7 +51,7 @@ private: Tree* T; int readNewickInt(istream&, int&, Tree*); int readTreeString(); - void nexusTranslation(); + string nexusTranslation(); ifstream filehandle; string treeFile; string holder; diff --git a/tree.cpp b/tree.cpp index 08ee850..06f92cf 100644 --- a/tree.cpp +++ b/tree.cpp @@ -50,12 +50,12 @@ Tree::Tree() { numNodes = 2*numLeaves - 1; tree.resize(numNodes); - + //initialize groupNodeInfo for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) { groupNodeInfo[globaldata->gTreemap->namesOfGroups[i]].resize(0); } - + //initialize tree with correct number of nodes, name and group info. for (int i = 0; i < numNodes; i++) { //initialize leaf nodes @@ -64,6 +64,7 @@ Tree::Tree() { //save group info string group = globaldata->gTreemap->getGroup(globaldata->Treenames[i]); + vector tempGroups; tempGroups.push_back(group); tree[i].setGroup(tempGroups); groupNodeInfo[group].push_back(i); @@ -82,6 +83,7 @@ Tree::Tree() { tree[i].setGroup(tempGroups); } } + } catch(exception& e) { m->errorOut(e, "Tree", "Tree"); diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp index 566fd10..fbbc4f1 100644 --- a/unifracunweightedcommand.cpp +++ b/unifracunweightedcommand.cpp @@ -432,7 +432,7 @@ void UnifracUnweightedCommand::createPhylipFile(int i) { out << name << '\t'; //output distances - for (int l = 0; l < globaldata->Groups.size(); l++) { out << dists[r][l] << '\t'; } + for (int l = 0; l < globaldata->Groups.size(); l++) { out << setprecision(11) << dists[r][l] << '\t'; } out << endl; }else{ //output distances diff --git a/unweighted.cpp b/unweighted.cpp index fb2f0b2..312d549 100644 --- a/unweighted.cpp +++ b/unweighted.cpp @@ -172,12 +172,11 @@ EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombo EstOutput results; results.resize(num); int count = 0; - int numLeaves = t->getNumLeaves(); int total = num; int twentyPercent = (total * 0.20); if (twentyPercent == 0) { twentyPercent = 1; } - - + + for (int h = start; h < (start+num); h++) { //cout << namesOfGroupCombos[h][0] << '\t' << namesOfGroupCombos[h][1] << endl; if (m->control_pressed) { return results; } @@ -185,69 +184,57 @@ EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombo double UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group double totalBL = 0.00; //all branch lengths double UW = 0.00; //Unweighted Value = UniqueBL / totalBL; - map tempTotals; //maps node to total Branch Length - map nodePcountSize; //maps node to pcountSize - map::iterator itCount; - - for(int i=0;igetNumNodes();i++){ + + //find a node that belongs to one of the groups in this combo + int nodeBelonging = -1; + for (int g = 0; g < namesOfGroupCombos[h].size(); g++) { + if (t->groupNodeInfo[namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = t->groupNodeInfo[namesOfGroupCombos[h][g]][0]; break; } + } - if (m->control_pressed) { return data; } - - //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want - //pcountSize = 2, not unique to one group - //pcountSize = 1, unique to one group - - int pcountSize = 0; - for (int j = 0; j < namesOfGroupCombos[h].size(); j++) { - map::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]); - if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } - } - - nodePcountSize[i] = pcountSize; - - //cout << i << '\t' << t->tree[i].getName() << " br = " << abs(t->tree[i].getBranchLength()) << '\t'; - if (pcountSize == 0) { } - else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1)) { UniqueBL += abs(t->tree[i].getBranchLength()); } + //sanity check + if (nodeBelonging == -1) { + m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); + for (int g = 0; g < namesOfGroupCombos[h].size()-1; g++) { m->mothurOut(namesOfGroupCombos[h][g] + "-"); } + m->mothurOut(namesOfGroupCombos[h][namesOfGroupCombos[h].size()-1]); + m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW; + }else{ + + getRoot(t, nodeBelonging, namesOfGroupCombos[h]); - //if you are a leaf from a users group add to total - if (i < numLeaves) { - if ((t->tree[i].getBranchLength() != -1) && pcountSize != 0) { - //cout << "added to total" << endl; - totalBL += abs(t->tree[i].getBranchLength()); + for(int i=0;igetNumNodes();i++){ + + if (m->control_pressed) { return data; } + + //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want + //pcountSize = 2, not unique to one group + //pcountSize = 1, unique to one group + + int pcountSize = 0; + for (int j = 0; j < namesOfGroupCombos[h].size(); j++) { + map::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]); + if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } + } + // + //unique calc + if (pcountSize == 0) { } + else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root + UniqueBL += abs(t->tree[i].getBranchLength()); } - tempTotals[i] = 0.0; //we don't care about you, or we have already added you - }else{ //if you are not a leaf - //do both your chidren have have descendants from the users groups? - int lc = t->tree[i].getLChild(); - int rc = t->tree[i].getRChild(); - //if yes, add your childrens tempTotals - if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) { - totalBL += tempTotals[lc] + tempTotals[rc]; - //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl; - if (t->tree[i].getBranchLength() != -1) { - tempTotals[i] = abs(t->tree[i].getBranchLength()); - }else { - tempTotals[i] = 0.0; - } - }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0; //we don't care about you - }else { //if no, your tempTotal is your childrens temp totals + your branch length - if (t->tree[i].getBranchLength() != -1) { - tempTotals[i] = tempTotals[lc] + tempTotals[rc] + abs(t->tree[i].getBranchLength()); - }else { - tempTotals[i] = tempTotals[lc] + tempTotals[rc]; - } + //total calc + if (pcountSize == 0) { } + else if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root + totalBL += abs(t->tree[i].getBranchLength()); } - //cout << "temptotal = "<< tempTotals[i] << endl; + } - - } - //cout << UniqueBL << '\t' << totalBL << endl; - UW = (UniqueBL / totalBL); + cout << UniqueBL << '\t' << totalBL << endl; + UW = (UniqueBL / totalBL); - if (isnan(UW) || isinf(UW)) { UW = 0; } + if (isnan(UW) || isinf(UW)) { UW = 0; } - results[count] = UW; + results[count] = UW; + } count++; //report progress @@ -425,7 +412,6 @@ EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombo EstOutput results; results.resize(num); int count = 0; - int numLeaves = t->getNumLeaves(); Tree* copyTree = new Tree; @@ -442,68 +428,58 @@ EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombo double UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group double totalBL = 0.00; //all branch lengths double UW = 0.00; //Unweighted Value = UniqueBL / totalBL; - map tempTotals; //maps node to total Branch Length - map nodePcountSize; //maps node to pcountSize - - for(int i=0;igetNumNodes();i++){ + //find a node that belongs to one of the groups in this combo + int nodeBelonging = -1; + for (int g = 0; g < namesOfGroupCombos[h].size(); g++) { + if (copyTree->groupNodeInfo[namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = copyTree->groupNodeInfo[namesOfGroupCombos[h][g]][0]; break; } + } - if (m->control_pressed) { return data; } - - //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want - //pcountSize = 2, not unique to one group - //pcountSize = 1, unique to one group - - int pcountSize = 0; - for (int j = 0; j < namesOfGroupCombos[h].size(); j++) { - map::iterator itGroup = copyTree->tree[i].pcount.find(namesOfGroupCombos[h][j]); - if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } - } + //sanity check + if (nodeBelonging == -1) { + m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); + for (int g = 0; g < namesOfGroupCombos[h].size()-1; g++) { m->mothurOut(namesOfGroupCombos[h][g] + "-"); } + m->mothurOut(namesOfGroupCombos[h][namesOfGroupCombos[h].size()-1]); + m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW; + }else{ - nodePcountSize[i] = pcountSize; - - if (pcountSize == 0) { } - else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1)) { UniqueBL += abs(copyTree->tree[i].getBranchLength()); } + getRoot(copyTree, nodeBelonging, namesOfGroupCombos[h]); - //if you are a leaf from a users group add to total - if (i < numLeaves) { - if ((copyTree->tree[i].getBranchLength() != -1) && pcountSize != 0) { - totalBL += abs(copyTree->tree[i].getBranchLength()); + for(int i=0;igetNumNodes();i++){ + + if (m->control_pressed) { return data; } + + //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want + //pcountSize = 2, not unique to one group + //pcountSize = 1, unique to one group + + int pcountSize = 0; + for (int j = 0; j < namesOfGroupCombos[h].size(); j++) { + map::iterator itGroup = copyTree->tree[i].pcount.find(namesOfGroupCombos[h][j]); + if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } } - tempTotals[i] = 0.0; //we don't care about you, or we have already added you - }else{ //if you are not a leaf - //do both your chidren have have descendants from the users groups? - int lc = copyTree->tree[i].getLChild(); - int rc = copyTree->tree[i].getRChild(); - //if yes, add your childrens tempTotals - if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) { - totalBL += tempTotals[lc] + tempTotals[rc]; - - if (copyTree->tree[i].getBranchLength() != -1) { - tempTotals[i] = abs(copyTree->tree[i].getBranchLength()); - }else { - tempTotals[i] = 0.0; - } - }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0; //we don't care about you - }else { //if no, your tempTotal is your childrens temp totals + your branch length - if (t->tree[i].getBranchLength() != -1) { - tempTotals[i] = tempTotals[lc] + tempTotals[rc] + abs(copyTree->tree[i].getBranchLength()); - }else { - tempTotals[i] = tempTotals[lc] + tempTotals[rc]; - } + //unique calc + if (pcountSize == 0) { } + else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root + UniqueBL += abs(copyTree->tree[i].getBranchLength()); + } + + //total calc + if (pcountSize == 0) { } + else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root + totalBL += abs(copyTree->tree[i].getBranchLength()); } } - + //cout << UniqueBL << '\t' << totalBL << endl; + UW = (UniqueBL / totalBL); + + if (isnan(UW) || isinf(UW)) { UW = 0; } + + results[count] = UW; } - - UW = (UniqueBL / totalBL); - - if (isnan(UW) || isinf(UW)) { UW = 0; } - - results[count] = UW; count++; - + } delete copyTree; @@ -516,5 +492,61 @@ EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombo } } /**************************************************************************************************/ +int Unweighted::getRoot(Tree* t, int v, vector grouping) { + try { + //you are a leaf so get your parent + int index = t->tree[index].getParent(); + + //my parent is a potential root + rootForGrouping[grouping].insert(index); + + //while you aren't at root + while(t->tree[index].getParent() != -1){ + + if (m->control_pressed) { return 0; } + + //am I the root for this grouping? if so I want to stop "early" + //does my sibling have descendants from the users groups? + //if so I am not the root + int parent = t->tree[index].getParent(); + int lc = t->tree[parent].getLChild(); + int rc = t->tree[parent].getRChild(); + + int sib = lc; + if (lc == index) { sib = rc; } + + map::iterator itGroup; + int pcountSize = 0; + for (int j = 0; j < grouping.size(); j++) { + map::iterator itGroup = t->tree[sib].pcount.find(grouping[j]); + if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } + } + + //if yes, I am not the root + if (pcountSize != 0) { + rootForGrouping[grouping].clear(); + rootForGrouping[grouping].insert(parent); + } + + index = parent; + } + + //get all nodes above the root to add so we don't add their u values above + index = *(rootForGrouping[grouping].begin()); + while(t->tree[index].getParent() != -1){ + int parent = t->tree[index].getParent(); + rootForGrouping[grouping].insert(parent); + cout << parent << " in root" << endl; + index = parent; + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "Unweighted", "getRoot"); + exit(1); + } +} +/**************************************************************************************************/ diff --git a/unweighted.h b/unweighted.h index f413261..598af1a 100644 --- a/unweighted.h +++ b/unweighted.h @@ -37,12 +37,13 @@ class Unweighted : public TreeCalculator { TreeMap* tmap; int processors; string outputDir; + map< vector, set > rootForGrouping; //maps a grouping combo to the roots for that combo EstOutput driver(Tree*, vector< vector >, int, int); EstOutput createProcesses(Tree*, vector< vector >); EstOutput driver(Tree*, vector< vector >, int, int, bool); EstOutput createProcesses(Tree*, vector< vector >, bool); - + int getRoot(Tree*, int, vector); }; /***********************************************************************/