From c76b65598a0b85183126bd764c83f354e9f490d1 Mon Sep 17 00:00:00 2001 From: westcott Date: Thu, 7 May 2009 15:47:53 +0000 Subject: [PATCH] fixed concensus command and modified tree class so you can print labels on trees --- Mothur.xcodeproj/project.pbxproj | 62 ++++++ concensuscommand.cpp | 370 +++++++++++++++++++++++-------- concensuscommand.h | 20 +- tree.cpp | 53 ++++- tree.h | 3 +- treenode.cpp | 5 + treenode.h | 4 +- 7 files changed, 406 insertions(+), 111 deletions(-) diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index bfed0cb..5e6e792 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -36,6 +36,16 @@ 375AA13A0F9E433D008EF9B8 /* readotu.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 375AA1350F9E433D008EF9B8 /* readotu.cpp */; }; 375AA13B0F9E433D008EF9B8 /* readphylip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 375AA1370F9E433D008EF9B8 /* readphylip.cpp */; }; 377326650FAF16E0007ABB8B /* concensuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 377326630FAF16E0007ABB8B /* concensuscommand.cpp */; }; + 378C1B030FB0644E004D63F5 /* filterseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AEE0FB0644D004D63F5 /* filterseqscommand.cpp */; }; + 378C1B040FB0644E004D63F5 /* goodscoverage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF00FB0644D004D63F5 /* goodscoverage.cpp */; }; + 378C1B050FB0644E004D63F5 /* readclustal.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF20FB0644D004D63F5 /* readclustal.cpp */; }; + 378C1B060FB0644E004D63F5 /* readfasta.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF40FB0644D004D63F5 /* readfasta.cpp */; }; + 378C1B070FB0644E004D63F5 /* readnexus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF60FB0644D004D63F5 /* readnexus.cpp */; }; + 378C1B080FB0644E004D63F5 /* readseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF90FB0644D004D63F5 /* readseqscommand.cpp */; }; + 378C1B090FB0644E004D63F5 /* readseqsphylip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AFB0FB0644D004D63F5 /* readseqsphylip.cpp */; }; + 378C1B0A0FB0644E004D63F5 /* sequencedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AFD0FB0644D004D63F5 /* sequencedb.cpp */; }; + 378C1B0B0FB0644E004D63F5 /* sharedjackknife.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AFF0FB0644D004D63F5 /* sharedjackknife.cpp */; }; + 378C1B0C0FB0644E004D63F5 /* sharedmarczewski.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1B010FB0644D004D63F5 /* sharedmarczewski.cpp */; }; 379293C30F2DE73400B9034A /* treemap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379293C20F2DE73400B9034A /* treemap.cpp */; }; 379294700F2E191800B9034A /* parsimonycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3792946F0F2E191800B9034A /* parsimonycommand.cpp */; }; 3792948A0F2E258500B9034A /* parsimony.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379294890F2E258500B9034A /* parsimony.cpp */; }; @@ -203,6 +213,27 @@ 375AA1380F9E433D008EF9B8 /* readphylip.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readphylip.h; sourceTree = ""; }; 377326630FAF16E0007ABB8B /* concensuscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = concensuscommand.cpp; sourceTree = ""; }; 377326640FAF16E0007ABB8B /* concensuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = concensuscommand.h; sourceTree = ""; }; + 378C1AEE0FB0644D004D63F5 /* filterseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = filterseqscommand.cpp; sourceTree = ""; }; + 378C1AEF0FB0644D004D63F5 /* filterseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = filterseqscommand.h; sourceTree = ""; }; + 378C1AF00FB0644D004D63F5 /* goodscoverage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = goodscoverage.cpp; sourceTree = ""; }; + 378C1AF10FB0644D004D63F5 /* goodscoverage.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = goodscoverage.h; sourceTree = ""; }; + 378C1AF20FB0644D004D63F5 /* readclustal.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readclustal.cpp; sourceTree = ""; }; + 378C1AF30FB0644D004D63F5 /* readclustal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readclustal.h; sourceTree = ""; }; + 378C1AF40FB0644D004D63F5 /* readfasta.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readfasta.cpp; sourceTree = ""; }; + 378C1AF50FB0644D004D63F5 /* readfasta.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readfasta.h; sourceTree = ""; }; + 378C1AF60FB0644D004D63F5 /* readnexus.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readnexus.cpp; sourceTree = ""; }; + 378C1AF70FB0644D004D63F5 /* readnexus.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readnexus.h; sourceTree = ""; }; + 378C1AF80FB0644D004D63F5 /* readnexusal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readnexusal.h; sourceTree = ""; }; + 378C1AF90FB0644D004D63F5 /* readseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readseqscommand.cpp; sourceTree = ""; }; + 378C1AFA0FB0644D004D63F5 /* readseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readseqscommand.h; sourceTree = ""; }; + 378C1AFB0FB0644D004D63F5 /* readseqsphylip.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readseqsphylip.cpp; sourceTree = ""; }; + 378C1AFC0FB0644D004D63F5 /* readseqsphylip.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readseqsphylip.h; sourceTree = ""; }; + 378C1AFD0FB0644D004D63F5 /* sequencedb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sequencedb.cpp; sourceTree = ""; }; + 378C1AFE0FB0644D004D63F5 /* sequencedb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sequencedb.h; sourceTree = ""; }; + 378C1AFF0FB0644D004D63F5 /* sharedjackknife.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedjackknife.cpp; sourceTree = ""; }; + 378C1B000FB0644D004D63F5 /* sharedjackknife.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedjackknife.h; sourceTree = ""; }; + 378C1B010FB0644D004D63F5 /* sharedmarczewski.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedmarczewski.cpp; sourceTree = ""; }; + 378C1B020FB0644D004D63F5 /* sharedmarczewski.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedmarczewski.h; sourceTree = ""; }; 379293C10F2DE73400B9034A /* treemap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treemap.h; sourceTree = ""; }; 379293C20F2DE73400B9034A /* treemap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treemap.cpp; sourceTree = ""; }; 3792946E0F2E191800B9034A /* parsimonycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parsimonycommand.h; sourceTree = ""; }; @@ -473,13 +504,24 @@ 37D928060F21331F001D4494 /* rarefact.h */, 37D928050F21331F001D4494 /* rarefact.cpp */, 37D928090F21331F001D4494 /* rarefactioncurvedata.h */, + 378C1AF30FB0644D004D63F5 /* readclustal.h */, + 378C1AF20FB0644D004D63F5 /* readclustal.cpp */, 375AA1340F9E433D008EF9B8 /* readcolumn.h */, 375AA1330F9E433D008EF9B8 /* readcolumn.cpp */, + 378C1AF50FB0644D004D63F5 /* readfasta.h */, + 378C1AF40FB0644D004D63F5 /* readfasta.cpp */, 37D928130F21331F001D4494 /* readmatrix.hpp */, + 378C1AF70FB0644D004D63F5 /* readnexus.h */, + 378C1AF60FB0644D004D63F5 /* readnexus.cpp */, + 378C1AF80FB0644D004D63F5 /* readnexusal.h */, + 378C1AFD0FB0644D004D63F5 /* sequencedb.cpp */, + 378C1AFE0FB0644D004D63F5 /* sequencedb.h */, 375AA1360F9E433D008EF9B8 /* readotu.h */, 375AA1350F9E433D008EF9B8 /* readotu.cpp */, 375AA1380F9E433D008EF9B8 /* readphylip.h */, 375AA1370F9E433D008EF9B8 /* readphylip.cpp */, + 378C1AFC0FB0644D004D63F5 /* readseqsphylip.h */, + 378C1AFB0FB0644D004D63F5 /* readseqsphylip.cpp */, 37AD4DC80F28F3DD00AA2D49 /* readtree.h */, 37AD4DC90F28F3DD00AA2D49 /* readtree.cpp */, 37D9281D0F21331F001D4494 /* sequence.hpp */, @@ -527,6 +569,8 @@ 7EC3D4500FA0FFF900338DA5 /* coverage.cpp */, EB9303F70F53517300E8EF26 /* geom.h */, EB9303F80F53517300E8EF26 /* geom.cpp */, + 378C1AF10FB0644D004D63F5 /* goodscoverage.h */, + 378C1AF00FB0644D004D63F5 /* goodscoverage.cpp */, EB9303E90F534D9400E8EF26 /* logsd.h */, EB9303EA0F534D9400E8EF26 /* logsd.cpp */, EB6E68170F5F1C780044E17B /* qstat.h */, @@ -553,6 +597,8 @@ 37D928240F21331F001D4494 /* sharedchao1.cpp */, 37D928290F21331F001D4494 /* sharedjabund.h */, 37D928280F21331F001D4494 /* sharedjabund.cpp */, + 378C1B000FB0644D004D63F5 /* sharedjackknife.h */, + 378C1AFF0FB0644D004D63F5 /* sharedjackknife.cpp */, 37D9282B0F21331F001D4494 /* sharedjclass.h */, 37D9282A0F21331F001D4494 /* sharedjclass.cpp */, 37D9282D0F21331F001D4494 /* sharedjest.h */, @@ -565,6 +611,8 @@ 375874020F7D64EF0040F377 /* sharedkulczynskicody.cpp */, 375874010F7D64EF0040F377 /* sharedlennon.h */, 375873FF0F7D64EF0040F377 /* sharedlennon.cpp */, + 378C1B020FB0644D004D63F5 /* sharedmarczewski.h */, + 378C1B010FB0644D004D63F5 /* sharedmarczewski.cpp */, 375874070F7D64FC0040F377 /* sharedmorisitahorn.h */, 375874080F7D64FC0040F377 /* sharedmorisitahorn.cpp */, 375874090F7D64FC0040F377 /* sharedochiai.h */, @@ -617,6 +665,8 @@ 377326630FAF16E0007ABB8B /* concensuscommand.cpp */, 37B28F660F27590100808A62 /* deconvolutecommand.h */, 37B28F670F27590100808A62 /* deconvolutecommand.cpp */, + 378C1AEF0FB0644D004D63F5 /* filterseqscommand.h */, + 378C1AEE0FB0644D004D63F5 /* filterseqscommand.cpp */, A70B53A50F4CD7AD0064797E /* getgroupcommand.h */, A70B53A40F4CD7AD0064797E /* getgroupcommand.cpp */, A70B53A70F4CD7AD0064797E /* getlabelcommand.h */, @@ -647,6 +697,8 @@ 372E12950F263D5A0095CF7E /* readdistcommand.cpp */, 372E126E0F26365B0095CF7E /* readotucommand.h */, 372E126F0F26365B0095CF7E /* readotucommand.cpp */, + 378C1AFA0FB0644D004D63F5 /* readseqscommand.h */, + 378C1AF90FB0644D004D63F5 /* readseqscommand.cpp */, 37E5F4900F2A3DA800F8D827 /* readtreecommand.h */, 37E5F4910F2A3DA800F8D827 /* readtreecommand.cpp */, 37D928270F21331F001D4494 /* sharedcommand.h */, @@ -891,6 +943,16 @@ 7EC3D4550FA0FFF900338DA5 /* coverage.cpp in Sources */, 7EC3D4560FA0FFF900338DA5 /* whittaker.cpp in Sources */, 377326650FAF16E0007ABB8B /* concensuscommand.cpp in Sources */, + 378C1B030FB0644E004D63F5 /* filterseqscommand.cpp in Sources */, + 378C1B040FB0644E004D63F5 /* goodscoverage.cpp in Sources */, + 378C1B050FB0644E004D63F5 /* readclustal.cpp in Sources */, + 378C1B060FB0644E004D63F5 /* readfasta.cpp in Sources */, + 378C1B070FB0644E004D63F5 /* readnexus.cpp in Sources */, + 378C1B080FB0644E004D63F5 /* readseqscommand.cpp in Sources */, + 378C1B090FB0644E004D63F5 /* readseqsphylip.cpp in Sources */, + 378C1B0A0FB0644E004D63F5 /* sequencedb.cpp in Sources */, + 378C1B0B0FB0644E004D63F5 /* sharedjackknife.cpp in Sources */, + 378C1B0C0FB0644E004D63F5 /* sharedmarczewski.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/concensuscommand.cpp b/concensuscommand.cpp index cc00cd0..01df253 100644 --- a/concensuscommand.cpp +++ b/concensuscommand.cpp @@ -40,103 +40,97 @@ int ConcensusCommand::execute(){ numLeaves = t[0]->getNumLeaves(); } - //initialize nodepairs - nodePairs.resize(numNodes-numLeaves); - - //process each tree and fill counts - for (int i = 0; i < t.size(); i++) { - //process each nonleaf node - for (int j = numLeaves; j < numNodes; j++) { - //get pairing - int leftChild = t[i]->tree[j].getLChild(); - int rightChild = t[i]->tree[j].getRChild(); - string pair = toString(leftChild) + "-" + toString(rightChild); - - //if this is an existing pairing for this node then increment the count otherwise add new pairing. - it = nodePairs[j-numLeaves].find(pair); - if (it != nodePairs[j-numLeaves].end()) {//already have that score - nodePairs[j-numLeaves][pair]++; - }else{//first time we have seen this score - nodePairs[j-numLeaves][pair] = 1; - } - } - } - + //get the possible pairings + getSets(); //print out pairings for testing - /*for (int i = 0; i < nodePairs.size(); i++) { - cout << "pairs for node " << i+numLeaves << endl; - for (it = nodePairs[i].begin(); it != nodePairs[i].end(); it++) { - cout << " pair = " << it->first << " count = " << it->second << endl; + /*cout << "possible pairing " << endl; + for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) { + for (int i = 0; i < it2->first.size(); i++) { + cout << it2->first[i] << " "; } + cout << '\t' << it2->second << endl; }*/ + //open file for pairing not included in the tree notIncluded = getRootName(globaldata->inputFileName) + "concensuspairs"; openOutputFile(notIncluded, out2); concensusTree = new Tree(); - //set relationships for nonleaf nodes - for (int j = numLeaves; j < numNodes; j++) { - - //find that nodes pairing with the highest count - int large = 0; - for (it = nodePairs[j-numLeaves].begin(); it != nodePairs[j-numLeaves].end(); it++) { - if (it->second > large) { large = it->second; it2 = it; } - } - - string pair = it2->first; - int pos = pair.find_first_of('-'); - string lefts = pair.substr(0, pos); - string rights = pair.substr(pos+1, pair.length()); - - //converts string to int - int left = atoi(lefts.c_str()); - int right = atoi(rights.c_str()); - - //set parents and children - concensusTree->tree[j].setChildren(left, right); - concensusTree->tree[left].setParent(j); - concensusTree->tree[right].setParent(j); - - // set Branchlengths // - //if your children are leaves remove their branchlengths - if (concensusTree->tree[left].getLChild() == -1) { concensusTree->tree[left].setBranchLength(1.0); } - if (concensusTree->tree[right].getLChild() == -1) { concensusTree->tree[right].setBranchLength(1.0); } - - //set your branch length to the percentage of trees with this pairing - float bLength = (float) it2->second / (float) t.size(); - concensusTree->tree[j].setBranchLength(bLength); - - //print out set used - string leftName, rightName; - getNames(it2->first, leftName, rightName); - - out2 << "Node " << j+1 << " in concensus tree: " << leftName << "-" << rightName << '\t' << (float)it2->second / (float) t.size() << endl; - out2 << "Node " << j+1 << " sets not included in concensus tree: " << endl; - - //print out sets not included - for (it = nodePairs[j-numLeaves].begin(); it != nodePairs[j-numLeaves].end(); it++) { - if (it != it2) { - getNames(it->first, leftName, rightName); - out2 << leftName << "-" << rightName << '\t' << (float)it->second / (float) t.size() << endl; + it2 = nodePairs.find(treeSet); + + nodePairsInTree[treeSet] = it2->second; + + //erase treeset because you are adding it + nodePairs.erase(treeSet); + + //set count to numLeaves; + count = numLeaves; + + buildConcensusTree(treeSet); + + concensusTree->assembleTree(); + + //output species in order + out2 << "Species in Order: " << endl << endl; + for (int i = 0; i < treeSet.size(); i++) { out2 << i+1 << ". " << treeSet[i] << endl; } + + vector temp; + //output sets included + out2 << endl << "Sets included in the concensus tree:" << endl << endl; + for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) { + //only output pairs not leaves + if (it2->first.size() > 1) { + temp.clear(); + //initialize temp to all "." + temp.resize(treeSet.size(), "."); + + //set the spot in temp that represents it2->first[i] to a "*" + for (int i = 0; i < it2->first.size(); i++) { + //find spot + int index = findSpot(it2->first[i]); + temp[index] = "*"; + } + + //output temp + for (int j = 0; j < temp.size(); j++) { + out2 << temp[j]; } + out2 << '\t' << it2->second << endl; } - out2 << endl; - } - concensusTree->assembleTree(); + //output sets not included + out2 << endl << "Sets NOT included in the concensus tree:" << endl << endl; + for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) { + temp.clear(); + //initialize temp to all "." + temp.resize(treeSet.size(), "."); + + //set the spot in temp that represents it2->first[i] to a "*" + for (int i = 0; i < it2->first.size(); i++) { + //find spot + int index = findSpot(it2->first[i]); + temp[index] = "*"; + } + + //output temp + for (int j = 0; j < temp.size(); j++) { + out2 << temp[j]; + } + out2 << '\t' << it2->second << endl; + } outputFile = getRootName(globaldata->inputFileName) + "concensus.tre"; openOutputFile(outputFile, out); - concensusTree->print(out); + concensusTree->printForBoot(out); out.close(); out2.close(); - delete concensusTree; + delete concensusTree; return 0; } @@ -149,36 +143,226 @@ int ConcensusCommand::execute(){ exit(1); } } + //********************************************************************************************************************** +int ConcensusCommand::buildConcensusTree(vector nodeSet) { + try { + vector leftChildSet; + vector rightChildSet; + + //if you are at a leaf + if (nodeSet.size() == 1) { + //return the vector index of the leaf you are at + return concensusTree->getIndex(nodeSet[0]); + //terminate recursion + }else if (count == numNodes) { return 0; } + else { + leftChildSet = getNextAvailableSet(nodeSet); + rightChildSet = getRestSet(nodeSet, leftChildSet); + int left = buildConcensusTree(leftChildSet); + int right = buildConcensusTree(rightChildSet); + concensusTree->tree[count].setChildren(left, right); + concensusTree->tree[count].setLabel(nodePairsInTree[nodeSet]); + concensusTree->tree[left].setParent(count); + concensusTree->tree[right].setParent(count); + count++; + return (count-1); + } + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function buildConcensusTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ConcensusCommand class function buildConcensusTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} -void ConcensusCommand::getNames(string pair, string& leftName, string& rightName) { +//********************************************************************************************************************** +void ConcensusCommand::getSets() { try { - int pos = pair.find_first_of('-'); - string lefts = pair.substr(0, pos); - string rights = pair.substr(pos+1, pair.length()); - - //converts string to int - int left = atoi(lefts.c_str()); - int right = atoi(rights.c_str()); - - //get name - leftName = concensusTree->tree[left].getName(); - rightName = concensusTree->tree[right].getName(); - - //if you are not a leaf use node number as name - if (leftName == "") { leftName = toString(left+1); } - if (rightName == "") { rightName = toString(right+1); } + vector temp; + treeSet.clear(); + + //for each tree add the possible pairs you find + for (int i = 0; i < t.size(); i++) { + + //for each non-leaf node get descendant info. + for (int j = numLeaves; j < numNodes; j++) { + temp.clear(); + //go through pcounts and pull out descendants + for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) { + temp.push_back(it->first); + } + + //sort temp + sort(temp.begin(), temp.end()); + + it2 = nodePairs.find(temp); + if (it2 != nodePairs.end()) { + nodePairs[temp]++; + }else{ + nodePairs[temp] = 1; + } + } + } + + //add each leaf to terminate recursion in concensus + //you want the leaves in there but with insignifigant sightings value so it is added last + //for each leaf node get descendant info. + for (int j = 0; j < numLeaves; j++) { + + //only need the first one since leaves have no descendants but themselves + it = t[0]->tree[j].pcount.begin(); + temp.clear(); temp.push_back(it->first); + + //fill treeSet + treeSet.push_back(it->first); + + //add leaf to list but with sighting value less then all non leaf pairs + nodePairs[temp] = 0; + } + + sort(treeSet.begin(), treeSet.end()); } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getNames. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getSets. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the ConcensusCommand class function getNames. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the ConcensusCommand class function getSets. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } //********************************************************************************************************************** +vector ConcensusCommand::getNextAvailableSet(vector bigset) { + try { + vector largest; largest.clear(); + int largestSighting = -1; + + //go through the sets + for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) { + //are you a subset of bigset + if (isSubset(bigset, it2->first) == true) { + + //are you the largest. if you are the same size as current largest refer to sighting + if (it2->first.size() > largest.size()) { largest = it2->first; largestSighting = it2->second; } + else if (it2->first.size() == largest.size()) { + if (it2->second > largestSighting) { largest = it2->first; largestSighting = it2->second; } + } + + } + } + + //save for printing out later and for branch lengths + nodePairsInTree[largest] = nodePairs[largest]; + + //delete whatever set you return because it is no longer available + nodePairs.erase(largest); + + return largest; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getNextAvailableSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ConcensusCommand class function getNextAvailableSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + +} + +//********************************************************************************************************************** +vector ConcensusCommand::getRestSet(vector bigset, vector subset) { + try { + vector rest; + + for (int i = 0; i < bigset.size(); i++) { + bool inSubset = false; + for (int j = 0; j < subset.size(); j++) { + if (bigset[i] == subset[j]) { inSubset = true; break; } + } + + //its not in the subset so put it in the rest + if (inSubset == false) { rest.push_back(bigset[i]); } + } + + //save for printing out later and for branch lengths + nodePairsInTree[rest] = nodePairs[rest]; + + //delete whatever set you return because it is no longer available + nodePairs.erase(rest); + + return rest; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getRestSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ConcensusCommand class function getRestSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + +} + +//********************************************************************************************************************** +bool ConcensusCommand::isSubset(vector bigset, vector subset) { + try { + + //check if each guy in suset is also in bigset + for (int i = 0; i < subset.size(); i++) { + bool match = false; + for (int j = 0; j < bigset.size(); j++) { + if (subset[i] == bigset[j]) { match = true; break; } + } + + //you have a guy in subset that had no match in bigset + if (match == false) { return false; } + } + + return true; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function isSubset. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ConcensusCommand class function isSubset. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +//********************************************************************************************************************** +int ConcensusCommand::findSpot(string node) { + try { + int spot; + + //check if each guy in suset is also in bigset + for (int i = 0; i < treeSet.size(); i++) { + if (treeSet[i] == node) { spot = i; break; } + } + + return spot; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function findSpot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ConcensusCommand class function findSpot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +//********************************************************************************************************************** + + diff --git a/concensuscommand.h b/concensuscommand.h index 5550b3b..cc923dd 100644 --- a/concensuscommand.h +++ b/concensuscommand.h @@ -28,15 +28,25 @@ private: SharedUtil* util; vector t; Tree* concensusTree; - vector< map > nodePairs; // -one entry in vector for each internal node. - // i.e. if node 7's child pairs are 1,2 ten times and 1,3 20 times then the map would be [12, 10] and [13, 20]; + vector treeSet; //set containing all members of the tree to start recursion. filled in getSets(). + map< vector, int > nodePairs; //, int > nodePairsInTree; map::iterator it; - map::iterator it2; + map< vector, int>::iterator it2; string outputFile, notIncluded; ofstream out, out2; - int numNodes, numLeaves; + int numNodes, numLeaves, count; //count is the next available spot in the tree vector + + void getSets(); + vector getNextAvailableSet(vector); //gets next largest and highest rated set that is a subset of the set passed in. + vector getRestSet(vector, vector); + bool isSubset(vector, vector); + int findSpot(string); + int buildConcensusTree(vector); - void getNames(string, string&, string&); }; #endif diff --git a/tree.cpp b/tree.cpp index e138121..a6c9c07 100644 --- a/tree.cpp +++ b/tree.cpp @@ -457,7 +457,7 @@ void Tree::randomTopology() { void Tree::print(ostream& out) { try { int root = findRoot(); - printBranch(root, out); + printBranch(root, out, "branch"); out << ";" << endl; } catch(exception& e) { @@ -469,6 +469,23 @@ void Tree::print(ostream& out) { exit(1); } } +/*****************************************************************/ +void Tree::printForBoot(ostream& out) { + try { + int root = findRoot(); + printBranch(root, out, "boot"); + out << ";" << endl; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function printForBoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the Tree class function printForBoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + /*****************************************************************/ // This prints out the tree in Newick form. void Tree::createNewickFile(string f) { @@ -478,7 +495,7 @@ void Tree::createNewickFile(string f) { filename = f; openOutputFile(filename, out); - printBranch(root, out); + printBranch(root, out, "branch"); // you are at the end of the tree out << ";" << endl; @@ -516,25 +533,39 @@ int Tree::findRoot() { } /*****************************************************************/ -void Tree::printBranch(int node, ostream& out) { +void Tree::printBranch(int node, ostream& out, string mode) { try { // you are not a leaf if (tree[node].getLChild() != -1) { out << "("; - printBranch(tree[node].getLChild(), out); + printBranch(tree[node].getLChild(), out, mode); out << ","; - printBranch(tree[node].getRChild(), out); + printBranch(tree[node].getRChild(), out, mode); out << ")"; - //if there is a branch length then print it - if (tree[node].getBranchLength() != -1) { - out << ":" << tree[node].getBranchLength(); + if (mode == "branch") { + //if there is a branch length then print it + if (tree[node].getBranchLength() != -1) { + out << ":" << tree[node].getBranchLength(); + } + }else if (mode == "boot") { + //if there is a label then print it + if (tree[node].getLabel() != -1) { + out << tree[node].getLabel(); + } } }else { //you are a leaf out << tree[node].getGroup(); - //if there is a branch length then print it - if (tree[node].getBranchLength() != -1) { - out << ":" << tree[node].getBranchLength(); + if (mode == "branch") { + //if there is a branch length then print it + if (tree[node].getBranchLength() != -1) { + out << ":" << tree[node].getBranchLength(); + } + }else if (mode == "boot") { + //if there is a label then print it + if (tree[node].getLabel() != -1) { + out << tree[node].getLabel(); + } } } diff --git a/tree.h b/tree.h index cbbad66..8b77d3d 100644 --- a/tree.h +++ b/tree.h @@ -35,6 +35,7 @@ public: map mergeUserGroups(int, vector); //returns a map with a groupname and the number of times that group was seen in the children void printTree(); void print(ostream&); + void printForBoot(ostream&); //this function takes the leaf info and populates the non leaf nodes void assembleTree(); @@ -55,7 +56,7 @@ private: void randomLabels(vector); void randomLabels(string, string); int findRoot(); //return index of root node - void printBranch(int, ostream&); //recursively print out tree + void printBranch(int, ostream&, string); //recursively print out tree }; #endif diff --git a/treenode.cpp b/treenode.cpp index 59fb630..fc6ea3a 100644 --- a/treenode.cpp +++ b/treenode.cpp @@ -19,6 +19,7 @@ Node::Node() { lchild = -1; rchild = -1; length2leaf = 0.0; + label = -1; } /****************************************************************/ @@ -28,6 +29,8 @@ void Node::setGroup(string groups) { group =groups; } /****************************************************************/ void Node::setBranchLength(float l) { branchLength = l; } /****************************************************************/ +void Node::setLabel(float l) { label = l; } +/****************************************************************/ void Node::setLengthToLeaves(float l) { length2leaf = l; } /****************************************************************/ void Node::setParent(int p) { parent = p; } @@ -42,6 +45,8 @@ string Node::getGroup() { return group; } /****************************************************************/ float Node::getBranchLength() { return branchLength; } /****************************************************************/ +float Node::getLabel() { return label; } +/****************************************************************/ float Node::getLengthToLeaves() { return length2leaf; } /****************************************************************/ int Node::getParent() { return parent; } diff --git a/treenode.h b/treenode.h index 58d967d..df30cd3 100644 --- a/treenode.h +++ b/treenode.h @@ -25,6 +25,7 @@ class Node { void setName(string); void setGroup(string); void setBranchLength(float); + void setLabel(float); void setParent(int); void setChildren(int, int); //leftchild, rightchild void setIndex(int); @@ -34,6 +35,7 @@ class Node { string getGroup(); float getBranchLength(); float getLengthToLeaves(); + float getLabel(); int getParent(); int getLChild(); int getRChild(); @@ -53,7 +55,7 @@ class Node { private: string name; string group; - float branchLength, length2leaf; + float branchLength, length2leaf, label; int parent; int lchild; int rchild; -- 2.39.2