375AA13A0F9E433D008EF9B8 /* readotu.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 375AA1350F9E433D008EF9B8 /* readotu.cpp */; };
375AA13B0F9E433D008EF9B8 /* readphylip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 375AA1370F9E433D008EF9B8 /* readphylip.cpp */; };
377326650FAF16E0007ABB8B /* concensuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 377326630FAF16E0007ABB8B /* concensuscommand.cpp */; };
+ 378C1B030FB0644E004D63F5 /* filterseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AEE0FB0644D004D63F5 /* filterseqscommand.cpp */; };
+ 378C1B040FB0644E004D63F5 /* goodscoverage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF00FB0644D004D63F5 /* goodscoverage.cpp */; };
+ 378C1B050FB0644E004D63F5 /* readclustal.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF20FB0644D004D63F5 /* readclustal.cpp */; };
+ 378C1B060FB0644E004D63F5 /* readfasta.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF40FB0644D004D63F5 /* readfasta.cpp */; };
+ 378C1B070FB0644E004D63F5 /* readnexus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF60FB0644D004D63F5 /* readnexus.cpp */; };
+ 378C1B080FB0644E004D63F5 /* readseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF90FB0644D004D63F5 /* readseqscommand.cpp */; };
+ 378C1B090FB0644E004D63F5 /* readseqsphylip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AFB0FB0644D004D63F5 /* readseqsphylip.cpp */; };
+ 378C1B0A0FB0644E004D63F5 /* sequencedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AFD0FB0644D004D63F5 /* sequencedb.cpp */; };
+ 378C1B0B0FB0644E004D63F5 /* sharedjackknife.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AFF0FB0644D004D63F5 /* sharedjackknife.cpp */; };
+ 378C1B0C0FB0644E004D63F5 /* sharedmarczewski.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1B010FB0644D004D63F5 /* sharedmarczewski.cpp */; };
379293C30F2DE73400B9034A /* treemap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379293C20F2DE73400B9034A /* treemap.cpp */; };
379294700F2E191800B9034A /* parsimonycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3792946F0F2E191800B9034A /* parsimonycommand.cpp */; };
3792948A0F2E258500B9034A /* parsimony.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379294890F2E258500B9034A /* parsimony.cpp */; };
375AA1380F9E433D008EF9B8 /* readphylip.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readphylip.h; sourceTree = "<group>"; };
377326630FAF16E0007ABB8B /* concensuscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = concensuscommand.cpp; sourceTree = "<group>"; };
377326640FAF16E0007ABB8B /* concensuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = concensuscommand.h; sourceTree = "<group>"; };
+ 378C1AEE0FB0644D004D63F5 /* filterseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = filterseqscommand.cpp; sourceTree = "<group>"; };
+ 378C1AEF0FB0644D004D63F5 /* filterseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = filterseqscommand.h; sourceTree = "<group>"; };
+ 378C1AF00FB0644D004D63F5 /* goodscoverage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = goodscoverage.cpp; sourceTree = "<group>"; };
+ 378C1AF10FB0644D004D63F5 /* goodscoverage.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = goodscoverage.h; sourceTree = "<group>"; };
+ 378C1AF20FB0644D004D63F5 /* readclustal.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readclustal.cpp; sourceTree = "<group>"; };
+ 378C1AF30FB0644D004D63F5 /* readclustal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readclustal.h; sourceTree = "<group>"; };
+ 378C1AF40FB0644D004D63F5 /* readfasta.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readfasta.cpp; sourceTree = "<group>"; };
+ 378C1AF50FB0644D004D63F5 /* readfasta.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readfasta.h; sourceTree = "<group>"; };
+ 378C1AF60FB0644D004D63F5 /* readnexus.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readnexus.cpp; sourceTree = "<group>"; };
+ 378C1AF70FB0644D004D63F5 /* readnexus.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readnexus.h; sourceTree = "<group>"; };
+ 378C1AF80FB0644D004D63F5 /* readnexusal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readnexusal.h; sourceTree = "<group>"; };
+ 378C1AF90FB0644D004D63F5 /* readseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readseqscommand.cpp; sourceTree = "<group>"; };
+ 378C1AFA0FB0644D004D63F5 /* readseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readseqscommand.h; sourceTree = "<group>"; };
+ 378C1AFB0FB0644D004D63F5 /* readseqsphylip.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readseqsphylip.cpp; sourceTree = "<group>"; };
+ 378C1AFC0FB0644D004D63F5 /* readseqsphylip.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readseqsphylip.h; sourceTree = "<group>"; };
+ 378C1AFD0FB0644D004D63F5 /* sequencedb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sequencedb.cpp; sourceTree = "<group>"; };
+ 378C1AFE0FB0644D004D63F5 /* sequencedb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sequencedb.h; sourceTree = "<group>"; };
+ 378C1AFF0FB0644D004D63F5 /* sharedjackknife.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedjackknife.cpp; sourceTree = "<group>"; };
+ 378C1B000FB0644D004D63F5 /* sharedjackknife.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedjackknife.h; sourceTree = "<group>"; };
+ 378C1B010FB0644D004D63F5 /* sharedmarczewski.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedmarczewski.cpp; sourceTree = "<group>"; };
+ 378C1B020FB0644D004D63F5 /* sharedmarczewski.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedmarczewski.h; sourceTree = "<group>"; };
379293C10F2DE73400B9034A /* treemap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treemap.h; sourceTree = "<group>"; };
379293C20F2DE73400B9034A /* treemap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treemap.cpp; sourceTree = "<group>"; };
3792946E0F2E191800B9034A /* parsimonycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parsimonycommand.h; sourceTree = "<group>"; };
37D928060F21331F001D4494 /* rarefact.h */,
37D928050F21331F001D4494 /* rarefact.cpp */,
37D928090F21331F001D4494 /* rarefactioncurvedata.h */,
+ 378C1AF30FB0644D004D63F5 /* readclustal.h */,
+ 378C1AF20FB0644D004D63F5 /* readclustal.cpp */,
375AA1340F9E433D008EF9B8 /* readcolumn.h */,
375AA1330F9E433D008EF9B8 /* readcolumn.cpp */,
+ 378C1AF50FB0644D004D63F5 /* readfasta.h */,
+ 378C1AF40FB0644D004D63F5 /* readfasta.cpp */,
37D928130F21331F001D4494 /* readmatrix.hpp */,
+ 378C1AF70FB0644D004D63F5 /* readnexus.h */,
+ 378C1AF60FB0644D004D63F5 /* readnexus.cpp */,
+ 378C1AF80FB0644D004D63F5 /* readnexusal.h */,
+ 378C1AFD0FB0644D004D63F5 /* sequencedb.cpp */,
+ 378C1AFE0FB0644D004D63F5 /* sequencedb.h */,
375AA1360F9E433D008EF9B8 /* readotu.h */,
375AA1350F9E433D008EF9B8 /* readotu.cpp */,
375AA1380F9E433D008EF9B8 /* readphylip.h */,
375AA1370F9E433D008EF9B8 /* readphylip.cpp */,
+ 378C1AFC0FB0644D004D63F5 /* readseqsphylip.h */,
+ 378C1AFB0FB0644D004D63F5 /* readseqsphylip.cpp */,
37AD4DC80F28F3DD00AA2D49 /* readtree.h */,
37AD4DC90F28F3DD00AA2D49 /* readtree.cpp */,
37D9281D0F21331F001D4494 /* sequence.hpp */,
7EC3D4500FA0FFF900338DA5 /* coverage.cpp */,
EB9303F70F53517300E8EF26 /* geom.h */,
EB9303F80F53517300E8EF26 /* geom.cpp */,
+ 378C1AF10FB0644D004D63F5 /* goodscoverage.h */,
+ 378C1AF00FB0644D004D63F5 /* goodscoverage.cpp */,
EB9303E90F534D9400E8EF26 /* logsd.h */,
EB9303EA0F534D9400E8EF26 /* logsd.cpp */,
EB6E68170F5F1C780044E17B /* qstat.h */,
37D928240F21331F001D4494 /* sharedchao1.cpp */,
37D928290F21331F001D4494 /* sharedjabund.h */,
37D928280F21331F001D4494 /* sharedjabund.cpp */,
+ 378C1B000FB0644D004D63F5 /* sharedjackknife.h */,
+ 378C1AFF0FB0644D004D63F5 /* sharedjackknife.cpp */,
37D9282B0F21331F001D4494 /* sharedjclass.h */,
37D9282A0F21331F001D4494 /* sharedjclass.cpp */,
37D9282D0F21331F001D4494 /* sharedjest.h */,
375874020F7D64EF0040F377 /* sharedkulczynskicody.cpp */,
375874010F7D64EF0040F377 /* sharedlennon.h */,
375873FF0F7D64EF0040F377 /* sharedlennon.cpp */,
+ 378C1B020FB0644D004D63F5 /* sharedmarczewski.h */,
+ 378C1B010FB0644D004D63F5 /* sharedmarczewski.cpp */,
375874070F7D64FC0040F377 /* sharedmorisitahorn.h */,
375874080F7D64FC0040F377 /* sharedmorisitahorn.cpp */,
375874090F7D64FC0040F377 /* sharedochiai.h */,
377326630FAF16E0007ABB8B /* concensuscommand.cpp */,
37B28F660F27590100808A62 /* deconvolutecommand.h */,
37B28F670F27590100808A62 /* deconvolutecommand.cpp */,
+ 378C1AEF0FB0644D004D63F5 /* filterseqscommand.h */,
+ 378C1AEE0FB0644D004D63F5 /* filterseqscommand.cpp */,
A70B53A50F4CD7AD0064797E /* getgroupcommand.h */,
A70B53A40F4CD7AD0064797E /* getgroupcommand.cpp */,
A70B53A70F4CD7AD0064797E /* getlabelcommand.h */,
372E12950F263D5A0095CF7E /* readdistcommand.cpp */,
372E126E0F26365B0095CF7E /* readotucommand.h */,
372E126F0F26365B0095CF7E /* readotucommand.cpp */,
+ 378C1AFA0FB0644D004D63F5 /* readseqscommand.h */,
+ 378C1AF90FB0644D004D63F5 /* readseqscommand.cpp */,
37E5F4900F2A3DA800F8D827 /* readtreecommand.h */,
37E5F4910F2A3DA800F8D827 /* readtreecommand.cpp */,
37D928270F21331F001D4494 /* sharedcommand.h */,
7EC3D4550FA0FFF900338DA5 /* coverage.cpp in Sources */,
7EC3D4560FA0FFF900338DA5 /* whittaker.cpp in Sources */,
377326650FAF16E0007ABB8B /* concensuscommand.cpp in Sources */,
+ 378C1B030FB0644E004D63F5 /* filterseqscommand.cpp in Sources */,
+ 378C1B040FB0644E004D63F5 /* goodscoverage.cpp in Sources */,
+ 378C1B050FB0644E004D63F5 /* readclustal.cpp in Sources */,
+ 378C1B060FB0644E004D63F5 /* readfasta.cpp in Sources */,
+ 378C1B070FB0644E004D63F5 /* readnexus.cpp in Sources */,
+ 378C1B080FB0644E004D63F5 /* readseqscommand.cpp in Sources */,
+ 378C1B090FB0644E004D63F5 /* readseqsphylip.cpp in Sources */,
+ 378C1B0A0FB0644E004D63F5 /* sequencedb.cpp in Sources */,
+ 378C1B0B0FB0644E004D63F5 /* sharedjackknife.cpp in Sources */,
+ 378C1B0C0FB0644E004D63F5 /* sharedmarczewski.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
numLeaves = t[0]->getNumLeaves();
}
- //initialize nodepairs
- nodePairs.resize(numNodes-numLeaves);
-
- //process each tree and fill counts
- for (int i = 0; i < t.size(); i++) {
- //process each nonleaf node
- for (int j = numLeaves; j < numNodes; j++) {
- //get pairing
- int leftChild = t[i]->tree[j].getLChild();
- int rightChild = t[i]->tree[j].getRChild();
- string pair = toString(leftChild) + "-" + toString(rightChild);
-
- //if this is an existing pairing for this node then increment the count otherwise add new pairing.
- it = nodePairs[j-numLeaves].find(pair);
- if (it != nodePairs[j-numLeaves].end()) {//already have that score
- nodePairs[j-numLeaves][pair]++;
- }else{//first time we have seen this score
- nodePairs[j-numLeaves][pair] = 1;
- }
- }
- }
-
+ //get the possible pairings
+ getSets();
//print out pairings for testing
- /*for (int i = 0; i < nodePairs.size(); i++) {
- cout << "pairs for node " << i+numLeaves << endl;
- for (it = nodePairs[i].begin(); it != nodePairs[i].end(); it++) {
- cout << " pair = " << it->first << " count = " << it->second << endl;
+ /*cout << "possible pairing " << endl;
+ for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
+ for (int i = 0; i < it2->first.size(); i++) {
+ cout << it2->first[i] << " ";
}
+ cout << '\t' << it2->second << endl;
}*/
+
//open file for pairing not included in the tree
notIncluded = getRootName(globaldata->inputFileName) + "concensuspairs";
openOutputFile(notIncluded, out2);
concensusTree = new Tree();
- //set relationships for nonleaf nodes
- for (int j = numLeaves; j < numNodes; j++) {
-
- //find that nodes pairing with the highest count
- int large = 0;
- for (it = nodePairs[j-numLeaves].begin(); it != nodePairs[j-numLeaves].end(); it++) {
- if (it->second > large) { large = it->second; it2 = it; }
- }
-
- string pair = it2->first;
- int pos = pair.find_first_of('-');
- string lefts = pair.substr(0, pos);
- string rights = pair.substr(pos+1, pair.length());
-
- //converts string to int
- int left = atoi(lefts.c_str());
- int right = atoi(rights.c_str());
-
- //set parents and children
- concensusTree->tree[j].setChildren(left, right);
- concensusTree->tree[left].setParent(j);
- concensusTree->tree[right].setParent(j);
-
- // set Branchlengths //
- //if your children are leaves remove their branchlengths
- if (concensusTree->tree[left].getLChild() == -1) { concensusTree->tree[left].setBranchLength(1.0); }
- if (concensusTree->tree[right].getLChild() == -1) { concensusTree->tree[right].setBranchLength(1.0); }
-
- //set your branch length to the percentage of trees with this pairing
- float bLength = (float) it2->second / (float) t.size();
- concensusTree->tree[j].setBranchLength(bLength);
-
- //print out set used
- string leftName, rightName;
- getNames(it2->first, leftName, rightName);
-
- out2 << "Node " << j+1 << " in concensus tree: " << leftName << "-" << rightName << '\t' << (float)it2->second / (float) t.size() << endl;
- out2 << "Node " << j+1 << " sets not included in concensus tree: " << endl;
-
- //print out sets not included
- for (it = nodePairs[j-numLeaves].begin(); it != nodePairs[j-numLeaves].end(); it++) {
- if (it != it2) {
- getNames(it->first, leftName, rightName);
- out2 << leftName << "-" << rightName << '\t' << (float)it->second / (float) t.size() << endl;
+ it2 = nodePairs.find(treeSet);
+
+ nodePairsInTree[treeSet] = it2->second;
+
+ //erase treeset because you are adding it
+ nodePairs.erase(treeSet);
+
+ //set count to numLeaves;
+ count = numLeaves;
+
+ buildConcensusTree(treeSet);
+
+ concensusTree->assembleTree();
+
+ //output species in order
+ out2 << "Species in Order: " << endl << endl;
+ for (int i = 0; i < treeSet.size(); i++) { out2 << i+1 << ". " << treeSet[i] << endl; }
+
+ vector<string> temp;
+ //output sets included
+ out2 << endl << "Sets included in the concensus tree:" << endl << endl;
+ for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
+ //only output pairs not leaves
+ if (it2->first.size() > 1) {
+ temp.clear();
+ //initialize temp to all "."
+ temp.resize(treeSet.size(), ".");
+
+ //set the spot in temp that represents it2->first[i] to a "*"
+ for (int i = 0; i < it2->first.size(); i++) {
+ //find spot
+ int index = findSpot(it2->first[i]);
+ temp[index] = "*";
+ }
+
+ //output temp
+ for (int j = 0; j < temp.size(); j++) {
+ out2 << temp[j];
}
+ out2 << '\t' << it2->second << endl;
}
- out2 << endl;
-
}
- concensusTree->assembleTree();
+ //output sets not included
+ out2 << endl << "Sets NOT included in the concensus tree:" << endl << endl;
+ for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
+ temp.clear();
+ //initialize temp to all "."
+ temp.resize(treeSet.size(), ".");
+
+ //set the spot in temp that represents it2->first[i] to a "*"
+ for (int i = 0; i < it2->first.size(); i++) {
+ //find spot
+ int index = findSpot(it2->first[i]);
+ temp[index] = "*";
+ }
+
+ //output temp
+ for (int j = 0; j < temp.size(); j++) {
+ out2 << temp[j];
+ }
+ out2 << '\t' << it2->second << endl;
+ }
outputFile = getRootName(globaldata->inputFileName) + "concensus.tre";
openOutputFile(outputFile, out);
- concensusTree->print(out);
+ concensusTree->printForBoot(out);
out.close(); out2.close();
- delete concensusTree;
+ delete concensusTree;
return 0;
}
exit(1);
}
}
+
//**********************************************************************************************************************
+int ConcensusCommand::buildConcensusTree(vector<string> nodeSet) {
+ try {
+ vector<string> leftChildSet;
+ vector<string> rightChildSet;
+
+ //if you are at a leaf
+ if (nodeSet.size() == 1) {
+ //return the vector index of the leaf you are at
+ return concensusTree->getIndex(nodeSet[0]);
+ //terminate recursion
+ }else if (count == numNodes) { return 0; }
+ else {
+ leftChildSet = getNextAvailableSet(nodeSet);
+ rightChildSet = getRestSet(nodeSet, leftChildSet);
+ int left = buildConcensusTree(leftChildSet);
+ int right = buildConcensusTree(rightChildSet);
+ concensusTree->tree[count].setChildren(left, right);
+ concensusTree->tree[count].setLabel(nodePairsInTree[nodeSet]);
+ concensusTree->tree[left].setParent(count);
+ concensusTree->tree[right].setParent(count);
+ count++;
+ return (count-1);
+ }
+
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function buildConcensusTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the ConcensusCommand class function buildConcensusTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
-void ConcensusCommand::getNames(string pair, string& leftName, string& rightName) {
+//**********************************************************************************************************************
+void ConcensusCommand::getSets() {
try {
- int pos = pair.find_first_of('-');
- string lefts = pair.substr(0, pos);
- string rights = pair.substr(pos+1, pair.length());
-
- //converts string to int
- int left = atoi(lefts.c_str());
- int right = atoi(rights.c_str());
-
- //get name
- leftName = concensusTree->tree[left].getName();
- rightName = concensusTree->tree[right].getName();
-
- //if you are not a leaf use node number as name
- if (leftName == "") { leftName = toString(left+1); }
- if (rightName == "") { rightName = toString(right+1); }
+ vector<string> temp;
+ treeSet.clear();
+
+ //for each tree add the possible pairs you find
+ for (int i = 0; i < t.size(); i++) {
+
+ //for each non-leaf node get descendant info.
+ for (int j = numLeaves; j < numNodes; j++) {
+ temp.clear();
+ //go through pcounts and pull out descendants
+ for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
+ temp.push_back(it->first);
+ }
+
+ //sort temp
+ sort(temp.begin(), temp.end());
+
+ it2 = nodePairs.find(temp);
+ if (it2 != nodePairs.end()) {
+ nodePairs[temp]++;
+ }else{
+ nodePairs[temp] = 1;
+ }
+ }
+ }
+
+ //add each leaf to terminate recursion in concensus
+ //you want the leaves in there but with insignifigant sightings value so it is added last
+ //for each leaf node get descendant info.
+ for (int j = 0; j < numLeaves; j++) {
+
+ //only need the first one since leaves have no descendants but themselves
+ it = t[0]->tree[j].pcount.begin();
+ temp.clear(); temp.push_back(it->first);
+
+ //fill treeSet
+ treeSet.push_back(it->first);
+
+ //add leaf to list but with sighting value less then all non leaf pairs
+ nodePairs[temp] = 0;
+ }
+
+ sort(treeSet.begin(), treeSet.end());
}
catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getNames. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getSets. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
exit(1);
}
catch(...) {
- cout << "An unknown error has occurred in the ConcensusCommand class function getNames. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ cout << "An unknown error has occurred in the ConcensusCommand class function getSets. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
exit(1);
}
}
//**********************************************************************************************************************
+vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset) {
+ try {
+ vector<string> largest; largest.clear();
+ int largestSighting = -1;
+
+ //go through the sets
+ for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
+ //are you a subset of bigset
+ if (isSubset(bigset, it2->first) == true) {
+
+ //are you the largest. if you are the same size as current largest refer to sighting
+ if (it2->first.size() > largest.size()) { largest = it2->first; largestSighting = it2->second; }
+ else if (it2->first.size() == largest.size()) {
+ if (it2->second > largestSighting) { largest = it2->first; largestSighting = it2->second; }
+ }
+
+ }
+ }
+
+ //save for printing out later and for branch lengths
+ nodePairsInTree[largest] = nodePairs[largest];
+
+ //delete whatever set you return because it is no longer available
+ nodePairs.erase(largest);
+
+ return largest;
+
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getNextAvailableSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the ConcensusCommand class function getNextAvailableSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+
+}
+
+//**********************************************************************************************************************
+vector<string> ConcensusCommand::getRestSet(vector<string> bigset, vector<string> subset) {
+ try {
+ vector<string> rest;
+
+ for (int i = 0; i < bigset.size(); i++) {
+ bool inSubset = false;
+ for (int j = 0; j < subset.size(); j++) {
+ if (bigset[i] == subset[j]) { inSubset = true; break; }
+ }
+
+ //its not in the subset so put it in the rest
+ if (inSubset == false) { rest.push_back(bigset[i]); }
+ }
+
+ //save for printing out later and for branch lengths
+ nodePairsInTree[rest] = nodePairs[rest];
+
+ //delete whatever set you return because it is no longer available
+ nodePairs.erase(rest);
+
+ return rest;
+
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getRestSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the ConcensusCommand class function getRestSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+
+}
+
+//**********************************************************************************************************************
+bool ConcensusCommand::isSubset(vector<string> bigset, vector<string> subset) {
+ try {
+
+ //check if each guy in suset is also in bigset
+ for (int i = 0; i < subset.size(); i++) {
+ bool match = false;
+ for (int j = 0; j < bigset.size(); j++) {
+ if (subset[i] == bigset[j]) { match = true; break; }
+ }
+
+ //you have a guy in subset that had no match in bigset
+ if (match == false) { return false; }
+ }
+
+ return true;
+
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function isSubset. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the ConcensusCommand class function isSubset. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int ConcensusCommand::findSpot(string node) {
+ try {
+ int spot;
+
+ //check if each guy in suset is also in bigset
+ for (int i = 0; i < treeSet.size(); i++) {
+ if (treeSet[i] == node) { spot = i; break; }
+ }
+
+ return spot;
+
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function findSpot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the ConcensusCommand class function findSpot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+
SharedUtil* util;
vector<Tree*> t;
Tree* concensusTree;
- vector< map<string, int> > nodePairs; //<maps a pair of nodes joined, number of times that pair occurred> -one entry in vector for each internal node.
- // i.e. if node 7's child pairs are 1,2 ten times and 1,3 20 times then the map would be [12, 10] and [13, 20];
+ vector<string> treeSet; //set containing all members of the tree to start recursion. filled in getSets().
+ map< vector<string>, int > nodePairs; //<map of possible combinations these combos are the pcounts or descendants info, to how many times they occured
+ // ie. combos FI and EGK would create nodePairs[vector containing F and I] = 1; nodePairs[vector containing E, G and K] = 1
+ // if you saw the combo FI again in another tree you would then update nodePairs[vector containing F and I] = 2;
+ // requires vectors to be sorted to find key.
+ map< vector<string>, int > nodePairsInTree;
map<string, int>::iterator it;
- map<string, int>::iterator it2;
+ map< vector<string>, int>::iterator it2;
string outputFile, notIncluded;
ofstream out, out2;
- int numNodes, numLeaves;
+ int numNodes, numLeaves, count; //count is the next available spot in the tree vector
+
+ void getSets();
+ vector<string> getNextAvailableSet(vector<string>); //gets next largest and highest rated set that is a subset of the set passed in.
+ vector<string> getRestSet(vector<string>, vector<string>);
+ bool isSubset(vector<string>, vector<string>);
+ int findSpot(string);
+ int buildConcensusTree(vector<string>);
- void getNames(string, string&, string&);
};
#endif
void Tree::print(ostream& out) {
try {
int root = findRoot();
- printBranch(root, out);
+ printBranch(root, out, "branch");
out << ";" << endl;
}
catch(exception& e) {
exit(1);
}
}
+/*****************************************************************/
+void Tree::printForBoot(ostream& out) {
+ try {
+ int root = findRoot();
+ printBranch(root, out, "boot");
+ out << ";" << endl;
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function printForBoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the Tree class function printForBoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
+
/*****************************************************************/
// This prints out the tree in Newick form.
void Tree::createNewickFile(string f) {
filename = f;
openOutputFile(filename, out);
- printBranch(root, out);
+ printBranch(root, out, "branch");
// you are at the end of the tree
out << ";" << endl;
}
/*****************************************************************/
-void Tree::printBranch(int node, ostream& out) {
+void Tree::printBranch(int node, ostream& out, string mode) {
try {
// you are not a leaf
if (tree[node].getLChild() != -1) {
out << "(";
- printBranch(tree[node].getLChild(), out);
+ printBranch(tree[node].getLChild(), out, mode);
out << ",";
- printBranch(tree[node].getRChild(), out);
+ printBranch(tree[node].getRChild(), out, mode);
out << ")";
- //if there is a branch length then print it
- if (tree[node].getBranchLength() != -1) {
- out << ":" << tree[node].getBranchLength();
+ if (mode == "branch") {
+ //if there is a branch length then print it
+ if (tree[node].getBranchLength() != -1) {
+ out << ":" << tree[node].getBranchLength();
+ }
+ }else if (mode == "boot") {
+ //if there is a label then print it
+ if (tree[node].getLabel() != -1) {
+ out << tree[node].getLabel();
+ }
}
}else { //you are a leaf
out << tree[node].getGroup();
- //if there is a branch length then print it
- if (tree[node].getBranchLength() != -1) {
- out << ":" << tree[node].getBranchLength();
+ if (mode == "branch") {
+ //if there is a branch length then print it
+ if (tree[node].getBranchLength() != -1) {
+ out << ":" << tree[node].getBranchLength();
+ }
+ }else if (mode == "boot") {
+ //if there is a label then print it
+ if (tree[node].getLabel() != -1) {
+ out << tree[node].getLabel();
+ }
}
}
map<string, int> mergeUserGroups(int, vector<string>); //returns a map with a groupname and the number of times that group was seen in the children
void printTree();
void print(ostream&);
+ void printForBoot(ostream&);
//this function takes the leaf info and populates the non leaf nodes
void assembleTree();
void randomLabels(vector<string>);
void randomLabels(string, string);
int findRoot(); //return index of root node
- void printBranch(int, ostream&); //recursively print out tree
+ void printBranch(int, ostream&, string); //recursively print out tree
};
#endif
lchild = -1;
rchild = -1;
length2leaf = 0.0;
+ label = -1;
}
/****************************************************************/
/****************************************************************/
void Node::setBranchLength(float l) { branchLength = l; }
/****************************************************************/
+void Node::setLabel(float l) { label = l; }
+/****************************************************************/
void Node::setLengthToLeaves(float l) { length2leaf = l; }
/****************************************************************/
void Node::setParent(int p) { parent = p; }
/****************************************************************/
float Node::getBranchLength() { return branchLength; }
/****************************************************************/
+float Node::getLabel() { return label; }
+/****************************************************************/
float Node::getLengthToLeaves() { return length2leaf; }
/****************************************************************/
int Node::getParent() { return parent; }
void setName(string);
void setGroup(string);
void setBranchLength(float);
+ void setLabel(float);
void setParent(int);
void setChildren(int, int); //leftchild, rightchild
void setIndex(int);
string getGroup();
float getBranchLength();
float getLengthToLeaves();
+ float getLabel();
int getParent();
int getLChild();
int getRChild();
private:
string name;
string group;
- float branchLength, length2leaf;
+ float branchLength, length2leaf, label;
int parent;
int lchild;
int rchild;