]> git.donarmstrong.com Git - mothur.git/commitdiff
fixed concensus command and modified tree class so you can print labels on trees
authorwestcott <westcott>
Thu, 7 May 2009 15:47:53 +0000 (15:47 +0000)
committerwestcott <westcott>
Thu, 7 May 2009 15:47:53 +0000 (15:47 +0000)
Mothur.xcodeproj/project.pbxproj
concensuscommand.cpp
concensuscommand.h
tree.cpp
tree.h
treenode.cpp
treenode.h

index bfed0cb5a71a55ce2a2ea9d9726fbac4d95a4b28..5e6e7920a17077dbb517e0fd0cab6bd36b59dc7c 100644 (file)
                375AA13A0F9E433D008EF9B8 /* readotu.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 375AA1350F9E433D008EF9B8 /* readotu.cpp */; };
                375AA13B0F9E433D008EF9B8 /* readphylip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 375AA1370F9E433D008EF9B8 /* readphylip.cpp */; };
                377326650FAF16E0007ABB8B /* concensuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 377326630FAF16E0007ABB8B /* concensuscommand.cpp */; };
+               378C1B030FB0644E004D63F5 /* filterseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AEE0FB0644D004D63F5 /* filterseqscommand.cpp */; };
+               378C1B040FB0644E004D63F5 /* goodscoverage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF00FB0644D004D63F5 /* goodscoverage.cpp */; };
+               378C1B050FB0644E004D63F5 /* readclustal.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF20FB0644D004D63F5 /* readclustal.cpp */; };
+               378C1B060FB0644E004D63F5 /* readfasta.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF40FB0644D004D63F5 /* readfasta.cpp */; };
+               378C1B070FB0644E004D63F5 /* readnexus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF60FB0644D004D63F5 /* readnexus.cpp */; };
+               378C1B080FB0644E004D63F5 /* readseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF90FB0644D004D63F5 /* readseqscommand.cpp */; };
+               378C1B090FB0644E004D63F5 /* readseqsphylip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AFB0FB0644D004D63F5 /* readseqsphylip.cpp */; };
+               378C1B0A0FB0644E004D63F5 /* sequencedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AFD0FB0644D004D63F5 /* sequencedb.cpp */; };
+               378C1B0B0FB0644E004D63F5 /* sharedjackknife.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AFF0FB0644D004D63F5 /* sharedjackknife.cpp */; };
+               378C1B0C0FB0644E004D63F5 /* sharedmarczewski.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1B010FB0644D004D63F5 /* sharedmarczewski.cpp */; };
                379293C30F2DE73400B9034A /* treemap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379293C20F2DE73400B9034A /* treemap.cpp */; };
                379294700F2E191800B9034A /* parsimonycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3792946F0F2E191800B9034A /* parsimonycommand.cpp */; };
                3792948A0F2E258500B9034A /* parsimony.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379294890F2E258500B9034A /* parsimony.cpp */; };
                375AA1380F9E433D008EF9B8 /* readphylip.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readphylip.h; sourceTree = "<group>"; };
                377326630FAF16E0007ABB8B /* concensuscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = concensuscommand.cpp; sourceTree = "<group>"; };
                377326640FAF16E0007ABB8B /* concensuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = concensuscommand.h; sourceTree = "<group>"; };
+               378C1AEE0FB0644D004D63F5 /* filterseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = filterseqscommand.cpp; sourceTree = "<group>"; };
+               378C1AEF0FB0644D004D63F5 /* filterseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = filterseqscommand.h; sourceTree = "<group>"; };
+               378C1AF00FB0644D004D63F5 /* goodscoverage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = goodscoverage.cpp; sourceTree = "<group>"; };
+               378C1AF10FB0644D004D63F5 /* goodscoverage.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = goodscoverage.h; sourceTree = "<group>"; };
+               378C1AF20FB0644D004D63F5 /* readclustal.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readclustal.cpp; sourceTree = "<group>"; };
+               378C1AF30FB0644D004D63F5 /* readclustal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readclustal.h; sourceTree = "<group>"; };
+               378C1AF40FB0644D004D63F5 /* readfasta.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readfasta.cpp; sourceTree = "<group>"; };
+               378C1AF50FB0644D004D63F5 /* readfasta.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readfasta.h; sourceTree = "<group>"; };
+               378C1AF60FB0644D004D63F5 /* readnexus.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readnexus.cpp; sourceTree = "<group>"; };
+               378C1AF70FB0644D004D63F5 /* readnexus.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readnexus.h; sourceTree = "<group>"; };
+               378C1AF80FB0644D004D63F5 /* readnexusal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readnexusal.h; sourceTree = "<group>"; };
+               378C1AF90FB0644D004D63F5 /* readseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readseqscommand.cpp; sourceTree = "<group>"; };
+               378C1AFA0FB0644D004D63F5 /* readseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readseqscommand.h; sourceTree = "<group>"; };
+               378C1AFB0FB0644D004D63F5 /* readseqsphylip.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readseqsphylip.cpp; sourceTree = "<group>"; };
+               378C1AFC0FB0644D004D63F5 /* readseqsphylip.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readseqsphylip.h; sourceTree = "<group>"; };
+               378C1AFD0FB0644D004D63F5 /* sequencedb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sequencedb.cpp; sourceTree = "<group>"; };
+               378C1AFE0FB0644D004D63F5 /* sequencedb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sequencedb.h; sourceTree = "<group>"; };
+               378C1AFF0FB0644D004D63F5 /* sharedjackknife.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedjackknife.cpp; sourceTree = "<group>"; };
+               378C1B000FB0644D004D63F5 /* sharedjackknife.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedjackknife.h; sourceTree = "<group>"; };
+               378C1B010FB0644D004D63F5 /* sharedmarczewski.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedmarczewski.cpp; sourceTree = "<group>"; };
+               378C1B020FB0644D004D63F5 /* sharedmarczewski.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedmarczewski.h; sourceTree = "<group>"; };
                379293C10F2DE73400B9034A /* treemap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treemap.h; sourceTree = "<group>"; };
                379293C20F2DE73400B9034A /* treemap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treemap.cpp; sourceTree = "<group>"; };
                3792946E0F2E191800B9034A /* parsimonycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parsimonycommand.h; sourceTree = "<group>"; };
                                37D928060F21331F001D4494 /* rarefact.h */,
                                37D928050F21331F001D4494 /* rarefact.cpp */,
                                37D928090F21331F001D4494 /* rarefactioncurvedata.h */,
+                               378C1AF30FB0644D004D63F5 /* readclustal.h */,
+                               378C1AF20FB0644D004D63F5 /* readclustal.cpp */,
                                375AA1340F9E433D008EF9B8 /* readcolumn.h */,
                                375AA1330F9E433D008EF9B8 /* readcolumn.cpp */,
+                               378C1AF50FB0644D004D63F5 /* readfasta.h */,
+                               378C1AF40FB0644D004D63F5 /* readfasta.cpp */,
                                37D928130F21331F001D4494 /* readmatrix.hpp */,
+                               378C1AF70FB0644D004D63F5 /* readnexus.h */,
+                               378C1AF60FB0644D004D63F5 /* readnexus.cpp */,
+                               378C1AF80FB0644D004D63F5 /* readnexusal.h */,
+                               378C1AFD0FB0644D004D63F5 /* sequencedb.cpp */,
+                               378C1AFE0FB0644D004D63F5 /* sequencedb.h */,
                                375AA1360F9E433D008EF9B8 /* readotu.h */,
                                375AA1350F9E433D008EF9B8 /* readotu.cpp */,
                                375AA1380F9E433D008EF9B8 /* readphylip.h */,
                                375AA1370F9E433D008EF9B8 /* readphylip.cpp */,
+                               378C1AFC0FB0644D004D63F5 /* readseqsphylip.h */,
+                               378C1AFB0FB0644D004D63F5 /* readseqsphylip.cpp */,
                                37AD4DC80F28F3DD00AA2D49 /* readtree.h */,
                                37AD4DC90F28F3DD00AA2D49 /* readtree.cpp */,
                                37D9281D0F21331F001D4494 /* sequence.hpp */,
                                7EC3D4500FA0FFF900338DA5 /* coverage.cpp */,
                                EB9303F70F53517300E8EF26 /* geom.h */,
                                EB9303F80F53517300E8EF26 /* geom.cpp */,
+                               378C1AF10FB0644D004D63F5 /* goodscoverage.h */,
+                               378C1AF00FB0644D004D63F5 /* goodscoverage.cpp */,
                                EB9303E90F534D9400E8EF26 /* logsd.h */,
                                EB9303EA0F534D9400E8EF26 /* logsd.cpp */,
                                EB6E68170F5F1C780044E17B /* qstat.h */,
                                37D928240F21331F001D4494 /* sharedchao1.cpp */,
                                37D928290F21331F001D4494 /* sharedjabund.h */,
                                37D928280F21331F001D4494 /* sharedjabund.cpp */,
+                               378C1B000FB0644D004D63F5 /* sharedjackknife.h */,
+                               378C1AFF0FB0644D004D63F5 /* sharedjackknife.cpp */,
                                37D9282B0F21331F001D4494 /* sharedjclass.h */,
                                37D9282A0F21331F001D4494 /* sharedjclass.cpp */,
                                37D9282D0F21331F001D4494 /* sharedjest.h */,
                                375874020F7D64EF0040F377 /* sharedkulczynskicody.cpp */,
                                375874010F7D64EF0040F377 /* sharedlennon.h */,
                                375873FF0F7D64EF0040F377 /* sharedlennon.cpp */,
+                               378C1B020FB0644D004D63F5 /* sharedmarczewski.h */,
+                               378C1B010FB0644D004D63F5 /* sharedmarczewski.cpp */,
                                375874070F7D64FC0040F377 /* sharedmorisitahorn.h */,
                                375874080F7D64FC0040F377 /* sharedmorisitahorn.cpp */,
                                375874090F7D64FC0040F377 /* sharedochiai.h */,
                                377326630FAF16E0007ABB8B /* concensuscommand.cpp */,
                                37B28F660F27590100808A62 /* deconvolutecommand.h */,
                                37B28F670F27590100808A62 /* deconvolutecommand.cpp */,
+                               378C1AEF0FB0644D004D63F5 /* filterseqscommand.h */,
+                               378C1AEE0FB0644D004D63F5 /* filterseqscommand.cpp */,
                                A70B53A50F4CD7AD0064797E /* getgroupcommand.h */,
                                A70B53A40F4CD7AD0064797E /* getgroupcommand.cpp */,
                                A70B53A70F4CD7AD0064797E /* getlabelcommand.h */,
                                372E12950F263D5A0095CF7E /* readdistcommand.cpp */,
                                372E126E0F26365B0095CF7E /* readotucommand.h */,
                                372E126F0F26365B0095CF7E /* readotucommand.cpp */,
+                               378C1AFA0FB0644D004D63F5 /* readseqscommand.h */,
+                               378C1AF90FB0644D004D63F5 /* readseqscommand.cpp */,
                                37E5F4900F2A3DA800F8D827 /* readtreecommand.h */,
                                37E5F4910F2A3DA800F8D827 /* readtreecommand.cpp */,
                                37D928270F21331F001D4494 /* sharedcommand.h */,
                                7EC3D4550FA0FFF900338DA5 /* coverage.cpp in Sources */,
                                7EC3D4560FA0FFF900338DA5 /* whittaker.cpp in Sources */,
                                377326650FAF16E0007ABB8B /* concensuscommand.cpp in Sources */,
+                               378C1B030FB0644E004D63F5 /* filterseqscommand.cpp in Sources */,
+                               378C1B040FB0644E004D63F5 /* goodscoverage.cpp in Sources */,
+                               378C1B050FB0644E004D63F5 /* readclustal.cpp in Sources */,
+                               378C1B060FB0644E004D63F5 /* readfasta.cpp in Sources */,
+                               378C1B070FB0644E004D63F5 /* readnexus.cpp in Sources */,
+                               378C1B080FB0644E004D63F5 /* readseqscommand.cpp in Sources */,
+                               378C1B090FB0644E004D63F5 /* readseqsphylip.cpp in Sources */,
+                               378C1B0A0FB0644E004D63F5 /* sequencedb.cpp in Sources */,
+                               378C1B0B0FB0644E004D63F5 /* sharedjackknife.cpp in Sources */,
+                               378C1B0C0FB0644E004D63F5 /* sharedmarczewski.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index cc00cd05d29cd5aa0c143e3c242f22c684288340..01df253a0dfbcbb18d37f9adb1cc241ad3ff78ce 100644 (file)
@@ -40,103 +40,97 @@ int ConcensusCommand::execute(){
                        numLeaves = t[0]->getNumLeaves();
                }
                
-               //initialize nodepairs
-               nodePairs.resize(numNodes-numLeaves);
-               
-               //process each tree and fill counts
-               for (int i = 0; i < t.size(); i++) {
-                       //process each nonleaf node
-                       for (int j = numLeaves; j < numNodes; j++) {
-                               //get pairing
-                               int leftChild = t[i]->tree[j].getLChild();
-                               int rightChild = t[i]->tree[j].getRChild();
-                               string pair = toString(leftChild) + "-" + toString(rightChild);
-                               
-                               //if this is an existing pairing for this node then increment the count otherwise add new pairing.
-                               it = nodePairs[j-numLeaves].find(pair);
-                               if (it != nodePairs[j-numLeaves].end()) {//already have that score
-                                       nodePairs[j-numLeaves][pair]++;
-                               }else{//first time we have seen this score
-                                       nodePairs[j-numLeaves][pair] = 1;
-                               }
-                       }
-               }
-               
+               //get the possible pairings
+               getSets();              
                
                //print out pairings for testing
-               /*for (int i = 0; i < nodePairs.size(); i++) {
-                       cout << "pairs for node " << i+numLeaves << endl;
-                       for (it = nodePairs[i].begin(); it != nodePairs[i].end(); it++) {
-                               cout << " pair = " << it->first <<  " count = " << it->second << endl;
+               /*cout << "possible pairing " <<  endl;
+               for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
+                       for (int i = 0; i < it2->first.size(); i++) {
+                               cout << it2->first[i] << " ";
                        }
+                       cout << '\t' << it2->second << endl;
                }*/
                
+               
                //open file for pairing not included in the tree
                notIncluded = getRootName(globaldata->inputFileName) + "concensuspairs";
                openOutputFile(notIncluded, out2);
                
                concensusTree = new Tree();
                
-               //set relationships for nonleaf nodes
-               for (int j = numLeaves; j < numNodes; j++) {
-                       
-                       //find that nodes pairing with the highest count
-                       int large = 0;
-                       for (it = nodePairs[j-numLeaves].begin(); it != nodePairs[j-numLeaves].end(); it++) {
-                               if (it->second > large) { large = it->second;  it2 = it; }
-                       }
-                       
-                       string pair = it2->first;
-                       int pos = pair.find_first_of('-');
-                       string lefts =  pair.substr(0, pos);
-                       string rights =  pair.substr(pos+1, pair.length());
-
-                       //converts string to int
-                       int left = atoi(lefts.c_str());
-                       int right = atoi(rights.c_str());
-                       
-                       //set parents and children
-                       concensusTree->tree[j].setChildren(left, right);
-                       concensusTree->tree[left].setParent(j);
-                       concensusTree->tree[right].setParent(j);
-                       
-                       // set Branchlengths //
-                       //if your children are leaves remove their branchlengths
-                       if (concensusTree->tree[left].getLChild() == -1) {  concensusTree->tree[left].setBranchLength(1.0); }
-                       if (concensusTree->tree[right].getLChild() == -1) {  concensusTree->tree[right].setBranchLength(1.0); }
-                       
-                       //set your branch length to the percentage of trees with this pairing
-                       float bLength = (float) it2->second / (float) t.size();
-                       concensusTree->tree[j].setBranchLength(bLength);
-                       
-                       //print out set used
-                       string leftName, rightName;
-                       getNames(it2->first, leftName, rightName);
-                       
-                       out2 << "Node " << j+1 << " in concensus tree: " << leftName << "-" << rightName << '\t' << (float)it2->second / (float) t.size() << endl; 
-                       out2 << "Node " << j+1 << " sets not included in concensus tree: " << endl; 
-                       
-                       //print out sets not included
-                       for (it = nodePairs[j-numLeaves].begin(); it != nodePairs[j-numLeaves].end(); it++) {
-                               if (it != it2) { 
-                                       getNames(it->first, leftName, rightName);
-                                       out2 << leftName << "-" << rightName << '\t' << (float)it->second / (float) t.size() << endl; 
+               it2 = nodePairs.find(treeSet);
+               
+               nodePairsInTree[treeSet] = it2->second; 
+               
+               //erase treeset because you are adding it
+               nodePairs.erase(treeSet);
+               
+               //set count to numLeaves;
+               count = numLeaves;
+               
+               buildConcensusTree(treeSet);
+               
+               concensusTree->assembleTree();
+               
+               //output species in order
+               out2 << "Species in Order: " << endl << endl;
+               for (int i = 0; i < treeSet.size(); i++) {  out2 << i+1 << ".  " << treeSet[i] << endl; }
+               
+               vector<string> temp; 
+               //output sets included
+               out2 << endl << "Sets included in the concensus tree:" << endl << endl;
+               for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
+                       //only output pairs not leaves
+                       if (it2->first.size() > 1) { 
+                               temp.clear();
+                               //initialize temp to all "."
+                               temp.resize(treeSet.size(), ".");
+                               
+                               //set the spot in temp that represents it2->first[i] to a "*" 
+                               for (int i = 0; i < it2->first.size(); i++) {
+                                       //find spot 
+                                       int index = findSpot(it2->first[i]);
+                                       temp[index] = "*";
+                               }
+                               
+                               //output temp
+                               for (int j = 0; j < temp.size(); j++) { 
+                                       out2 << temp[j];
                                }
+                               out2 << '\t' << it2->second << endl;
                        }
-                       out2 << endl;
-
                }
                
-               concensusTree->assembleTree();
+               //output sets not included
+               out2 << endl << "Sets NOT included in the concensus tree:" << endl << endl;
+               for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
+                       temp.clear();
+                       //initialize temp to all "."
+                       temp.resize(treeSet.size(), ".");
+                               
+                       //set the spot in temp that represents it2->first[i] to a "*" 
+                       for (int i = 0; i < it2->first.size(); i++) {
+                               //find spot 
+                               int index = findSpot(it2->first[i]);
+                               temp[index] = "*";
+                       }
+                               
+                       //output temp
+                       for (int j = 0; j < temp.size(); j++) { 
+                               out2 << temp[j];
+                       }
+                       out2 << '\t' << it2->second << endl;
+               }
                
                outputFile = getRootName(globaldata->inputFileName) + "concensus.tre";
                openOutputFile(outputFile, out);
                
-               concensusTree->print(out);
+               concensusTree->printForBoot(out);
                
                out.close(); out2.close();
                
-               delete concensusTree;
+               delete concensusTree; 
                
                return 0;
        }
@@ -149,36 +143,226 @@ int ConcensusCommand::execute(){
                exit(1);
        }               
 }
+
 //**********************************************************************************************************************
+int ConcensusCommand::buildConcensusTree(vector<string> nodeSet) {
+       try {
+               vector<string> leftChildSet;
+               vector<string> rightChildSet;
+               
+               //if you are at a leaf
+               if (nodeSet.size() == 1) {
+                       //return the vector index of the leaf you are at
+                       return concensusTree->getIndex(nodeSet[0]);
+               //terminate recursion
+               }else if (count == numNodes) { return 0; }
+               else {
+                       leftChildSet = getNextAvailableSet(nodeSet);
+                       rightChildSet = getRestSet(nodeSet, leftChildSet);
+                       int left = buildConcensusTree(leftChildSet);
+                       int right = buildConcensusTree(rightChildSet);
+                       concensusTree->tree[count].setChildren(left, right);
+                       concensusTree->tree[count].setLabel(nodePairsInTree[nodeSet]); 
+                       concensusTree->tree[left].setParent(count);
+                       concensusTree->tree[right].setParent(count);
+                       count++;
+                       return (count-1);
+               }
+       
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function buildConcensusTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the ConcensusCommand class function buildConcensusTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }               
+}
 
-void ConcensusCommand::getNames(string pair, string& leftName, string& rightName) {
+//**********************************************************************************************************************
+void ConcensusCommand::getSets() {
        try {
-               int pos = pair.find_first_of('-');
-               string lefts =  pair.substr(0, pos);
-               string rights =  pair.substr(pos+1, pair.length());
-
-               //converts string to int
-               int     left = atoi(lefts.c_str());
-               int     right = atoi(rights.c_str());
-                                       
-               //get name
-               leftName = concensusTree->tree[left].getName();  
-               rightName = concensusTree->tree[right].getName();
-                
-               //if you are not a leaf use node number as name
-               if (leftName == "") {  leftName = toString(left+1); }
-               if (rightName == "") {  rightName = toString(right+1); }
+               vector<string> temp;
+               treeSet.clear();
+               
+               //for each tree add the possible pairs you find
+               for (int i = 0; i < t.size(); i++) {
+                       
+                       //for each non-leaf node get descendant info.
+                       for (int j = numLeaves; j < numNodes; j++) {
+                               temp.clear();
+                               //go through pcounts and pull out descendants
+                               for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
+                                       temp.push_back(it->first);
+                               }
+                               
+                               //sort temp
+                               sort(temp.begin(), temp.end());
+                               
+                               it2 = nodePairs.find(temp);
+                               if (it2 != nodePairs.end()) {
+                                       nodePairs[temp]++;
+                               }else{
+                                       nodePairs[temp] = 1;
+                               }
+                       }
+               }
+               
+               //add each leaf to terminate recursion in concensus
+               //you want the leaves in there but with insignifigant sightings value so it is added last
+               //for each leaf node get descendant info.
+               for (int j = 0; j < numLeaves; j++) {
+                       
+                       //only need the first one since leaves have no descendants but themselves
+                       it = t[0]->tree[j].pcount.begin(); 
+                       temp.clear();  temp.push_back(it->first);
+                       
+                       //fill treeSet
+                       treeSet.push_back(it->first);
+                       
+                       //add leaf to list but with sighting value less then all non leaf pairs 
+                       nodePairs[temp] = 0;
+               }
+
+               sort(treeSet.begin(), treeSet.end());
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getNames. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getSets. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
                exit(1);
        }
        catch(...) {
-               cout << "An unknown error has occurred in the ConcensusCommand class function getNames. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               cout << "An unknown error has occurred in the ConcensusCommand class function getSets. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
                exit(1);
        }               
 }
 
 //**********************************************************************************************************************
+vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset) {
+       try {
+               vector<string> largest; largest.clear();
+               int largestSighting = -1;
+               
+               //go through the sets
+               for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
+                       //are you a subset of bigset
+                       if (isSubset(bigset, it2->first) == true) {
+                       
+                               //are you the largest. if you are the same size as current largest refer to sighting
+                               if (it2->first.size() > largest.size()) { largest = it2->first;  largestSighting = it2->second; }
+                               else if (it2->first.size() == largest.size()) {
+                                       if (it2->second > largestSighting) { largest = it2->first;  largestSighting = it2->second; }
+                               }
+                               
+                       }
+               }
+               
+               //save for printing out later and for branch lengths
+               nodePairsInTree[largest] = nodePairs[largest];
+               
+               //delete whatever set you return because it is no longer available
+               nodePairs.erase(largest);
+               
+               return largest;
+       
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getNextAvailableSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the ConcensusCommand class function getNextAvailableSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }               
+
+}
+
+//**********************************************************************************************************************
+vector<string> ConcensusCommand::getRestSet(vector<string> bigset, vector<string> subset) {
+       try {
+               vector<string> rest;
+               
+               for (int i = 0; i < bigset.size(); i++) {
+                       bool inSubset = false;
+                       for (int j = 0; j < subset.size(); j++) {
+                               if (bigset[i] == subset[j]) { inSubset = true; break; }
+                       }
+                       
+                       //its not in the subset so put it in the rest
+                       if (inSubset == false) { rest.push_back(bigset[i]); }
+               }
+               
+               //save for printing out later and for branch lengths
+               nodePairsInTree[rest] = nodePairs[rest];
+               
+               //delete whatever set you return because it is no longer available
+               nodePairs.erase(rest);
+
+               return rest;
+       
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getRestSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the ConcensusCommand class function getRestSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }               
+
+}
+
+//**********************************************************************************************************************
+bool ConcensusCommand::isSubset(vector<string> bigset, vector<string> subset) {
+       try {
+               
+               //check if each guy in suset is also in bigset
+               for (int i = 0; i < subset.size(); i++) {
+                       bool match = false;
+                       for (int j = 0; j < bigset.size(); j++) {
+                               if (subset[i] == bigset[j]) { match = true; break; }
+                       }
+                       
+                       //you have a guy in subset that had no match in bigset
+                       if (match == false) { return false; }
+               }
+               
+               return true;
+       
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function isSubset. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the ConcensusCommand class function isSubset. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }               
+}
+//**********************************************************************************************************************
+int ConcensusCommand::findSpot(string node) {
+       try {
+               int spot;
+               
+               //check if each guy in suset is also in bigset
+               for (int i = 0; i < treeSet.size(); i++) {
+                       if (treeSet[i] == node) { spot = i; break; }
+               }
+               
+               return spot;
+       
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function findSpot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the ConcensusCommand class function findSpot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }               
+}
+//**********************************************************************************************************************
+
+
 
 
index 5550b3b6efdf04f6098c5481e16c393363487bb6..cc923dd34fde054ea38f7797ac4c3dee64cb1346 100644 (file)
@@ -28,15 +28,25 @@ private:
        SharedUtil* util;
        vector<Tree*> t;
        Tree* concensusTree;
-       vector< map<string, int> > nodePairs;  //<maps a pair of nodes joined, number of times that pair occurred>      -one entry in vector for each internal node.
-                                                                               // i.e. if node 7's child pairs are 1,2 ten times and 1,3 20 times then the map would be [12, 10] and [13, 20];
+       vector<string> treeSet;         //set containing all members of the tree to start recursion.  filled in getSets().
+       map< vector<string>, int > nodePairs;  //<map of possible combinations these combos are the pcounts or descendants info, to how many times they occured
+                                                                               // ie. combos FI and EGK would create nodePairs[vector containing F and I] = 1; nodePairs[vector containing E, G and K] = 1
+                                                                               // if you saw the combo FI again in another tree you would then update nodePairs[vector containing F and I] = 2;
+                                                                               // requires vectors to be sorted to find key.
+       map< vector<string>, int > nodePairsInTree;
        map<string, int>::iterator it;
-       map<string, int>::iterator it2;
+       map< vector<string>, int>::iterator it2;
        string outputFile, notIncluded;
        ofstream out, out2;
-       int numNodes, numLeaves;
+       int numNodes, numLeaves, count;  //count is the next available spot in the tree vector
+                                                                               
+       void getSets();
+       vector<string> getNextAvailableSet(vector<string>);  //gets next largest and highest rated set that is a subset of the set passed in.
+       vector<string> getRestSet(vector<string>, vector<string>);
+       bool isSubset(vector<string>, vector<string>); 
+       int findSpot(string); 
+       int buildConcensusTree(vector<string>);
        
-       void getNames(string, string&, string&);
 };
 
 #endif
index e138121661f19f77d1c8603036793f86a95dfe67..a6c9c07a0714fb850498a3d6f4439d4247602468 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -457,7 +457,7 @@ void Tree::randomTopology() {
 void Tree::print(ostream& out) {
        try {
                int root = findRoot();
-               printBranch(root, out);
+               printBranch(root, out, "branch");
                out << ";" << endl;
        }
        catch(exception& e) {
@@ -469,6 +469,23 @@ void Tree::print(ostream& out) {
                exit(1);
        }               
 }
+/*****************************************************************/
+void Tree::printForBoot(ostream& out) {
+       try {
+               int root = findRoot();
+               printBranch(root, out, "boot");
+               out << ";" << endl;
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function printForBoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the Tree class function printForBoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }               
+}
+
 /*****************************************************************/
 // This prints out the tree in Newick form.
 void Tree::createNewickFile(string f) {
@@ -478,7 +495,7 @@ void Tree::createNewickFile(string f) {
                filename = f;
                openOutputFile(filename, out);
                
-               printBranch(root, out);
+               printBranch(root, out, "branch");
                
                // you are at the end of the tree
                out << ";" << endl;
@@ -516,25 +533,39 @@ int Tree::findRoot() {
 }
 
 /*****************************************************************/
-void Tree::printBranch(int node, ostream& out) {
+void Tree::printBranch(int node, ostream& out, string mode) {
        try {
                
                // you are not a leaf
                if (tree[node].getLChild() != -1) {
                        out << "(";
-                       printBranch(tree[node].getLChild(), out);
+                       printBranch(tree[node].getLChild(), out, mode);
                        out << ",";
-                       printBranch(tree[node].getRChild(), out);
+                       printBranch(tree[node].getRChild(), out, mode);
                        out << ")";
-                       //if there is a branch length then print it
-                       if (tree[node].getBranchLength() != -1) {
-                               out << ":" << tree[node].getBranchLength();
+                       if (mode == "branch") {
+                               //if there is a branch length then print it
+                               if (tree[node].getBranchLength() != -1) {
+                                       out << ":" << tree[node].getBranchLength();
+                               }
+                       }else if (mode == "boot") {
+                               //if there is a label then print it
+                               if (tree[node].getLabel() != -1) {
+                                       out << tree[node].getLabel();
+                               }
                        }
                }else { //you are a leaf
                        out << tree[node].getGroup(); 
-                       //if there is a branch length then print it
-                       if (tree[node].getBranchLength() != -1) {
-                               out << ":" << tree[node].getBranchLength();
+                       if (mode == "branch") {
+                               //if there is a branch length then print it
+                               if (tree[node].getBranchLength() != -1) {
+                                       out << ":" << tree[node].getBranchLength();
+                               }
+                       }else if (mode == "boot") {
+                               //if there is a label then print it
+                               if (tree[node].getLabel() != -1) {
+                                       out << tree[node].getLabel();
+                               }
                        }
                }
                
diff --git a/tree.h b/tree.h
index cbbad667cbe1c27842ef088f8538efc9b084bc66..8b77d3ddbed48dddb5de57dfa44394118c7a1920 100644 (file)
--- a/tree.h
+++ b/tree.h
@@ -35,6 +35,7 @@ public:
        map<string, int> mergeUserGroups(int, vector<string>);  //returns a map with a groupname and the number of times that group was seen in the children
        void printTree();
        void print(ostream&);
+       void printForBoot(ostream&);
        
        //this function takes the leaf info and populates the non leaf nodes
        void assembleTree();            
@@ -55,7 +56,7 @@ private:
        void randomLabels(vector<string>);
        void randomLabels(string, string);
        int findRoot();  //return index of root node
-       void printBranch(int, ostream&);  //recursively print out tree
+       void printBranch(int, ostream&, string);  //recursively print out tree
 };
 
 #endif
index 59fb630ef11373b671f48dfc72459dbc1fa78ad7..fc6ea3a595ac752e943151a84ce6a538f6d66670 100644 (file)
@@ -19,6 +19,7 @@ Node::Node() {
        lchild = -1;
        rchild = -1;
        length2leaf = 0.0;
+       label = -1;
        
 }
 /****************************************************************/
@@ -28,6 +29,8 @@ void Node::setGroup(string groups)  { group =groups; }
 /****************************************************************/
 void Node::setBranchLength(float l) { branchLength = l; }
 /****************************************************************/
+void Node::setLabel(float l) { label = l; }
+/****************************************************************/
 void Node::setLengthToLeaves(float l) { length2leaf = l; }
 /****************************************************************/
 void Node::setParent(int p)  { parent = p; }
@@ -42,6 +45,8 @@ string Node::getGroup() { return group; }
 /****************************************************************/
 float Node::getBranchLength() { return branchLength; }
 /****************************************************************/
+float Node::getLabel() { return label; }
+/****************************************************************/
 float Node::getLengthToLeaves() { return length2leaf; }
 /****************************************************************/
 int Node::getParent() { return parent; }
index 58d967d5cf297bbcd82e3839822e201a0aed121f..df30cd3a32d5ebc47a54cadf8ea0a6b47c9122f0 100644 (file)
@@ -25,6 +25,7 @@ class Node  {
                void setName(string);
                void setGroup(string);  
                void setBranchLength(float);
+               void setLabel(float);
                void setParent(int);
                void setChildren(int, int);             //leftchild, rightchild
                void setIndex(int);
@@ -34,6 +35,7 @@ class Node  {
                string getGroup();  
                float getBranchLength();
                float getLengthToLeaves();
+               float getLabel();
                int getParent();
                int getLChild();
                int getRChild();
@@ -53,7 +55,7 @@ class Node  {
        private:
                string                  name;
                string                  group;
-               float                   branchLength, length2leaf;
+               float                   branchLength, length2leaf, label;
                int                             parent;
                int                             lchild;
                int                             rchild;