From: Sarah Westcott <mothur.westcott@gmail.com>
Date: Wed, 3 Oct 2012 18:51:10 +0000 (-0400)
Subject: added zap method to classify.seqs and changed bayesian method name to wang.
X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=e8e13c129ba8184ec5932a090773f353f3ae3406

added zap method to classify.seqs and changed bayesian method name to wang.
---

diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj
index 9d6261b..b2895e8 100644
--- a/Mothur.xcodeproj/project.pbxproj
+++ b/Mothur.xcodeproj/project.pbxproj
@@ -19,6 +19,11 @@
 		A71CB160130B04A2001E7287 /* anosimcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71CB15E130B04A2001E7287 /* anosimcommand.cpp */; };
 		A71FE12C12EDF72400963CA7 /* mergegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */; };
 		A721765713BB9F7D0014DAAE /* referencedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721765613BB9F7D0014DAAE /* referencedb.cpp */; };
+		A721AB6A161C570F009860A1 /* alignnode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB66161C570F009860A1 /* alignnode.cpp */; };
+		A721AB6B161C570F009860A1 /* aligntree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB68161C570F009860A1 /* aligntree.cpp */; };
+		A721AB71161C572A009860A1 /* kmernode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB6D161C572A009860A1 /* kmernode.cpp */; };
+		A721AB72161C572A009860A1 /* kmertree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB6F161C572A009860A1 /* kmertree.cpp */; };
+		A721AB77161C573B009860A1 /* taxonomynode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB73161C573B009860A1 /* taxonomynode.cpp */; };
 		A724D2B7153C8628000A826F /* makebiomcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A724D2B6153C8628000A826F /* makebiomcommand.cpp */; };
 		A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; };
 		A7386C231619CCE600651424 /* classifysharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C211619CCE600651424 /* classifysharedcommand.cpp */; };
@@ -395,6 +400,16 @@
 		A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mergegroupscommand.cpp; sourceTree = "<group>"; };
 		A721765513BB9F7D0014DAAE /* referencedb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = referencedb.h; sourceTree = "<group>"; };
 		A721765613BB9F7D0014DAAE /* referencedb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = referencedb.cpp; sourceTree = "<group>"; };
+		A721AB66161C570F009860A1 /* alignnode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignnode.cpp; sourceTree = "<group>"; };
+		A721AB67161C570F009860A1 /* alignnode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alignnode.h; sourceTree = "<group>"; };
+		A721AB68161C570F009860A1 /* aligntree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligntree.cpp; sourceTree = "<group>"; };
+		A721AB69161C570F009860A1 /* aligntree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aligntree.h; sourceTree = "<group>"; };
+		A721AB6D161C572A009860A1 /* kmernode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = kmernode.cpp; sourceTree = "<group>"; };
+		A721AB6E161C572A009860A1 /* kmernode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kmernode.h; sourceTree = "<group>"; };
+		A721AB6F161C572A009860A1 /* kmertree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = kmertree.cpp; sourceTree = "<group>"; };
+		A721AB70161C572A009860A1 /* kmertree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kmertree.h; sourceTree = "<group>"; };
+		A721AB73161C573B009860A1 /* taxonomynode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = taxonomynode.cpp; sourceTree = "<group>"; };
+		A721AB74161C573B009860A1 /* taxonomynode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = taxonomynode.h; sourceTree = "<group>"; };
 		A724D2B4153C8600000A826F /* makebiomcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makebiomcommand.h; sourceTree = "<group>"; };
 		A724D2B6153C8628000A826F /* makebiomcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makebiomcommand.cpp; sourceTree = "<group>"; };
 		A727864212E9E28C00F86ABA /* removerarecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removerarecommand.h; sourceTree = "<group>"; };
@@ -1775,10 +1790,18 @@
 		A7E9BA4B12D3966900DA6239 /* classifier */ = {
 			isa = PBXGroup;
 			children = (
-				A7E9B65A12D37EC300DA6239 /* bayesian.cpp */,
+				A721AB67161C570F009860A1 /* alignnode.h */,
+				A721AB66161C570F009860A1 /* alignnode.cpp */,
+				A721AB69161C570F009860A1 /* aligntree.h */,
+				A721AB68161C570F009860A1 /* aligntree.cpp */,
 				A7E9B65B12D37EC300DA6239 /* bayesian.h */,
+				A7E9B65A12D37EC300DA6239 /* bayesian.cpp */,
 				A7E9B68E12D37EC400DA6239 /* classify.cpp */,
 				A7E9B68F12D37EC400DA6239 /* classify.h */,
+				A721AB6E161C572A009860A1 /* kmernode.h */,
+				A721AB6D161C572A009860A1 /* kmernode.cpp */,
+				A721AB70161C572A009860A1 /* kmertree.h */,
+				A721AB6F161C572A009860A1 /* kmertree.cpp */,
 				A7E9B73812D37EC400DA6239 /* knn.h */,
 				A7E9B73712D37EC400DA6239 /* knn.cpp */,
 				A7E9B78D12D37EC400DA6239 /* phylosummary.cpp */,
@@ -1787,6 +1810,8 @@
 				A7E9B79012D37EC400DA6239 /* phylotree.h */,
 				A7E9B85D12D37EC400DA6239 /* taxonomyequalizer.cpp */,
 				A7E9B85E12D37EC400DA6239 /* taxonomyequalizer.h */,
+				A721AB74161C573B009860A1 /* taxonomynode.h */,
+				A721AB73161C573B009860A1 /* taxonomynode.cpp */,
 			);
 			name = classifier;
 			sourceTree = "<group>";
@@ -2250,6 +2275,11 @@
 				A7386C29161A110800651424 /* decisiontree.cpp in Sources */,
 				A77E1938161B201E00DB1A2A /* randomforest.cpp in Sources */,
 				A77E193B161B289600DB1A2A /* rftreenode.cpp in Sources */,
+				A721AB6A161C570F009860A1 /* alignnode.cpp in Sources */,
+				A721AB6B161C570F009860A1 /* aligntree.cpp in Sources */,
+				A721AB71161C572A009860A1 /* kmernode.cpp in Sources */,
+				A721AB72161C572A009860A1 /* kmertree.cpp in Sources */,
+				A721AB77161C573B009860A1 /* taxonomynode.cpp in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
diff --git a/alignnode.cpp b/alignnode.cpp
new file mode 100755
index 0000000..ccd8fb0
--- /dev/null
+++ b/alignnode.cpp
@@ -0,0 +1,257 @@
+/*
+ *  alignNode.cpp
+ *  bayesian
+ *
+ *  Created by Pat Schloss on 10/11/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "alignNode.h"
+#include "taxonomynode.h"
+
+#include "bayesian.h"
+
+/**************************************************************************************************/
+
+AlignNode::AlignNode(string n, int l): TaxonomyNode(n, l){
+
+	alignLength = 0;
+}
+
+/**************************************************************************************************/
+
+void AlignNode::printTheta(){
+    try {
+        m->mothurOut("A:\t"); for(int i=0;i<alignLength;i++){	m->mothurOut(toString(theta[i].A)+ '\t');		}	m->mothurOutEndLine();
+        m->mothurOut("T:\t"); for(int i=0;i<alignLength;i++){	m->mothurOut(toString(theta[i].T)+ '\t');		}	m->mothurOutEndLine();
+        m->mothurOut("G:\t"); for(int i=0;i<alignLength;i++){	m->mothurOut(toString(theta[i].G)+ '\t');		}	m->mothurOutEndLine();
+        m->mothurOut("C:\t"); for(int i=0;i<alignLength;i++){	m->mothurOut(toString(theta[i].C)+ '\t');		}	m->mothurOutEndLine();
+        m->mothurOut("I:\t"); for(int i=0;i<alignLength;i++){	m->mothurOut(toString(theta[i].gap)+ '\t');		}	m->mothurOutEndLine();
+    }
+	catch(exception& e) {
+		m->errorOut(e, "AlignNode", "printTheta");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
+int AlignNode::loadSequence(string& sequence){
+	try {
+        alignLength = (int)sequence.length();	//	this function runs through the alignment and increments the frequency
+        //	of each base for a particular taxon.  we are building the thetas
+        
+        if(theta.size() == 0){
+            theta.resize(alignLength);
+            columnCounts.resize(alignLength, 0);
+        }
+        
+        for(int i=0;i<alignLength;i++){
+            
+            if (m->control_pressed) { return 0; }
+            
+            char base = sequence[i];
+            
+            if(base == 'A')		{	theta[i].A++;	columnCounts[i]++;		}	//	our thetas will be alignLength x 5  
+            else if(base == 'T'){	theta[i].T++;	columnCounts[i]++;		}	//	and we ignore any position that has  
+            else if(base == 'G'){	theta[i].G++;	columnCounts[i]++;		}	//	an ambiguous base call
+            else if(base == 'C'){	theta[i].C++;	columnCounts[i]++;		}
+            else if(base == '-'){	theta[i].gap++; columnCounts[i]++;		}
+            else if(base == 'U'){	theta[i].T++;	columnCounts[i]++;		}
+        }
+        
+        numSeqs++;
+        
+        return 0;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "AlignNode", "loadSequence");
+		exit(1);
+	}
+}	
+
+/**************************************************************************************************/
+
+int AlignNode::checkTheta(){
+    try {
+        for(int i=0;i<alignLength;i++){
+            
+            if (m->control_pressed) { return 0; }
+            
+            if(theta[i].gap == columnCounts[i]){
+                columnCounts[i] = 0;
+            }
+            //        else{
+            //            int maxCount = theta[i].A;
+            //            
+            //            if(theta[i].T > maxCount)   {    maxCount = theta[i].T;  }
+            //            if(theta[i].G > maxCount)   {    maxCount = theta[i].T;  }
+            //            if(theta[i].C > maxCount)   {    maxCount = theta[i].T;  }
+            //            if(theta[i].gap > maxCount) {    maxCount = theta[i].T;  }
+            //        
+            //            if(maxCount < columnCounts[i] * 0.25){// || maxCount == columnCounts[i]){   //remove any column where the maximum frequency is <50%
+            //                columnCounts[i] = 0;
+            //            }
+            //        }
+            
+        }
+        
+        return 0;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "AlignNode", "checkTheta");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
+int AlignNode::addThetas(vector<thetaAlign> newTheta, int newNumSeqs){
+	try {
+        if(alignLength == 0){
+            alignLength = (int)newTheta.size();
+            theta.resize(alignLength);
+            columnCounts.resize(alignLength);
+        }
+        
+        for(int i=0;i<alignLength;i++){	
+            
+            if (m->control_pressed) { return 0; }
+            
+            theta[i].A += newTheta[i].A;		columnCounts[i] += newTheta[i].A;
+            theta[i].T += newTheta[i].T;		columnCounts[i] += newTheta[i].T;
+            theta[i].G += newTheta[i].G;		columnCounts[i] += newTheta[i].G;
+            theta[i].C += newTheta[i].C;		columnCounts[i] += newTheta[i].C;
+            theta[i].gap += newTheta[i].gap;	columnCounts[i] += newTheta[i].gap;
+        }
+        
+        numSeqs += newNumSeqs;
+        
+        return 0;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "AlignNode", "addThetas");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
+double AlignNode::getSimToConsensus(string& query){
+	try {
+        double similarity = 0;
+        
+        int length = 0;
+        
+        for(int i=0;i<alignLength;i++){
+            
+            if (m->control_pressed) { return similarity; }
+            
+            char base = query[i];
+            
+            if(base != '.' && base != 'N' && columnCounts[i] != 0){
+                
+                double fraction = 0;
+                
+                if(base == 'A'){
+                    fraction = (int) theta[i].A / (double) columnCounts[i];
+                    similarity += fraction;
+                    length++;
+                }
+                else if(base == 'T'){
+                    fraction = (int) theta[i].T / (double) columnCounts[i];
+                    similarity += fraction;
+                    length++;
+                }
+                else if(base == 'G'){
+                    fraction = (int) theta[i].G / (double) columnCounts[i];
+                    similarity += fraction;
+                    length++;
+                }
+                else if(base == 'C'){
+                    fraction = (int) theta[i].C / (double) columnCounts[i];
+                    similarity += fraction;
+                    length++;
+                }
+                else if(base == '-'){
+                    fraction = (int) theta[i].gap / (double) columnCounts[i];
+                    similarity += fraction;
+                    length++;
+                }
+            }
+        }
+        
+        if(length != 0){
+            similarity /= double(length);
+        }
+        else {
+            similarity = 0;
+        }
+        
+        return similarity;	
+        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "AlignNode", "getSimToConsensus");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+double AlignNode::getPxGivenkj_D_j(string& query){	//P(x | k_j, D, j)
+	try {
+        double PxGivenkj_D_j = 0;
+        
+        int count = 0;
+        double alpha = 1 / (double)totalSeqs;	//flat prior
+        
+        
+        for(int s=0;s<alignLength;s++){
+            
+            if (m->control_pressed) { return PxGivenkj_D_j; }
+            
+            char base = query[s];
+            thetaAlign thetaS = theta[s];
+            
+            if(base != '.' && base != 'N' && columnCounts[s] != 0){
+                double Nkj_s = (double)columnCounts[s];	
+                double nkj_si = 0;
+                
+                
+                if(base == 'A')		{	nkj_si = (double)thetaS.A;		}
+                else if(base == 'T'){	nkj_si = (double)thetaS.T;		}	
+                else if(base == 'G'){	nkj_si = (double)thetaS.G;		}	
+                else if(base == 'C'){	nkj_si = (double)thetaS.C;		}	
+                else if(base == '-'){	nkj_si = (double)thetaS.gap;	}
+                else if(base == 'U'){	nkj_si = (double)thetaS.T;		}	
+                
+                //			double alpha = pow(0.2, double(Nkj_s)) + 0.0001;	//need to make 1e-4 a variable in future; this is the non-flat prior
+                
+                //			if(columnCounts[s] != nkj_si){						//deal only with segregating sites...
+				double numerator = nkj_si + alpha;
+				double denomenator = Nkj_s + 5.0 * alpha;
+				
+				PxGivenkj_D_j += log(numerator) - log(denomenator);		
+				count++;
+                //			}
+            }
+            if(base != '.' && columnCounts[s] == 0 && thetaS.gap == 0){
+                count = 0;
+                break;
+            }
+            
+        }
+        
+        if(count == 0){	PxGivenkj_D_j = -1e10;	}	
+        
+        return PxGivenkj_D_j;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "AlignNode", "getPxGivenkj_D_j");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
diff --git a/alignnode.h b/alignnode.h
new file mode 100755
index 0000000..4aecca7
--- /dev/null
+++ b/alignnode.h
@@ -0,0 +1,49 @@
+#ifndef ALIGNNODE
+#define ALIGNNODE
+
+/*
+ *  alignNode.h
+ *  bayesian
+ *
+ *  Created by Pat Schloss on 10/11/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "taxonomynode.h"
+
+/**************************************************************************************************/
+
+struct thetaAlign {
+	thetaAlign() : A(0), T(0), G(0), C(0), gap(0){}
+	unsigned int A;
+	unsigned int T;
+	unsigned int G;
+	unsigned int C;
+	unsigned int gap;
+};
+
+/**************************************************************************************************/
+
+class AlignNode : public TaxonomyNode {
+	
+public:
+	AlignNode(string, int);
+	int loadSequence(string&);
+	int checkTheta();
+    void printTheta();
+	double getPxGivenkj_D_j(string& query);	//P(x | k_j, D, j)
+	double getSimToConsensus(string& query);
+	vector<thetaAlign> getTheta()	{	return theta;	}
+	int addThetas(vector<thetaAlign>, int);
+	
+private:
+	vector<thetaAlign> theta;
+	vector<unsigned int> columnCounts;
+	int alignLength;
+};
+
+/**************************************************************************************************/
+
+#endif
+
diff --git a/aligntree.cpp b/aligntree.cpp
new file mode 100755
index 0000000..41667ca
--- /dev/null
+++ b/aligntree.cpp
@@ -0,0 +1,371 @@
+//
+//  alignTree.cpp
+//  pdsBayesian
+//
+//  Created by Patrick Schloss on 4/3/12.
+//  Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+#include "alignnode.h"
+#include "aligntree.h"
+
+/**************************************************************************************************/
+
+AlignTree::AlignTree(string referenceFileName, string taxonomyFileName, int cutoff) : Classify(), confidenceThreshold(cutoff){
+	try {
+        AlignNode* newNode = new AlignNode("Root", 0);
+        tree.push_back(newNode);			//	the tree is stored as a vector of elements of type TaxonomyNode
+        
+        string refTaxonomy;
+        
+        readTaxonomy(taxonomyFileName);
+     
+        ifstream referenceFile;
+        m->openInputFile(referenceFileName, referenceFile);
+        bool error = false;
+        map<int, int> lengths;
+        while(!referenceFile.eof()){
+            
+            if (m->control_pressed) { break; }
+            
+            Sequence seq(referenceFile);  m->gobble(referenceFile);
+            
+            if (seq.getName() != "") {
+                map<string, string>::iterator it = taxonomy.find(seq.getName());
+                
+                if (it != taxonomy.end()) {
+                    refTaxonomy = it->second;		//	lookup the taxonomy string for the current reference sequence
+                    string aligned = seq.getAligned();
+                    lengths[aligned.length()] = 1;
+                    if (lengths.size() > 1) { error = true; m->mothurOut("[ERROR]: reference sequences must be aligned to use the align method, quitting.\n"); break; }
+                    addTaxonomyToTree(seq.getName(), refTaxonomy, aligned);
+                }else {
+                    m->mothurOut(seq.getName() + " is in your reference file, but not in your taxonomy file, please correct.\n"); error = true;
+                }
+            }
+        }
+        referenceFile.close();
+        
+        length = (lengths.begin())->first;  
+           
+        if (error) { m->control_pressed = true; }
+        
+        numTaxa = (int)tree.size();
+        
+        numLevels = 0;
+        for(int i=0;i<numTaxa;i++){
+            int level = tree[i]->getLevel();
+            if(level > numLevels){	numLevels = level;	}
+        }
+        numLevels++;
+        
+        aggregateThetas();
+        
+        int dbSize = tree[0]->getNumSeqs();
+        
+        for(int i=0;i<numTaxa;i++){
+            tree[i]->checkTheta();
+            tree[i]->setTotalSeqs(dbSize);
+        }
+        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "AlignTree", "AlignTree");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+AlignTree::~AlignTree(){
+	try {
+        for(int i=0;i<tree.size();i++){
+            delete tree[i];
+        }
+	}
+    catch(exception& e) {
+        m->errorOut(e, "AlignTree", "~AlignTree");
+        exit(1);
+    }
+}	
+
+/**************************************************************************************************/
+
+int AlignTree::addTaxonomyToTree(string seqName, string& taxonomy, string& sequence){
+	try {
+        AlignNode* newNode;
+        string taxonName = "";
+        int treePosition = 0;							//	the root is element 0
+        
+        int level = 1;
+        
+        for(int i=0;i<taxonomy.length();i++){			//	step through taxonomy string...
+            
+            if (m->control_pressed) { break; }
+            
+            if(taxonomy[i] == ';'){						//	looking for semicolons...
+                
+                if (taxonName == "") {  m->mothurOut(seqName + " has an error in the taxonomy.  This may be due to a ;;"); m->mothurOutEndLine(); m->control_pressed = true; }
+                
+                int newIndex = tree[treePosition]->getChildIndex(taxonName);	//	look to see if your current node already
+                //	has a child with the new taxonName
+                if(newIndex != -1)	{	treePosition = newIndex;	}		//	if you've seen it before, jump to that
+                else {														//	 position in the tree
+                    int newChildIndex = (int)tree.size();						//	otherwise, we'll have to create one...
+                    tree[treePosition]->makeChild(taxonName, newChildIndex);
+                    
+                    newNode = new AlignNode(taxonName, level);
+                    
+                    newNode->setParent(treePosition);
+                    
+                    tree.push_back(newNode);
+                    treePosition = newChildIndex;
+                }
+                
+                //	sequence data to that node to update that node's theta - seems slow...				
+                taxonName = "";								//	clear out the taxon name that we will build as we look 
+                level++;
+            }												//	for a semicolon
+            else{
+                taxonName += taxonomy[i];					//	keep adding letters until we reach a semicolon
+            }
+        }
+        tree[treePosition]->loadSequence(sequence);	//	now that we've gotten to the correct node, add the
+        
+        return 0;
+	}
+    catch(exception& e) {
+        m->errorOut(e, "AlignTree", "addTaxonomyToTree");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+int AlignTree::aggregateThetas(){
+	try {
+        vector<vector<int> > levelMatrix(numLevels+1);
+        
+        for(int i=0;i<tree.size();i++){
+            if (m->control_pressed) { return 0; }
+            levelMatrix[tree[i]->getLevel()].push_back(i);
+        }
+		
+        for(int i=numLevels-1;i>0;i--){
+            if (m->control_pressed) { return 0; }
+            for(int j=0;j<levelMatrix[i].size();j++){
+                
+                AlignNode* holder = tree[levelMatrix[i][j]];
+                
+                tree[holder->getParent()]->addThetas(holder->getTheta(), holder->getNumSeqs());				
+            }
+        }
+	    return 0;
+	}
+    catch(exception& e) {
+        m->errorOut(e, "AlignTree", "aggregateThetas");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+double AlignTree::getOutlierLogProbability(string& sequence){
+	try {
+        double count = 0;
+        
+        for(int i=0;i<sequence.length();i++){
+            
+            if(sequence[i] != '.'){	count++;	}
+            
+        }
+        
+        return count * log(0.2);
+    }
+    catch(exception& e) {
+        m->errorOut(e, "AlignTree", "getOutlierLogProbability");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+int AlignTree::getMinRiskIndexAlign(string& sequence, vector<int>& taxaIndices, vector<double>& probabilities){
+	try {
+        int numProbs = (int)probabilities.size();
+        
+        vector<double> G(numProbs, 0.2);	//a random sequence will, on average, be 20% similar to any other sequence
+        vector<double> risk(numProbs, 0);
+        
+        for(int i=1;i<numProbs;i++){ //use if you want the outlier group
+            if (m->control_pressed) { return 0; }
+            G[i] = tree[taxaIndices[i]]->getSimToConsensus(sequence);
+        }
+        
+        double minRisk = 1e6;
+        int minRiskIndex = 0;
+        
+        for(int i=0;i<numProbs;i++){
+            if (m->control_pressed) { return 0; }
+            for(int j=0;j<numProbs;j++){
+                if(i != j){
+                    risk[i] += probabilities[j] * G[j];
+                }			
+            }
+            
+            if(risk[i] < minRisk){
+                minRisk = risk[i];
+                minRiskIndex = i;
+            }
+        }
+        
+        return minRiskIndex;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "AlignTree", "getMinRiskIndexAlign");
+        exit(1);
+    }
+
+}
+
+/**************************************************************************************************/
+
+int AlignTree::sanityCheck(vector<vector<int> >& indices, vector<int>& maxIndices){
+	try {
+        int finalLevel = (int)indices.size()-1;
+        
+        for(int position=1;position<indices.size();position++){
+            if (m->control_pressed) { return 0; }
+            int predictedParent = tree[indices[position][maxIndices[position]]]->getParent();
+            int actualParent = indices[position-1][maxIndices[position-1]];
+            
+            if(predictedParent != actualParent){
+                finalLevel = position - 1;
+                return finalLevel;
+            }
+        }
+        return finalLevel;
+	}
+    catch(exception& e) {
+        m->errorOut(e, "AlignTree", "sanityCheck");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+string AlignTree::getTaxonomy(Sequence* seq){
+    try {
+        string seqName = seq->getName(); string querySequence = seq->getAligned(); string taxonProbabilityString = "";
+        if (querySequence.length() != length) {
+            m->mothurOut("[ERROR]: " + seq->getName() + " has length " + toString(querySequence.length()) + ", reference sequences length is " + toString(length) + ". Are your sequences aligned? Sequences must be aligned to use the align search method.\n"); m->control_pressed = true; return "";
+        }
+        double logPOutlier = getOutlierLogProbability(querySequence);
+        
+        vector<vector<double> > pXgivenKj_D_j(numLevels);
+        vector<vector<int> > indices(numLevels);
+        for(int i=0;i<numLevels;i++){
+            if (m->control_pressed) { return taxonProbabilityString; }
+            pXgivenKj_D_j[i].push_back(logPOutlier);
+            indices[i].push_back(-1);
+        }
+        
+        
+        for(int i=0;i<numTaxa;i++){
+            //		cout << i << '\t' << tree[i]->getName() << '\t' << tree[i]->getLevel() << '\t' << tree[i]->getPxGivenkj_D_j(querySequence) << endl;
+            if (m->control_pressed) { return taxonProbabilityString; }
+            pXgivenKj_D_j[tree[i]->getLevel()].push_back(tree[i]->getPxGivenkj_D_j(querySequence));
+            indices[tree[i]->getLevel()].push_back(i);
+        }
+        
+        vector<double> sumLikelihood(numLevels, 0);
+        vector<double> bestPosterior(numLevels, 0);
+        vector<int> maxIndex(numLevels, 0);
+        int maxPosteriorIndex;
+        
+        
+        	//cout << "before best level" << endl;
+        
+        //let's find the best level and taxa within that level
+        for(int i=0;i<numLevels;i++){ //go across all j's - from the root to genus
+            if (m->control_pressed) { return taxonProbabilityString; }
+            int numTaxaInLevel = (int)indices[i].size();
+            
+            		//cout << "numTaxaInLevel:\t" << numTaxaInLevel << endl;
+            
+            vector<double> posteriors(numTaxaInLevel, 0);		
+            sumLikelihood[i] = getLogExpSum(pXgivenKj_D_j[i], maxPosteriorIndex);
+            
+            maxPosteriorIndex = 0;
+            for(int j=0;j<numTaxaInLevel;j++){
+                posteriors[j] = exp(pXgivenKj_D_j[i][j] - sumLikelihood[i]);
+                
+                if(posteriors[j] > posteriors[maxPosteriorIndex]){	
+                    maxPosteriorIndex = j;
+                }
+                
+            }
+            
+            maxIndex[i] = getMinRiskIndexAlign(querySequence, indices[i], posteriors);
+            
+            maxIndex[i] = maxPosteriorIndex;
+            bestPosterior[i] = posteriors[maxIndex[i]];	
+        }
+        
+        //	vector<double> pX_level(numLevels, 0);
+        //	
+        //	for(int i=0;i<numLevels;i++){
+        //		pX_level[i] = pXgivenKj_D_j[i][maxIndex[i]] - tree[indices[i][maxIndex[i]]]->getNumSeqs();
+        //	}
+        //	
+        //	int max_pLevel_X_index = -1;
+        //	double pX_level_sum = getLogExpSum(pX_level, max_pLevel_X_index);
+        //	double max_pLevel_X = exp(pX_level[max_pLevel_X_index] - pX_level_sum);
+        //	
+        //	vector<double> pLevel_X(numLevels, 0);
+        //	for(int i=0;i<numLevels;i++){
+        //		pLevel_X[i] = exp(pX_level[i] - pX_level_sum);
+        //	}
+        
+        
+        
+        
+        int saneDepth = sanityCheck(indices, maxIndex);
+        
+        simpleTax = "";
+        int savedspot = 1;
+        taxonProbabilityString = "";
+        for(int i=1;i<=saneDepth;i++){
+            if (m->control_pressed) { return taxonProbabilityString; }
+            int confidenceScore = (int) (bestPosterior[i] * 100);
+            if (confidenceScore >= confidenceThreshold) {
+            if(indices[i][maxIndex[i]] != -1){
+                taxonProbabilityString += tree[indices[i][maxIndex[i]]]->getName() + '(' + toString(confidenceScore) + ");";
+                simpleTax += tree[indices[i][maxIndex[i]]]->getName() + ";";
+                //			levelProbabilityOutput << tree[indices[i][maxIndex[i]]]->getName() << '(' << setprecision(6) << pLevel_X[i] << ");";
+            }
+            else{
+                taxonProbabilityString + "unclassified" + '(' + toString(confidenceScore) + ");";
+                //			levelProbabilityOutput << "unclassified" << '(' << setprecision(6) << pLevel_X[i] << ");";
+                simpleTax += "unclassified;";
+            }
+            }else { break; }
+            savedspot = i;
+        }
+        
+        for(int i=savedspot+1;i<numLevels;i++){
+            if (m->control_pressed) { return taxonProbabilityString; }
+            taxonProbabilityString + "unclassified(0);";
+            simpleTax += "unclassified;";
+        }
+        
+        return taxonProbabilityString;
+	}
+    catch(exception& e) {
+        m->errorOut(e, "AlignTree", "getTaxonomy");
+        exit(1);
+    }
+}
+
+
+/**************************************************************************************************/
diff --git a/aligntree.h b/aligntree.h
new file mode 100755
index 0000000..51008ff
--- /dev/null
+++ b/aligntree.h
@@ -0,0 +1,34 @@
+//
+//  alignTree.h
+//  pdsBayesian
+//
+//  Created by Patrick Schloss on 4/3/12.
+//  Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+#ifndef pdsBayesian_alignTree_h
+#define pdsBayesian_alignTree_h
+
+#include "classify.h"
+
+class AlignNode;
+
+class AlignTree : public Classify {
+
+public:
+	AlignTree(string, string, int);
+	~AlignTree();
+	string getTaxonomy(Sequence*);
+	
+private:
+    int addTaxonomyToTree(string, string&, string&);
+	double getOutlierLogProbability(string&);
+	int getMinRiskIndexAlign(string&, vector<int>&, vector<double>&);
+	int aggregateThetas();
+	int sanityCheck(vector<vector<int> >&, vector<int>&);
+
+	int numSeqs, confidenceThreshold, length;
+	vector<AlignNode*> tree;
+};
+
+#endif
diff --git a/classify.cpp b/classify.cpp
index 459c90f..8aa3cdb 100644
--- a/classify.cpp
+++ b/classify.cpp
@@ -354,3 +354,37 @@ vector<string> Classify::parseTax(string tax) {
 }
 /**************************************************************************************************/
 
+double Classify::getLogExpSum(vector<double> probabilities, int& maxIndex){
+	try {
+        //	http://jblevins.org/notes/log-sum-exp
+        
+        double maxProb = probabilities[0];
+        maxIndex = 0;
+        
+        int numProbs = (int)probabilities.size();
+        
+        for(int i=1;i<numProbs;i++){
+            if(probabilities[i] >= maxProb){
+                maxProb = probabilities[i];
+                maxIndex = i;
+            }
+        }
+        
+        double probSum = 0.0000;
+        
+        for(int i=0;i<numProbs;i++){
+            probSum += exp(probabilities[i] - maxProb);		
+        }
+        
+        probSum = log(probSum) + maxProb;
+        
+        return probSum;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "Classify", "getLogExpSum");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
diff --git a/classify.h b/classify.h
index 6582be4..7b4c102 100644
--- a/classify.h
+++ b/classify.h
@@ -17,10 +17,8 @@
 #include "database.hpp"
 #include "phylotree.h"
 
-
 class Sequence;
 
-
 /**************************************************************************************************/
 
 class Classify {
@@ -37,7 +35,6 @@ public:
 protected:
 
 	map<string, string> taxonomy;  //name maps to taxonomy
-	//map<string, int> genusCount;  //maps genus to count - in essence a list of how many seqs are in each taxonomy
 	map<string, int>::iterator itTax;
 	map<string, string>::iterator it;
 	Database* database;
@@ -45,11 +42,12 @@ protected:
 	
 	string taxFile, templateFile, simpleTax;
 	vector<string> names;
-	int threadID;
+	int threadID, numLevels, numTaxa;
 	bool flip, flipped, shortcuts;
 	
 	int readTaxonomy(string);
 	vector<string> parseTax(string);
+    double getLogExpSum(vector<double>, int&);
 	MothurOut* m;
 	
 };
diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp
index 2e55673..36bccc2 100644
--- a/classifyseqscommand.cpp
+++ b/classifyseqscommand.cpp
@@ -21,9 +21,9 @@ vector<string> ClassifySeqsCommand::setParameters(){
         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
 		CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
 
-		CommandParameter psearch("search", "Multiple", "kmer-blast-suffix-distance", "kmer", "", "", "",false,false); parameters.push_back(psearch);
+		CommandParameter psearch("search", "Multiple", "kmer-blast-suffix-distance-align", "kmer", "", "", "",false,false); parameters.push_back(psearch);
 		CommandParameter pksize("ksize", "Number", "", "8", "", "", "",false,false); parameters.push_back(pksize);
-		CommandParameter pmethod("method", "Multiple", "bayesian-knn", "bayesian", "", "", "",false,false); parameters.push_back(pmethod);
+		CommandParameter pmethod("method", "Multiple", "wang-knn-zap", "wang", "", "", "",false,false); parameters.push_back(pmethod);
 		CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
 		CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
 		CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
@@ -55,11 +55,11 @@ string ClassifySeqsCommand::getHelpString(){
 		helpString += "The classify.seqs command reads a fasta file containing sequences and creates a .taxonomy file and a .tax.summary file.\n";
 		helpString += "The classify.seqs command parameters are reference, fasta, name, group, count, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend, numwanted and probs.\n";
 		helpString += "The reference, fasta and taxonomy parameters are required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
-		helpString += "The search parameter allows you to specify the method to find most similar template.  Your options are: suffix, kmer, blast and distance. The default is kmer.\n";
+		helpString += "The search parameter allows you to specify the method to find most similar template.  Your options are: suffix, kmer, blast, align and distance. The default is kmer.\n";
 		helpString += "The name parameter allows you add a names file with your fasta file, if you enter multiple fasta files, you must enter matching names files for them.\n";
 		helpString += "The group parameter allows you add a group file so you can have the summary totals broken up by group.\n";
         helpString += "The count parameter allows you add a count file so you can have the summary totals broken up by group.\n";
-		helpString += "The method parameter allows you to specify classification method to use.  Your options are: bayesian and knn. The default is bayesian.\n";
+		helpString += "The method parameter allows you to specify classification method to use.  Your options are: wang, knn and zap. The default is wang.\n";
 		helpString += "The ksize parameter allows you to specify the kmer size for finding most similar template to candidate.  The default is 8.\n";
 		helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
 #ifdef USE_MPI
@@ -72,8 +72,8 @@ string ClassifySeqsCommand::getHelpString(){
 		helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
 		helpString += "The numwanted parameter allows you to specify the number of sequence matches you want with the knn method.  The default is 10.\n";
 		helpString += "The cutoff parameter allows you to specify a bootstrap confidence threshold for your taxonomy.  The default is 0.\n";
-		helpString += "The probs parameter shuts off the bootstrapping results for the bayesian method. The default is true, meaning you want the bootstrapping to be shown.\n";
-		helpString += "The iters parameter allows you to specify how many iterations to do when calculating the bootstrap confidence score for your taxonomy with the bayesian method.  The default is 100.\n";
+		helpString += "The probs parameter shuts off the bootstrapping results for the wang and zap method. The default is true, meaning you want the bootstrapping to be shown.\n";
+		helpString += "The iters parameter allows you to specify how many iterations to do when calculating the bootstrap confidence score for your taxonomy with the wang method.  The default is 100.\n";
 		//helpString += "The flip parameter allows you shut off mothur's   The default is T.\n";
 		helpString += "The classify.seqs command should be in the following format: \n";
 		helpString += "classify.seqs(reference=yourTemplateFile, fasta=yourFastaFile, method=yourClassificationMethod, search=yourSearchmethod, ksize=yourKmerSize, taxonomy=yourTaxonomyFile, processors=yourProcessors) \n";
@@ -492,9 +492,6 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
 			//check for optional parameter and set defaults
 			// ...at some point should added some additional type checking...
 			string temp;
-			temp = validParameter.validFile(parameters, "ksize", false);		if (temp == "not found"){	temp = "8";				}
-			m->mothurConvert(temp, kmerSize); 
-			
 			temp = validParameter.validFile(parameters, "processors", false);	if (temp == "not found"){	temp = m->getProcessors();	}
 			m->setProcessors(temp);
 			m->mothurConvert(temp, processors); 
@@ -536,7 +533,13 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
 			
 			search = validParameter.validFile(parameters, "search", false);		if (search == "not found"){	search = "kmer";		}
 			
-			method = validParameter.validFile(parameters, "method", false);		if (method == "not found"){	method = "bayesian";	}
+			method = validParameter.validFile(parameters, "method", false);		if (method == "not found"){	method = "wang";	}
+            
+            temp = validParameter.validFile(parameters, "ksize", false);		if (temp == "not found"){	
+                temp = "8";	
+                if (method == "zap") { temp = "7"; }
+            }
+			m->mothurConvert(temp, kmerSize); 
 			
 			temp = validParameter.validFile(parameters, "match", false);		if (temp == "not found"){	temp = "1.0";			}
 			m->mothurConvert(temp, match);  
@@ -570,8 +573,13 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
 			m->mothurConvert(temp, iters); 
 
 			
-			if ((method == "bayesian") && (search != "kmer"))  { 
-				m->mothurOut("The bayesian method requires the kmer search." + search + "will be disregarded." ); m->mothurOutEndLine();
+			if ((method == "wang") && (search != "kmer"))  { 
+				m->mothurOut("The wang method requires the kmer search. " + search + " will be disregarded, and kmer will be used." ); m->mothurOutEndLine();
+				search = "kmer";
+			}
+            
+            if ((method == "zap") && ((search != "kmer") && (search != "align")))  { 
+				m->mothurOut("The zap method requires the kmer or align search. " + search + " will be disregarded, and kmer will be used." ); m->mothurOutEndLine();
 				search = "kmer";
 			}
 			
@@ -605,10 +613,16 @@ int ClassifySeqsCommand::execute(){
 	try {
 		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
         
-		if(method == "bayesian"){	classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts);		}
+        string outputMethodTag = method + ".";
+		if(method == "wang"){	classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts);	}
 		else if(method == "knn"){	classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand());				}
+        else if(method == "zap"){	
+            outputMethodTag = search + "_" + outputMethodTag;
+            if (search == "kmer") {   classify = new KmerTree(templateFileName, taxonomyFileName, kmerSize, cutoff); }
+            else {  classify = new AlignTree(templateFileName, taxonomyFileName, cutoff);  }
+        }
 		else {
-			m->mothurOut(search + " is not a valid method option. I will run the command using bayesian.");
+			m->mothurOut(search + " is not a valid method option. I will run the command using wang.");
 			m->mothurOutEndLine();
 			classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts);	
 		}
@@ -633,10 +647,10 @@ int ClassifySeqsCommand::execute(){
             if (RippedTaxName != "") { RippedTaxName +=  "."; }   
           
 			if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
-			string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + getOutputFileNameTag("taxonomy");
-			string newaccnosFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + getOutputFileNameTag("accnos");
+			string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + outputMethodTag + getOutputFileNameTag("taxonomy");
+			string newaccnosFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + outputMethodTag +getOutputFileNameTag("accnos");
 			string tempTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "taxonomy.temp";
-			string taxSummary = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + getOutputFileNameTag("taxsummary");
+			string taxSummary = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + outputMethodTag + getOutputFileNameTag("taxsummary");
 			
 			if ((method == "knn") && (search == "distance")) { 
 				string DistName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("matchdist");
@@ -798,12 +812,12 @@ int ClassifySeqsCommand::execute(){
                 if (hasCount) { 
                     ct = new CountTable();
                     ct->readTable(countfileNames[s]);
-                    taxaSum = new PhyloSummary(baseTName, ct);
+                    taxaSum = new PhyloSummary(taxonomyFileName, ct);
                     taxaSum->summarize(tempTaxonomyFile);
                 }else {
                     if (groupfile != "") {  group = groupfileNames[s]; groupMap = new GroupMap(group); groupMap->readMap(); }
                     
-                    taxaSum = new PhyloSummary(baseTName, groupMap);
+                    taxaSum = new PhyloSummary(taxonomyFileName, groupMap);
                     
                     if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; }  if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);	} delete classify; return 0; }
                     
diff --git a/classifyseqscommand.h b/classifyseqscommand.h
index 6d11d92..0cffec6 100644
--- a/classifyseqscommand.h
+++ b/classifyseqscommand.h
@@ -19,9 +19,11 @@
 #include "phylotree.h"
 #include "phylosummary.h"
 #include "knn.h"
+#include "kmertree.h"
+#include "aligntree.h"
 
 
-//KNN and Bayesian methods modeled from algorithms in
+//KNN and Wang methods modeled from algorithms in
 //NaÄ±Â¨ve Bayesian Classiï¬er for Rapid Assignment of rRNA Sequences 
 //into the New Bacterial Taxonomyô°â  
 //Qiong Wang,1 George M. Garrity,1,2 James M. Tiedje,1,2 and James R. Cole1* 
@@ -166,6 +168,11 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){
 		Classify* myclassify;
 		if(pDataArray->method == "bayesian"){	myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip, pDataArray->writeShortcuts);		}
 		else if(pDataArray->method == "knn"){	myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID);				}
+        else if(pDataArray->method == "zap"){	
+            outputMethodTag = search + "_" + outputMethodTag;
+            if (pDataArray->search == "kmer") {   myclassify = new KmerTree(pDataArray->templateFileName, pDataArray->taxonomyFileName, pDataArray->kmerSize, pDataArray->cutoff); }
+            else {  myclassify = new AlignTree(pDataArray->templateFileName, pDataArray->taxonomyFileName, pDataArray->cutoff);  }
+        }
 		else {
 			pDataArray->m->mothurOut(pDataArray->search + " is not a valid method option. I will run the command using bayesian.");
 			pDataArray->m->mothurOutEndLine();
diff --git a/kmernode.cpp b/kmernode.cpp
new file mode 100755
index 0000000..c087cac
--- /dev/null
+++ b/kmernode.cpp
@@ -0,0 +1,209 @@
+/*
+ *  kmerNode.cpp
+ *  bayesian
+ *
+ *  Created by Pat Schloss on 10/11/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "kmernode.h"
+
+
+/**********************************************************************************************************************/
+
+KmerNode::KmerNode(string s, int l, int n) : TaxonomyNode(s, l), kmerSize(n) {
+	try {
+        int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
+        
+        numPossibleKmers = power4s[kmerSize];
+        numUniqueKmers = 0;
+        
+        kmerVector.assign(numPossibleKmers, 0);
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerNode", "KmerNode");
+		exit(1);
+	}
+}
+
+/**********************************************************************************************************************/
+
+void KmerNode::loadSequence(vector<int>& kmerProfile){
+	try {
+        for(int i=0;i<numPossibleKmers;i++){
+            if (m->control_pressed) { break; }
+            if(kmerVector[i] == 0 && kmerProfile[i] != 0)	{	numUniqueKmers++;	}
+            
+            kmerVector[i] += kmerProfile[i];
+        }
+        
+        numSeqs++;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerNode", "loadSequence");
+		exit(1);
+	}
+}	
+
+/**********************************************************************************************************************/
+
+string KmerNode::getKmerBases(int kmerNumber){
+	try {
+        //	Here we convert the kmer number into the kmer in terms of bases.
+        //
+        //	Example:	Score = 915 (for a 6-mer)
+        //				Base6 = (915 / 4^0) % 4 = 915 % 4 = 3 => T	[T]
+        //				Base5 = (915 / 4^1) % 4 = 228 % 4 = 0 => A	[AT]
+        //				Base4 = (915 / 4^2) % 4 = 57 % 4 = 1 => C	[CAT]
+        //				Base3 = (915 / 4^3) % 4 = 14 % 4 = 2 => G	[GCAT]
+        //				Base2 = (915 / 4^4) % 4 = 3 % 4 = 3 => T	[TGCAT]
+        //				Base1 = (915 / 4^5) % 4 = 0 % 4 = 0 => A	[ATGCAT] -> this checks out with the previous method
+        
+        int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
+        
+        string kmer = "";
+        
+        if(kmerNumber == power4s[kmerSize]){//pow(4.,7)){	//	if the kmer number is the same as the maxKmer then it must
+            for(int i=0;i<kmerSize;i++){					//	have had an N in it and so we'll just call it N x kmerSize
+                kmer += 'N';
+            }
+        }
+        else{
+            for(int i=0;i<kmerSize;i++){
+                if (m->control_pressed) { return kmer; }
+                int nt = (int)(kmerNumber / (float)power4s[i]) % 4;		//	the '%' operator returns the remainder 
+                if(nt == 0)		{	kmer = 'A' + kmer;	}				//	from int-based division ]
+                else if(nt == 1){	kmer = 'C' + kmer;	}
+                else if(nt == 2){	kmer = 'G' + kmer;	}
+                else if(nt == 3){	kmer = 'T' + kmer;	}
+            }
+        }
+        return kmer;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerNode", "getKmerBases");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
+void KmerNode::addThetas(vector<int> newTheta, int newNumSeqs){
+	try {
+        for(int i=0;i<numPossibleKmers;i++){
+            if (m->control_pressed) { break; }
+            kmerVector[i] += newTheta[i];		
+        }
+        
+        //	if(alignLength == 0){
+        //		alignLength = (int)newTheta.size();
+        //		theta.resize(alignLength);
+        //		columnCounts.resize(alignLength);
+        //	}
+        //	
+        //	for(int i=0;i<alignLength;i++){	
+        //		theta[i].A += newTheta[i].A;		columnCounts[i] += newTheta[i].A;
+        //		theta[i].T += newTheta[i].T;		columnCounts[i] += newTheta[i].T;
+        //		theta[i].G += newTheta[i].G;		columnCounts[i] += newTheta[i].G;
+        //		theta[i].C += newTheta[i].C;		columnCounts[i] += newTheta[i].C;
+        //		theta[i].gap += newTheta[i].gap;	columnCounts[i] += newTheta[i].gap;
+        //	}
+        
+        numSeqs += newNumSeqs;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerNode", "addThetas");
+		exit(1);
+	}
+}
+
+/**********************************************************************************************************************/
+
+int KmerNode::getNumUniqueKmers(){
+    try {
+        if(numUniqueKmers == 0){
+            
+            for(int i=0;i<numPossibleKmers;i++){
+                if (m->control_pressed) { return numUniqueKmers; }
+                if(kmerVector[i] != 0){
+                    numUniqueKmers++;
+                }
+                
+            }
+            
+        }
+        
+        return numUniqueKmers;	
+        
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerNode", "getNumUniqueKmers");
+		exit(1);
+	}
+}
+
+/**********************************************************************************************************************/
+
+void KmerNode::printTheta(){
+	try {
+        m->mothurOut(name + "\n");
+        for(int i=0;i<numPossibleKmers;i++){
+            if(kmerVector[i] != 0){
+                m->mothurOut(getKmerBases(i) + '\t' + toString(kmerVector[i]) + "\n");
+            }
+        }
+        m->mothurOutEndLine();	
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerNode", "printTheta");
+		exit(1);
+	}
+	
+}
+/**************************************************************************************************/
+
+double KmerNode::getSimToConsensus(vector<int>& queryKmerProfile){
+	try {
+        double present = 0;
+        
+        for(int i=0;i<numPossibleKmers;i++){
+            if (m->control_pressed) { return present; }
+            if(queryKmerProfile[i] != 0 && kmerVector[i] != 0){
+                present++;
+            }
+        }	
+        
+        return present / double(queryKmerProfile.size() - kmerSize + 1);
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerNode", "getSimToConsensus");
+		exit(1);
+	}
+}
+
+/**********************************************************************************************************************/
+
+double KmerNode::getPxGivenkj_D_j(vector<int>& queryKmerProfile)	{	
+	try {
+        double sumLogProb = 0.0000;
+        double alpha = 1.0 / (double)totalSeqs;	//flat prior
+        //	double alpha = pow((1.0 / (double)numUniqueKmers), numSeqs)+0.0001;	//non-flat prior
+        
+        for(int i=0;i<numPossibleKmers;i++){
+            if (m->control_pressed) { return sumLogProb; }
+            if(queryKmerProfile[i] != 0){		//numUniqueKmers needs to be the value from Root;
+                sumLogProb += log((kmerVector[i] + alpha) / (numSeqs + numUniqueKmers * alpha));
+            }
+            
+        }
+        return sumLogProb;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerNode", "getPxGivenkj_D_j");
+		exit(1);
+	}
+
+}
+
+/**********************************************************************************************************************/
diff --git a/kmernode.h b/kmernode.h
new file mode 100755
index 0000000..e15fb1d
--- /dev/null
+++ b/kmernode.h
@@ -0,0 +1,45 @@
+#ifndef KMERNODE
+#define KMERNODE
+
+/*
+ *  kmerNode.h
+ *  bayesian
+ *
+ *  Created by Pat Schloss on 10/11/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+
+#include "taxonomynode.h"
+
+/**********************************************************************************************************************/
+
+class KmerNode : public TaxonomyNode {
+	
+public:
+	KmerNode(string, int, int);
+	void loadSequence(vector<int>&);
+	void printTheta();
+	double getPxGivenkj_D_j(vector<int>&);
+	double getSimToConsensus(vector<int>&);
+	void checkTheta(){};
+	void setNumUniqueKmers(int num)	{	numUniqueKmers = num;	}
+	int getNumUniqueKmers();
+	void addThetas(vector<int>, int);
+	vector<int> getTheta()	{	return kmerVector;	}
+
+
+private:
+	string getKmerBases(int);
+	int kmerSize;								//	value of k
+	int numPossibleKmers;						//	4^kmerSize
+	int numUniqueKmers;							//	number of unique kmers seen in a group ~ O_kj
+	int numKmers;								//	number of kmers in a sequence
+	vector<int> kmerVector;						//	counts of kmers across all sequences in a node
+};
+
+/**********************************************************************************************************************/
+
+#endif
+
diff --git a/kmertree.cpp b/kmertree.cpp
new file mode 100755
index 0000000..fbf2bfb
--- /dev/null
+++ b/kmertree.cpp
@@ -0,0 +1,386 @@
+//
+//  kmerTree.cpp
+//  pdsBayesian
+//
+//  Created by Patrick Schloss on 4/3/12.
+//  Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+#include "kmernode.h"
+#include "kmertree.h"
+
+/**************************************************************************************************/
+
+KmerTree::KmerTree(string referenceFileName, string taxonomyFileName, int k, int cutoff) : Classify(), confidenceThreshold(cutoff), kmerSize(k){
+	try {
+        KmerNode* newNode = new KmerNode("Root", 0, kmerSize);
+        tree.push_back(newNode);			//	the tree is stored as a vector of elements of type TaxonomyNode
+        
+        int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
+        numPossibleKmers = power4s[kmerSize];
+        
+        string refTaxonomy;
+        
+        readTaxonomy(taxonomyFileName);
+        
+        ifstream referenceFile;
+        m->openInputFile(referenceFileName, referenceFile);
+        bool error = false;
+        while(!referenceFile.eof()){
+            
+            if (m->control_pressed) { break; }
+            
+            Sequence seq(referenceFile);  m->gobble(referenceFile);
+            
+            if (seq.getName() != "") {
+                map<string, string>::iterator it = taxonomy.find(seq.getName());
+                
+                if (it != taxonomy.end()) {
+                    refTaxonomy = it->second;		//	lookup the taxonomy string for the current reference sequence
+                    vector<int> kmerProfile = ripKmerProfile(seq.getUnaligned());	//convert to kmer vector
+                    addTaxonomyToTree(seq.getName(), refTaxonomy, kmerProfile);
+                }else {
+                    m->mothurOut(seq.getName() + " is in your reference file, but not in your taxonomy file, please correct.\n"); error = true;
+                }
+            }
+        }
+        referenceFile.close();
+        
+        if (error) { m->control_pressed = true; }
+        
+        numTaxa = (int)tree.size();
+        numLevels = 0;
+        for(int i=0;i<numTaxa;i++){
+            int level = tree[i]->getLevel();
+            if(level > numLevels){	numLevels = level;	}
+        }
+        numLevels++;
+        
+        aggregateThetas();
+        
+        int dbSize = tree[0]->getNumSeqs();
+        
+        for(int i=0;i<numTaxa;i++){
+            tree[i]->checkTheta();
+            tree[i]->setNumUniqueKmers(tree[0]->getNumUniqueKmers());
+            tree[i]->setTotalSeqs(dbSize);
+        }
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerTree", "KmerTree");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
+KmerTree::~KmerTree(){
+	
+	for(int i=0;i<tree.size();i++){
+		delete tree[i];
+	}
+	
+}	
+/**********************************************************************************************************************/
+
+vector<int> KmerTree::ripKmerProfile(string sequence){
+    try {
+        //	assume all input sequences are unaligned
+        
+        int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
+        
+        int nKmers = (int)sequence.length() - kmerSize + 1;
+        
+        vector<int> kmerProfile(numPossibleKmers + 1, 0);
+        
+        for(int i=0;i<nKmers;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            int kmer = 0;
+            for(int j=0;j<kmerSize;j++){
+                if(toupper(sequence[j+i]) == 'A')		{	kmer += (0 * power4s[kmerSize-j-1]);	}
+                else if(toupper(sequence[j+i]) == 'C')	{	kmer += (1 * power4s[kmerSize-j-1]);	}
+                else if(toupper(sequence[j+i]) == 'G')	{	kmer += (2 * power4s[kmerSize-j-1]);	}
+                else if(toupper(sequence[j+i]) == 'U')	{	kmer += (3 * power4s[kmerSize-j-1]);	}
+                else if(toupper(sequence[j+i]) == 'T')	{	kmer += (3 * power4s[kmerSize-j-1]);	}
+                else									{	kmer = power4s[kmerSize]; j = kmerSize;	}
+            }
+            kmerProfile[kmer] = 1;
+        }
+        
+        return kmerProfile;	
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerTree", "ripKmerProfile");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
+int KmerTree::addTaxonomyToTree(string seqName, string taxonomy, vector<int>& sequence){
+	try {
+        KmerNode* newNode;
+        string taxonName = "";
+        int treePosition = 0;							//	the root is element 0
+        
+        
+        int level = 1;
+        
+        for(int i=0;i<taxonomy.length();i++){			//	step through taxonomy string...
+            
+            if (m->control_pressed) { break; }
+            if(taxonomy[i] == ';'){						//	looking for semicolons...
+                
+                if (taxonName == "") {  m->mothurOut(seqName + " has an error in the taxonomy.  This may be due to a ;;"); m->mothurOutEndLine(); m->control_pressed = true; }
+                
+                int newIndex = tree[treePosition]->getChildIndex(taxonName);//	look to see if your current node already
+                //	   has a child with the new taxonName
+                if(newIndex != -1)	{	treePosition = newIndex;	}		//	if you've seen it before, jump to that
+                else {														//	   position in the tree
+                    int newChildIndex = (int)tree.size();					//	otherwise, we'll have to create one...
+                    tree[treePosition]->makeChild(taxonName, newChildIndex);
+                    
+                    newNode = new KmerNode(taxonName, level, kmerSize);
+                    
+                    newNode->setParent(treePosition);
+                    
+                    tree.push_back(newNode);
+                    treePosition = newChildIndex;
+                }
+                
+                //	sequence data to that node to update that node's theta - seems slow...				
+                taxonName = "";								//	clear out the taxon name that we will build as we look 
+                level++;
+                
+            }												//	for a semicolon
+            else{
+                taxonName += taxonomy[i];					//	keep adding letters until we reach a semicolon
+            }
+        }
+        
+        tree[treePosition]->loadSequence(sequence);	//	now that we've gotten to the correct node, add the
+        
+        return 0;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerTree", "addTaxonomyToTree");
+		exit(1);
+	}
+	
+}
+
+/**************************************************************************************************/
+
+int KmerTree::aggregateThetas(){
+	try {
+        vector<vector<int> > levelMatrix(numLevels+1);
+        
+        for(int i=0;i<tree.size();i++){
+            if (m->control_pressed) { return 0; }
+            levelMatrix[tree[i]->getLevel()].push_back(i);
+        }
+        
+        for(int i=numLevels-1;i>0;i--) {
+            if (m->control_pressed) { return 0; }
+            
+            for(int j=0;j<levelMatrix[i].size();j++){
+                
+                KmerNode* holder = tree[levelMatrix[i][j]];
+                
+                tree[holder->getParent()]->addThetas(holder->getTheta(), holder->getNumSeqs());				
+            }
+        }
+        
+        return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "KmerTree", "aggregateThetas");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
+int KmerTree::getMinRiskIndexKmer(vector<int>& sequence, vector<int>& taxaIndices, vector<double>& probabilities){
+	try {
+        int numProbs = (int)probabilities.size();
+        
+        vector<double> G(numProbs, 0.2);	//a random sequence will, on average, be 20% similar to any other sequence; not sure that this holds up for kmers; whatever.
+        vector<double> risk(numProbs, 0);
+        
+        for(int i=1;i<numProbs;i++){ //use if you want the outlier group
+            if (m->control_pressed) { return 0; }
+            G[i] = tree[taxaIndices[i]]->getSimToConsensus(sequence);
+        }
+        
+        double minRisk = 1e6;
+        int minRiskIndex = 0;
+        
+        for(int i=0;i<numProbs;i++){
+            if (m->control_pressed) { return 0; }
+            for(int j=0;j<numProbs;j++){
+                if(i != j){
+                    risk[i] += probabilities[j] * G[j];
+                }			
+            }
+            
+            if(risk[i] < minRisk){
+                minRisk = risk[i];
+                minRiskIndex = i;
+            }
+        }
+        
+        return minRiskIndex;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerTree", "getMinRiskIndexKmer");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
+int KmerTree::sanityCheck(vector<vector<int> >& indices, vector<int>& maxIndices){
+	try {
+        int finalLevel = (int)indices.size()-1;
+        
+        for(int position=1;position<indices.size();position++){
+            if (m->control_pressed) { return 0; }
+            int predictedParent = tree[indices[position][maxIndices[position]]]->getParent();
+            int actualParent = indices[position-1][maxIndices[position-1]];
+            
+            if(predictedParent != actualParent){
+                finalLevel = position - 1;
+                return finalLevel;
+            }
+        }
+        return finalLevel;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "KmerTree", "sanityCheck");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+string KmerTree::getTaxonomy(Sequence* thisSeq){
+	try {
+        string seqName = thisSeq->getName(); string querySequence = thisSeq->getAligned(); string taxonProbabilityString = "";
+        string unalignedSeq = thisSeq->getUnaligned();
+        
+        double logPOutlier = (querySequence.length() - kmerSize + 1) * log(1.0/(double)tree[0]->getNumUniqueKmers());
+        
+        vector<int> queryProfile = ripKmerProfile(unalignedSeq);	//convert to kmer vector
+        
+        vector<vector<double> > pXgivenKj_D_j(numLevels);
+        vector<vector<int> > indices(numLevels);
+        for(int i=0;i<numLevels;i++){
+            if (m->control_pressed) { return taxonProbabilityString; }
+            pXgivenKj_D_j[i].push_back(logPOutlier);
+            indices[i].push_back(-1);
+        }
+        
+        for(int i=0;i<numTaxa;i++){
+            if (m->control_pressed) { return taxonProbabilityString; }
+            pXgivenKj_D_j[tree[i]->getLevel()].push_back(tree[i]->getPxGivenkj_D_j(queryProfile));
+            indices[tree[i]->getLevel()].push_back(i);
+        }
+        
+        vector<double> sumLikelihood(numLevels, 0);
+        vector<double> bestPosterior(numLevels, 0);
+        vector<int> maxIndex(numLevels, 0);
+        int maxPosteriorIndex;
+        
+        //let's find the best level and taxa within that level
+        for(int i=0;i<numLevels;i++){ //go across all j's - from the root to genus
+            if (m->control_pressed) { return taxonProbabilityString; }
+            
+            int numTaxaInLevel = (int)indices[i].size();
+            
+            vector<double> posteriors(numTaxaInLevel, 0);		
+            sumLikelihood[i] = getLogExpSum(pXgivenKj_D_j[i], maxPosteriorIndex);
+            
+            maxPosteriorIndex = 0;
+            for(int j=0;j<numTaxaInLevel;j++){
+                posteriors[j] = exp(pXgivenKj_D_j[i][j] - sumLikelihood[i]);
+                if(posteriors[j] > posteriors[maxPosteriorIndex]){	
+                    maxPosteriorIndex = j;
+                }
+                
+            }
+            
+            maxIndex[i] = getMinRiskIndexKmer(queryProfile, indices[i], posteriors);
+            
+            maxIndex[i] = maxPosteriorIndex;
+            bestPosterior[i] = posteriors[maxIndex[i]];	
+        }
+        
+        //	vector<double> pX_level(numLevels, 0);
+        //	
+        //	for(int i=0;i<numLevels;i++){
+        //		pX_level[i] = pXgivenKj_D_j[i][maxIndex[i]] - tree[indices[i][maxIndex[i]]]->getNumSeqs();
+        //	}
+        //	
+        //	int max_pLevel_X_index = -1;
+        //	double pX_level_sum = getLogExpSum(pX_level, max_pLevel_X_index);
+        //	double max_pLevel_X = exp(pX_level[max_pLevel_X_index] - pX_level_sum);
+        //	
+        //	vector<double> pLevel_X(numLevels, 0);
+        //	for(int i=0;i<numLevels;i++){
+        //		pLevel_X[i] = exp(pX_level[i] - pX_level_sum);
+        //	}
+        
+        int saneDepth = sanityCheck(indices, maxIndex);
+        
+        
+        //	stringstream levelProbabilityOutput;
+        //	levelProbabilityOutput.setf(ios::fixed, ios::floatfield);
+        //	levelProbabilityOutput.setf(ios::showpoint);
+        
+        
+        //taxonProbabilityOutput << seqName << '\t';
+        //	taxonProbabilityOutput << seqName << '(' << max_pLevel_X_index << ';' << max_pLevel_X << ')' << '\t';
+        //	levelProbabilityOutput << seqName << '(' << max_pLevel_X_index << ';' << max_pLevel_X << ')' << '\t';
+        simpleTax = "";
+        int savedspot = 1;
+        taxonProbabilityString = "";
+        for(int i=1;i<=saneDepth;i++){
+            if (m->control_pressed) { return taxonProbabilityString; }
+            int confidenceScore = (int) (bestPosterior[i] * 100);
+            if (confidenceScore >= confidenceThreshold) {
+                if(indices[i][maxIndex[i]] != -1){
+                    taxonProbabilityString += tree[indices[i][maxIndex[i]]]->getName() + "(" + toString(confidenceScore) + ");";
+                    simpleTax += tree[indices[i][maxIndex[i]]]->getName() + ";";
+                    
+                    //			levelProbabilityOutput << tree[indices[i][maxIndex[i]]]->getName() << '(' << setprecision(6) << pLevel_X[i] << ");";
+                }
+                else{
+                    taxonProbabilityString += "unclassified" + '(' + toString(confidenceScore) + ");";
+                    //			levelProbabilityOutput << "unclassified" << '(' << setprecision(6) << pLevel_X[i] << ");";
+                    simpleTax += "unclassified;";
+                }
+            }else { break; }
+            savedspot = i;
+        }
+        
+        
+        
+        for(int i=savedspot+1;i<numLevels;i++){
+            if (m->control_pressed) { return taxonProbabilityString; }
+            taxonProbabilityString += "unclassified(0);";
+            simpleTax += "unclassified;";
+        }
+        
+        return taxonProbabilityString;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "KmerTree", "getTaxonomy");
+		exit(1);
+	}
+}
+
+
+/**************************************************************************************************/
+
diff --git a/kmertree.h b/kmertree.h
new file mode 100755
index 0000000..f7c10ef
--- /dev/null
+++ b/kmertree.h
@@ -0,0 +1,37 @@
+//
+//  kmerTree.h
+//  pdsBayesian
+//
+//  Created by Patrick Schloss on 4/3/12.
+//  Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+#ifndef pdsBayesian_kmerTree_h
+#define pdsBayesian_kmerTree_h
+
+#include "classify.h"
+
+class KmerNode;
+
+class KmerTree : public Classify {
+	
+public:
+	KmerTree(string, string, int, int);
+	~KmerTree();
+	
+    string getTaxonomy(Sequence*);
+
+private:
+    int addTaxonomyToTree(string, string, vector<int>&);
+	vector<int> ripKmerProfile(string);
+	int getMinRiskIndexKmer(vector<int>&, vector<int>&, vector<double>&);
+	int aggregateThetas();
+	int sanityCheck(vector<vector<int> >&, vector<int>&);
+
+	int kmerSize;
+	int numPossibleKmers, confidenceThreshold;
+	vector<KmerNode*> tree;
+
+};
+
+#endif
diff --git a/taxonomynode.cpp b/taxonomynode.cpp
new file mode 100755
index 0000000..b90bda1
--- /dev/null
+++ b/taxonomynode.cpp
@@ -0,0 +1,72 @@
+/*
+ *  taxonomynode.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 7/8/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+/**************************************************************************************************/
+
+#include "taxonomynode.h"
+
+/**************************************************************************************************/
+
+TaxonomyNode::TaxonomyNode(string n, int l): name(n), level(l){
+    m = MothurOut::getInstance();
+	parent = -1;
+	numChildren = 0;
+	numSeqs = 0;
+}
+
+/**************************************************************************************************/
+
+void TaxonomyNode::setName(string n)			{	name = n;					}
+
+/**************************************************************************************************/
+
+string TaxonomyNode::getName()					{	return name;				}
+
+/**************************************************************************************************/
+
+void TaxonomyNode::setParent(int p)				{	parent = p;					}
+
+/**************************************************************************************************/
+
+int TaxonomyNode::getParent()					{	return parent;				}
+
+/**************************************************************************************************/
+
+void TaxonomyNode::makeChild(string c, int i)	{	children[c] = i;			}
+
+
+/**************************************************************************************************/
+
+map<string, int> TaxonomyNode::getChildren()	{	return children;			}
+
+/**************************************************************************************************/
+
+int TaxonomyNode::getChildIndex(string c){
+	map<string, int>::iterator it = children.find(c);
+	if(it != children.end())	{	return it->second;			}
+	else						{	return -1;					}	
+}
+
+/**************************************************************************************************/
+
+int	TaxonomyNode::getNumKids()					{	return (int)children.size();		}
+
+/**************************************************************************************************/
+
+int	TaxonomyNode::getNumSeqs()					{	return numSeqs;				}
+
+/**************************************************************************************************/
+
+void TaxonomyNode::setTotalSeqs(int n)			{	totalSeqs = n;				}
+
+/**************************************************************************************************/
+
+int TaxonomyNode::getLevel()					{	return level;				}
+
+/**************************************************************************************************/
diff --git a/taxonomynode.h b/taxonomynode.h
new file mode 100755
index 0000000..08bad3e
--- /dev/null
+++ b/taxonomynode.h
@@ -0,0 +1,53 @@
+#ifndef TAXONOMYNODE
+#define TAXONOMYNODE
+
+/*
+ *  taxonomynode.h
+ *  
+ *
+ *  Created by Pat Schloss on 7/8/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+/**************************************************************************************************/
+
+#include "mothurout.h"
+/**************************************************************************************************/
+
+class TaxonomyNode {
+	
+public:
+	TaxonomyNode();
+	TaxonomyNode(string, int);
+	void setName(string);
+	string getName();
+
+
+	void setParent(int);
+	int getParent();
+	
+	void makeChild(string, int);
+	map<string, int> getChildren();
+	int getChildIndex(string);
+	int	getNumKids();
+	int getNumSeqs();
+	void setTotalSeqs(int);
+	int getLevel();
+	
+private:
+	int parent;
+	map<string, int> children;
+	int numChildren;
+	int level;
+	
+protected:
+    MothurOut* m;
+	int numSeqs;
+	int totalSeqs;
+	string name;
+};
+
+/**************************************************************************************************/
+
+#endif