From e8e13c129ba8184ec5932a090773f353f3ae3406 Mon Sep 17 00:00:00 2001
From: Sarah Westcott <mothur.westcott@gmail.com>
Date: Wed, 3 Oct 2012 14:51:10 -0400
Subject: [PATCH] added zap method to classify.seqs and changed bayesian method
 name to wang.

---
 Mothur.xcodeproj/project.pbxproj |  32 ++-
 alignnode.cpp                    | 257 ++++++++++++++++++++
 alignnode.h                      |  49 ++++
 aligntree.cpp                    | 371 +++++++++++++++++++++++++++++
 aligntree.h                      |  34 +++
 classify.cpp                     |  34 +++
 classify.h                       |   6 +-
 classifyseqscommand.cpp          |  52 +++--
 classifyseqscommand.h            |   9 +-
 kmernode.cpp                     | 209 +++++++++++++++++
 kmernode.h                       |  45 ++++
 kmertree.cpp                     | 386 +++++++++++++++++++++++++++++++
 kmertree.h                       |  37 +++
 taxonomynode.cpp                 |  72 ++++++
 taxonomynode.h                   |  53 +++++
 15 files changed, 1621 insertions(+), 25 deletions(-)
 create mode 100755 alignnode.cpp
 create mode 100755 alignnode.h
 create mode 100755 aligntree.cpp
 create mode 100755 aligntree.h
 create mode 100755 kmernode.cpp
 create mode 100755 kmernode.h
 create mode 100755 kmertree.cpp
 create mode 100755 kmertree.h
 create mode 100755 taxonomynode.cpp
 create mode 100755 taxonomynode.h
diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj
index 9d6261b..b2895e8 100644
--- a/Mothur.xcodeproj/project.pbxproj
+++ b/Mothur.xcodeproj/project.pbxproj
@@ -19,6 +19,11 @@
 		A71CB160130B04A2001E7287 /* anosimcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71CB15E130B04A2001E7287 /* anosimcommand.cpp */; };
 		A71FE12C12EDF72400963CA7 /* mergegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */; };
 		A721765713BB9F7D0014DAAE /* referencedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721765613BB9F7D0014DAAE /* referencedb.cpp */; };
+		A721AB6A161C570F009860A1 /* alignnode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB66161C570F009860A1 /* alignnode.cpp */; };
+		A721AB6B161C570F009860A1 /* aligntree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB68161C570F009860A1 /* aligntree.cpp */; };
+		A721AB71161C572A009860A1 /* kmernode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB6D161C572A009860A1 /* kmernode.cpp */; };
+		A721AB72161C572A009860A1 /* kmertree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB6F161C572A009860A1 /* kmertree.cpp */; };
+		A721AB77161C573B009860A1 /* taxonomynode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB73161C573B009860A1 /* taxonomynode.cpp */; };
 		A724D2B7153C8628000A826F /* makebiomcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A724D2B6153C8628000A826F /* makebiomcommand.cpp */; };
 		A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; };
 		A7386C231619CCE600651424 /* classifysharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C211619CCE600651424 /* classifysharedcommand.cpp */; };
@@ -395,6 +400,16 @@
 		A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mergegroupscommand.cpp; sourceTree = "<group>"; };
 		A721765513BB9F7D0014DAAE /* referencedb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = referencedb.h; sourceTree = "<group>"; };
 		A721765613BB9F7D0014DAAE /* referencedb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = referencedb.cpp; sourceTree = "<group>"; };
+		A721AB66161C570F009860A1 /* alignnode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignnode.cpp; sourceTree = "<group>"; };
+		A721AB67161C570F009860A1 /* alignnode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alignnode.h; sourceTree = "<group>"; };
+		A721AB68161C570F009860A1 /* aligntree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligntree.cpp; sourceTree = "<group>"; };
+		A721AB69161C570F009860A1 /* aligntree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aligntree.h; sourceTree = "<group>"; };
+		A721AB6D161C572A009860A1 /* kmernode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = kmernode.cpp; sourceTree = "<group>"; };
+		A721AB6E161C572A009860A1 /* kmernode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kmernode.h; sourceTree = "<group>"; };
+		A721AB6F161C572A009860A1 /* kmertree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = kmertree.cpp; sourceTree = "<group>"; };
+		A721AB70161C572A009860A1 /* kmertree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kmertree.h; sourceTree = "<group>"; };
+		A721AB73161C573B009860A1 /* taxonomynode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = taxonomynode.cpp; sourceTree = "<group>"; };
+		A721AB74161C573B009860A1 /* taxonomynode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = taxonomynode.h; sourceTree = "<group>"; };
 		A724D2B4153C8600000A826F /* makebiomcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makebiomcommand.h; sourceTree = "<group>"; };
 		A724D2B6153C8628000A826F /* makebiomcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makebiomcommand.cpp; sourceTree = "<group>"; };
 		A727864212E9E28C00F86ABA /* removerarecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removerarecommand.h; sourceTree = "<group>"; };
@@ -1775,10 +1790,18 @@
 		A7E9BA4B12D3966900DA6239 /* classifier */ = {
 			isa = PBXGroup;
 			children = (
-				A7E9B65A12D37EC300DA6239 /* bayesian.cpp */,
+				A721AB67161C570F009860A1 /* alignnode.h */,
+				A721AB66161C570F009860A1 /* alignnode.cpp */,
+				A721AB69161C570F009860A1 /* aligntree.h */,
+				A721AB68161C570F009860A1 /* aligntree.cpp */,
 				A7E9B65B12D37EC300DA6239 /* bayesian.h */,
+				A7E9B65A12D37EC300DA6239 /* bayesian.cpp */,
 				A7E9B68E12D37EC400DA6239 /* classify.cpp */,
 				A7E9B68F12D37EC400DA6239 /* classify.h */,
+				A721AB6E161C572A009860A1 /* kmernode.h */,
+				A721AB6D161C572A009860A1 /* kmernode.cpp */,
+				A721AB70161C572A009860A1 /* kmertree.h */,
+				A721AB6F161C572A009860A1 /* kmertree.cpp */,
 				A7E9B73812D37EC400DA6239 /* knn.h */,
 				A7E9B73712D37EC400DA6239 /* knn.cpp */,
 				A7E9B78D12D37EC400DA6239 /* phylosummary.cpp */,
@@ -1787,6 +1810,8 @@
 				A7E9B79012D37EC400DA6239 /* phylotree.h */,
 				A7E9B85D12D37EC400DA6239 /* taxonomyequalizer.cpp */,
 				A7E9B85E12D37EC400DA6239 /* taxonomyequalizer.h */,
+				A721AB74161C573B009860A1 /* taxonomynode.h */,
+				A721AB73161C573B009860A1 /* taxonomynode.cpp */,
 			);
 			name = classifier;
 			sourceTree = "<group>";
@@ -2250,6 +2275,11 @@
 				A7386C29161A110800651424 /* decisiontree.cpp in Sources */,
 				A77E1938161B201E00DB1A2A /* randomforest.cpp in Sources */,
 				A77E193B161B289600DB1A2A /* rftreenode.cpp in Sources */,
+				A721AB6A161C570F009860A1 /* alignnode.cpp in Sources */,
+				A721AB6B161C570F009860A1 /* aligntree.cpp in Sources */,
+				A721AB71161C572A009860A1 /* kmernode.cpp in Sources */,
+				A721AB72161C572A009860A1 /* kmertree.cpp in Sources */,
+				A721AB77161C573B009860A1 /* taxonomynode.cpp in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
diff --git a/alignnode.cpp b/alignnode.cpp
new file mode 100755
index 0000000..ccd8fb0
--- /dev/null
+++ b/alignnode.cpp
@@ -0,0 +1,257 @@
+/*
+ *  alignNode.cpp
+ *  bayesian
+ *
+ *  Created by Pat Schloss on 10/11/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "alignNode.h"
+#include "taxonomynode.h"
+
+#include "bayesian.h"
+
+/**************************************************************************************************/
+
+AlignNode::AlignNode(string n, int l): TaxonomyNode(n, l){
+
+	alignLength = 0;
+}
+
+/**************************************************************************************************/
+
+void AlignNode::printTheta(){
+    try {
+        m->mothurOut("A:\t"); for(int i=0;i<alignLength;i++){	m->mothurOut(toString(theta[i].A)+ '\t');		}	m->mothurOutEndLine();
+        m->mothurOut("T:\t"); for(int i=0;i<alignLength;i++){	m->mothurOut(toString(theta[i].T)+ '\t');		}	m->mothurOutEndLine();
+        m->mothurOut("G:\t"); for(int i=0;i<alignLength;i++){	m->mothurOut(toString(theta[i].G)+ '\t');		}	m->mothurOutEndLine();
+        m->mothurOut("C:\t"); for(int i=0;i<alignLength;i++){	m->mothurOut(toString(theta[i].C)+ '\t');		}	m->mothurOutEndLine();
+        m->mothurOut("I:\t"); for(int i=0;i<alignLength;i++){	m->mothurOut(toString(theta[i].gap)+ '\t');		}	m->mothurOutEndLine();
+    }
+	catch(exception& e) {
+		m->errorOut(e, "AlignNode", "printTheta");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
+int AlignNode::loadSequence(string& sequence){
+	try {
+        alignLength = (int)sequence.length();	//	this function runs through the alignment and increments the frequency
+        //	of each base for a particular taxon.  we are building the thetas
+        
+        if(theta.size() == 0){
+            theta.resize(alignLength);
+            columnCounts.resize(alignLength, 0);
+        }
+        
+        for(int i=0;i<alignLength;i++){
+            
+            if (m->control_pressed) { return 0; }
+            
+            char base = sequence[i];
+            
+            if(base == 'A')		{	theta[i].A++;	columnCounts[i]++;		}	//	our thetas will be alignLength x 5  
+            else if(base == 'T'){	theta[i].T++;	columnCounts[i]++;		}	//	and we ignore any position that has  
+            else if(base == 'G'){	theta[i].G++;	columnCounts[i]++;		}	//	an ambiguous base call
+            else if(base == 'C'){	theta[i].C++;	columnCounts[i]++;		}
+            else if(base == '-'){	theta[i].gap++; columnCounts[i]++;		}
+            else if(base == 'U'){	theta[i].T++;	columnCounts[i]++;		}
+        }
+        
+        numSeqs++;
+        
+        return 0;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "AlignNode", "loadSequence");
+		exit(1);
+	}
+}	
+
+/**************************************************************************************************/
+
+int AlignNode::checkTheta(){
+    try {
+        for(int i=0;i<alignLength;i++){
+            
+            if (m->control_pressed) { return 0; }
+            
+            if(theta[i].gap == columnCounts[i]){
+                columnCounts[i] = 0;
+            }
+            //        else{
+            //            int maxCount = theta[i].A;
+            //            
+            //            if(theta[i].T > maxCount)   {    maxCount = theta[i].T;  }
+            //            if(theta[i].G > maxCount)   {    maxCount = theta[i].T;  }
+            //            if(theta[i].C > maxCount)   {    maxCount = theta[i].T;  }
+            //            if(theta[i].gap > maxCount) {    maxCount = theta[i].T;  }
+            //        
+            //            if(maxCount < columnCounts[i] * 0.25){// || maxCount == columnCounts[i]){   //remove any column where the maximum frequency is <50%
+            //                columnCounts[i] = 0;
+            //            }
+            //        }
+            
+        }
+        
+        return 0;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "AlignNode", "checkTheta");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
+int AlignNode::addThetas(vector<thetaAlign> newTheta, int newNumSeqs){
+	try {
+        if(alignLength == 0){
+            alignLength = (int)newTheta.size();
+            theta.resize(alignLength);
+            columnCounts.resize(alignLength);
+        }
+        
+        for(int i=0;i<alignLength;i++){	
+            
+            if (m->control_pressed) { return 0; }
+            
+            theta[i].A += newTheta[i].A;		columnCounts[i] += newTheta[i].A;
+            theta[i].T += newTheta[i].T;		columnCounts[i] += newTheta[i].T;
+            theta[i].G += newTheta[i].G;		columnCounts[i] += newTheta[i].G;
+            theta[i].C += newTheta[i].C;		columnCounts[i] += newTheta[i].C;
+            theta[i].gap += newTheta[i].gap;	columnCounts[i] += newTheta[i].gap;
+        }
+        
+        numSeqs += newNumSeqs;
+        
+        return 0;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "AlignNode", "addThetas");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
+double AlignNode::getSimToConsensus(string& query){
+	try {
+        double similarity = 0;
+        
+        int length = 0;
+        
+        for(int i=0;i<alignLength;i++){
+            
+            if (m->control_pressed) { return similarity; }
+            
+            char base = query[i];
+            
+            if(base != '.' && base != 'N' && columnCounts[i] != 0){
+                
+                double fraction = 0;
+                
+                if(base == 'A'){
+                    fraction = (int) theta[i].A / (double) columnCounts[i];
+                    similarity += fraction;
+                    length++;
+                }
+                else if(base == 'T'){
+                    fraction = (int) theta[i].T / (double) columnCounts[i];
+                    similarity += fraction;
+                    length++;
+                }
+                else if(base == 'G'){
+                    fraction = (int) theta[i].G / (double) columnCounts[i];
+                    similarity += fraction;
+                    length++;
+                }
+                else if(base == 'C'){
+                    fraction = (int) theta[i].C / (double) columnCounts[i];
+                    similarity += fraction;
+                    length++;
+                }
+                else if(base == '-'){
+                    fraction = (int) theta[i].gap / (double) columnCounts[i];
+                    similarity += fraction;
+                    length++;
+                }
+            }
+        }
+        
+        if(length != 0){
+            similarity /= double(length);
+        }
+        else {
+            similarity = 0;
+        }
+        
+        return similarity;	
+        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "AlignNode", "getSimToConsensus");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+double AlignNode::getPxGivenkj_D_j(string& query){	//P(x | k_j, D, j)
+	try {
+        double PxGivenkj_D_j = 0;
+        
+        int count = 0;
+        double alpha = 1 / (double)totalSeqs;	//flat prior
+        
+        
+        for(int s=0;s<alignLength;s++){
+            
+            if (m->control_pressed) { return PxGivenkj_D_j; }
+            
+            char base = query[s];
+            thetaAlign thetaS = theta[s];
+            
+            if(base != '.' && base != 'N' && columnCounts[s] != 0){
+                double Nkj_s = (double)columnCounts[s];	
+                double nkj_si = 0;
+                
+                
+                if(base == 'A')		{	nkj_si = (double)thetaS.A;		}
+                else if(base == 'T'){	nkj_si = (double)thetaS.T;		}	
+                else if(base == 'G'){	nkj_si = (double)thetaS.G;		}	
+                else if(base == 'C'){	nkj_si = (double)thetaS.C;		}	
+                else if(base == '-'){	nkj_si = (double)thetaS.gap;	}
+                else if(base == 'U'){	nkj_si = (double)thetaS.T;		}	
+                
+                //			double alpha = pow(0.2, double(Nkj_s)) + 0.0001;	//need to make 1e-4 a variable in future; this is the non-flat prior
+                
+                //			if(columnCounts[s] != nkj_si){						//deal only with segregating sites...
+				double numerator = nkj_si + alpha;
+				double denomenator = Nkj_s + 5.0 * alpha;
+				
+				PxGivenkj_D_j += log(numerator) - log(denomenator);		
+				count++;
+                //			}
+            }
+            if(base != '.' && columnCounts[s] == 0 && thetaS.gap == 0){
+                count = 0;
+                break;
+            }
+            
+        }
+        
+        if(count == 0){	PxGivenkj_D_j = -1e10;	}	
+        
+        return PxGivenkj_D_j;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "AlignNode", "getPxGivenkj_D_j");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
diff --git a/alignnode.h b/alignnode.h
new file mode 100755
index 0000000..4aecca7
--- /dev/null
+++ b/alignnode.h
@@ -0,0 +1,49 @@
+#ifndef ALIGNNODE
+#define ALIGNNODE
+
+/*
+ *  alignNode.h
+ *  bayesian
+ *
+ *  Created by Pat Schloss on 10/11/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "taxonomynode.h"
+
+/**************************************************************************************************/
+
+struct thetaAlign {
+	thetaAlign() : A(0), T(0), G(0), C(0), gap(0){}
+	unsigned int A;
+	unsigned int T;
+	unsigned int G;
+	unsigned int C;
+	unsigned int gap;
+};
+
+/**************************************************************************************************/
+
+class AlignNode : public TaxonomyNode {
+	
+public:
+	AlignNode(string, int);
+	int loadSequence(string&);
+	int checkTheta();
+    void printTheta();
+	double getPxGivenkj_D_j(string& query);	//P(x | k_j, D, j)
+	double getSimToConsensus(string& query);
+	vector<thetaAlign> getTheta()	{	return theta;	}
+	int addThetas(vector<thetaAlign>, int);
+	
+private:
+	vector<thetaAlign> theta;
+	vector<unsigned int> columnCounts;
+	int alignLength;
+};
+
+/**************************************************************************************************/
+
+#endif
+
diff --git a/aligntree.cpp b/aligntree.cpp
new file mode 100755
index 0000000..41667ca
--- /dev/null
+++ b/aligntree.cpp
@@ -0,0 +1,371 @@
+//
+//  alignTree.cpp
+//  pdsBayesian
+//
+//  Created by Patrick Schloss on 4/3/12.
+//  Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+#include "alignnode.h"
+#include "aligntree.h"
+
+/**************************************************************************************************/
+
+AlignTree::AlignTree(string referenceFileName, string taxonomyFileName, int cutoff) : Classify(), confidenceThreshold(cutoff){
+	try {
+        AlignNode* newNode = new AlignNode("Root", 0);
+        tree.push_back(newNode);			//	the tree is stored as a vector of elements of type TaxonomyNode
+        
+        string refTaxonomy;
+        
+        readTaxonomy(taxonomyFileName);
+     
+        ifstream referenceFile;
+        m->openInputFile(referenceFileName, referenceFile);
+        bool error = false;
+        map<int, int> lengths;
+        while(!referenceFile.eof()){
+            
+            if (m->control_pressed) { break; }
+            
+            Sequence seq(referenceFile);  m->gobble(referenceFile);
+            
+            if (seq.getName() != "") {
+                map<string, string>::iterator it = taxonomy.find(seq.getName());
+                
+                if (it != taxonomy.end()) {
+                    refTaxonomy = it->second;		//	lookup the taxonomy string for the current reference sequence
+                    string aligned = seq.getAligned();
+                    lengths[aligned.length()] = 1;
+                    if (lengths.size() > 1) { error = true; m->mothurOut("[ERROR]: reference sequences must be aligned to use the align method, quitting.\n"); break; }
+                    addTaxonomyToTree(seq.getName(), refTaxonomy, aligned);
+                }else {
+                    m->mothurOut(seq.getName() + " is in your reference file, but not in your taxonomy file, please correct.\n"); error = true;
+                }
+            }
+        }
+        referenceFile.close();
+        
+        length = (lengths.begin())->first;  
+           
+        if (error) { m->control_pressed = true; }
+        
+        numTaxa = (int)tree.size();
+        
+        numLevels = 0;
+        for(int i=0;i<numTaxa;i++){
+            int level = tree[i]->getLevel();
+            if(level > numLevels){	numLevels = level;	}
+        }
+        numLevels++;
+        
+        aggregateThetas();
+        
+        int dbSize = tree[0]->getNumSeqs();
+        
+        for(int i=0;i<numTaxa;i++){
+            tree[i]->checkTheta();
+            tree[i]->setTotalSeqs(dbSize);
+        }
+        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "AlignTree", "AlignTree");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+AlignTree::~AlignTree(){
+	try {
+        for(int i=0;i<tree.size();i++){
+            delete tree[i];
+        }
+	}
+    catch(exception& e) {
+        m->errorOut(e, "AlignTree", "~AlignTree");
+        exit(1);
+    }
+}	
+
+/**************************************************************************************************/
+
+int AlignTree::addTaxonomyToTree(string seqName, string& taxonomy, string& sequence){
+	try {
+        AlignNode* newNode;
+        string taxonName = "";
+        int treePosition = 0;							//	the root is element 0
+        
+        int level = 1;
+        
+        for(int i=0;i<taxonomy.length();i++){			//	step through taxonomy string...
+            
+            if (m->control_pressed) { break; }
+            
+            if(taxonomy[i] == ';'){						//	looking for semicolons...
+                
+                if (taxonName == "") {  m->mothurOut(seqName + " has an error in the taxonomy.  This may be due to a ;;"); m->mothurOutEndLine(); m->control_pressed = true; }
+                
+                int newIndex = tree[treePosition]->getChildIndex(taxonName);	//	look to see if your current node already
+                //	has a child with the new taxonName
+                if(newIndex != -1)	{	treePosition = newIndex;	}		//	if you've seen it before, jump to that
+                else {														//	 position in the tree
+                    int newChildIndex = (int)tree.size();						//	otherwise, we'll have to create one...
+                    tree[treePosition]->makeChild(taxonName, newChildIndex);
+                    
+                    newNode = new AlignNode(taxonName, level);
+                    
+                    newNode->setParent(treePosition);
+                    
+                    tree.push_back(newNode);
+                    treePosition = newChildIndex;
+                }
+                
+                //	sequence data to that node to update that node's theta - seems slow...				
+                taxonName = "";								//	clear out the taxon name that we will build as we look 
+                level++;
+            }												//	for a semicolon
+            else{
+                taxonName += taxonomy[i];					//	keep adding letters until we reach a semicolon
+            }
+        }
+        tree[treePosition]->loadSequence(sequence);	//	now that we've gotten to the correct node, add the
+        
+        return 0;
+	}
+    catch(exception& e) {
+        m->errorOut(e, "AlignTree", "addTaxonomyToTree");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+int AlignTree::aggregateThetas(){
+	try {
+        vector<vector<int> > levelMatrix(numLevels+1);
+        
+        for(int i=0;i<tree.size();i++){
+            if (m->control_pressed) { return 0; }
+            levelMatrix[tree[i]->getLevel()].push_back(i);
+        }
+		
+        for(int i=numLevels-1;i>0;i--){
+            if (m->control_pressed) { return 0; }
+            for(int j=0;j<levelMatrix[i].size();j++){
+                
+                AlignNode* holder = tree[levelMatrix[i][j]];
+                
+                tree[holder->getParent()]->addThetas(holder->getTheta(), holder->getNumSeqs());				
+            }
+        }
+	    return 0;
+	}
+    catch(exception& e) {
+        m->errorOut(e, "AlignTree", "aggregateThetas");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+double AlignTree::getOutlierLogProbability(string& sequence){
+	try {
+        double count = 0;
+        
+        for(int i=0;i<sequence.length();i++){
+            
+            if(sequence[i] != '.'){	count++;	}
+            
+        }
+        
+        return count * log(0.2);
+    }
+    catch(exception& e) {
+        m->errorOut(e, "AlignTree", "getOutlierLogProbability");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+int AlignTree::getMinRiskIndexAlign(string& sequence, vector<int>& taxaIndices, vector<double>& probabilities){
+	try {
+        int numProbs = (int)probabilities.size();
+        
+        vector<double> G(numProbs, 0.2);	//a random sequence will, on average, be 20% similar to any other sequence
+        vector<double> risk(numProbs, 0);
+        
+        for(int i=1;i<numProbs;i++){ //use if you want the outlier group
+            if (m->control_pressed) { return 0; }
+            G[i] = tree[taxaIndices[i]]->getSimToConsensus(sequence);
+        }
+        
+        double minRisk = 1e6;
+        int minRiskIndex = 0;
+        
+        for(int i=0;i<numProbs;i++){
+            if (m->control_pressed) { return 0; }
+            for(int j=0;j<numProbs;j++){
+                if(i != j){
+                    risk[i] += probabilities[j] * G[j];
+                }			
+            }
+            
+            if(risk[i] < minRisk){
+                minRisk = risk[i];
+                minRiskIndex = i;
+            }
+        }
+        
+        return minRiskIndex;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "AlignTree", "getMinRiskIndexAlign");
+        exit(1);
+    }
+
+}
+
+/**************************************************************************************************/
+
+int AlignTree::sanityCheck(vector<vector<int> >& indices, vector<int>& maxIndices){
+	try {
+        int finalLevel = (int)indices.size()-1;
+        
+        for(int position=1;position<indices.size();position++){
+            if (m->control_pressed) { return 0; }
+            int predictedParent = tree[indices[position][maxIndices[position]]]->getParent();
+            int actualParent = indices[position-1][maxIndices[position-1]];
+            
+            if(predictedParent != actualParent){
+                finalLevel = position - 1;
+                return finalLevel;
+            }
+        }
+        return finalLevel;
+	}
+    catch(exception& e) {
+        m->errorOut(e, "AlignTree", "sanityCheck");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+string AlignTree::getTaxonomy(Sequence* seq){
+    try {
+        string seqName = seq->getName(); string querySequence = seq->getAligned(); string taxonProbabilityString = "";
+        if (querySequence.length() != length) {
+            m->mothurOut("[ERROR]: " + seq->getName() + " has length " + toString(querySequence.length()) + ", reference sequences length is " + toString(length) + ". Are your sequences aligned? Sequences must be aligned to use the align search method.\n"); m->control_pressed = true; return "";
+        }
+        double logPOutlier = getOutlierLogProbability(querySequence);
+        
+        vector<vector<double> > pXgivenKj_D_j(numLevels);
+        vector<vector<int> > indices(numLevels);
+        for(int i=0;i<numLevels;i++){
+            if (m->control_pressed) { return taxonProbabilityString; }
+            pXgivenKj_D_j[i].push_back(logPOutlier);
+            indices[i].push_back(-1);
+        }
+        
+        
+        for(int i=0;i<numTaxa;i++){
+            //		cout << i << '\t' << tree[i]->getName() << '\t' << tree[i]->getLevel() << '\t' << tree[i]->getPxGivenkj_D_j(querySequence) << endl;
+            if (m->control_pressed) { return taxonProbabilityString; }
+            pXgivenKj_D_j[tree[i]->getLevel()].push_back(tree[i]->getPxGivenkj_D_j(querySequence));
+            indices[tree[i]->getLevel()].push_back(i);
+        }
+        
+        vector<double> sumLikelihood(numLevels, 0);
+        vector<double> bestPosterior(numLevels, 0);
+        vector<int> maxIndex(numLevels, 0);
+        int maxPosteriorIndex;
+        
+        
+        	//cout << "before best level" << endl;
+        
+        //let's find the best level and taxa within that level
+        for(int i=0;i<numLevels;i++){ //go across all j's - from the root to genus
+            if (m->control_pressed) { return taxonProbabilityString; }
+            int numTaxaInLevel = (int)indices[i].size();
+            
+            		//cout << "numTaxaInLevel:\t" << numTaxaInLevel << endl;
+            
+            vector<double> posteriors(numTaxaInLevel, 0);		
+            sumLikelihood[i] = getLogExpSum(pXgivenKj_D_j[i], maxPosteriorIndex);
+            
+            maxPosteriorIndex = 0;
+            for(int j=0;j<numTaxaInLevel;j++){
+                posteriors[j] = exp(pXgivenKj_D_j[i][j] - sumLikelihood[i]);
+                
+                if(posteriors[j] > posteriors[maxPosteriorIndex]){	
+                    maxPosteriorIndex = j;
+                }
+                
+            }
+            
+            maxIndex[i] = getMinRiskIndexAlign(querySequence, indices[i], posteriors);
+            
+            maxIndex[i] = maxPosteriorIndex;
+            bestPosterior[i] = posteriors[maxIndex[i]];	
+        }
+        
+        //	vector<double> pX_level(numLevels, 0);
+        //	
+        //	for(int i=0;i<numLevels;i++){
+        //		pX_level[i] = pXgivenKj_D_j[i][maxIndex[i]] - tree[indices[i][maxIndex[i]]]->getNumSeqs();
+        //	}
+        //	
+        //	int max_pLevel_X_index = -1;
+        //	double pX_level_sum = getLogExpSum(pX_level, max_pLevel_X_index);
+        //	double max_pLevel_X = exp(pX_level[max_pLevel_X_index] - pX_level_sum);
+        //	
+        //	vector<double> pLevel_X(numLevels, 0);
+        //	for(int i=0;i<numLevels;i++){
+        //		pLevel_X[i] = exp(pX_level[i] - pX_level_sum);
+        //	}
+        
+        
+        
+        
+        int saneDepth = sanityCheck(indices, maxIndex);
+        
+        simpleTax = "";
+        int savedspot = 1;
+        taxonProbabilityString = "";
+        for(int i=1;i<=saneDepth;i++){
+            if (m->control_pressed) { return taxonProbabilityString; }
+            int confidenceScore = (int) (bestPosterior[i] * 100);
+            if (confidenceScore >= confidenceThreshold) {
+            if(indices[i][maxIndex[i]] != -1){
+                taxonProbabilityString += tree[indices[i][maxIndex[i]]]->getName() + '(' + toString(confidenceScore) + ");";
+                simpleTax += tree[indices[i][maxIndex[i]]]->getName() + ";";
+                //			levelProbabilityOutput << tree[indices[i][maxIndex[i]]]->getName() << '(' << setprecision(6) << pLevel_X[i] << ");";
+            }
+            else{
+                taxonProbabilityString + "unclassified" + '(' + toString(confidenceScore) + ");";
+                //			levelProbabilityOutput << "unclassified" << '(' << setprecision(6) << pLevel_X[i] << ");";
+                simpleTax += "unclassified;";
+            }
+            }else { break; }
+            savedspot = i;
+        }
+        
+        for(int i=savedspot+1;i<numLevels;i++){
+            if (m->control_pressed) { return taxonProbabilityString; }
+            taxonProbabilityString + "unclassified(0);";
+            simpleTax += "unclassified;";
+        }
+        
+        return taxonProbabilityString;
+	}
+    catch(exception& e) {
+        m->errorOut(e, "AlignTree", "getTaxonomy");
+        exit(1);
+    }
+}
+
+
+/**************************************************************************************************/
diff --git a/aligntree.h b/aligntree.h
new file mode 100755
index 0000000..51008ff
--- /dev/null
+++ b/aligntree.h
@@ -0,0 +1,34 @@
+//
+//  alignTree.h
+//  pdsBayesian
+//
+//  Created by Patrick Schloss on 4/3/12.
+//  Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+#ifndef pdsBayesian_alignTree_h
+#define pdsBayesian_alignTree_h
+
+#include "classify.h"
+
+class AlignNode;
+
+class AlignTree : public Classify {
+
+public:
+	AlignTree(string, string, int);
+	~AlignTree();
+	string getTaxonomy(Sequence*);
+	
+private:
+    int addTaxonomyToTree(string, string&, string&);
+	double getOutlierLogProbability(string&);
+	int getMinRiskIndexAlign(string&, vector<int>&, vector<double>&);
+	int aggregateThetas();
+	int sanityCheck(vector<vector<int> >&, vector<int>&);
+
+	int numSeqs, confidenceThreshold, length;
+	vector<AlignNode*> tree;
+};
+
+#endif
diff --git a/classify.cpp b/classify.cpp
index 459c90f..8aa3cdb 100644
--- a/classify.cpp
+++ b/classify.cpp
@@ -354,3 +354,37 @@ vector<string> Classify::parseTax(string tax) {
 }
 /**************************************************************************************************/
 
+double Classify::getLogExpSum(vector<double> probabilities, int& maxIndex){
+	try {
+        //	http://jblevins.org/notes/log-sum-exp
+        
+        double maxProb = probabilities[0];
+        maxIndex = 0;
+        
+        int numProbs = (int)probabilities.size();
+        
+        for(int i=1;i<numProbs;i++){
+            if(probabilities[i] >= maxProb){
+                maxProb = probabilities[i];
+                maxIndex = i;
+            }
+        }
+        
+        double probSum = 0.0000;
+        
+        for(int i=0;i<numProbs;i++){
+            probSum += exp(probabilities[i] - maxProb);		
+        }
+        
+        probSum = log(probSum) + maxProb;
+        
+        return probSum;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "Classify", "getLogExpSum");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
diff --git a/classify.h b/classify.h
index 6582be4..7b4c102 100644
--- a/classify.h
+++ b/classify.h
@@ -17,10 +17,8 @@
 #include "database.hpp"
 #include "phylotree.h"
 
-
 class Sequence;
 
-
 /**************************************************************************************************/
 
 class Classify {
@@ -37,7 +35,6 @@ public:
 protected:
 
 	map<string, string> taxonomy;  //name maps to taxonomy
-	//map<string, int> genusCount;  //maps genus to count - in essence a list of how many seqs are in each taxonomy
 	map<string, int>::iterator itTax;
 	map<string, string>::iterator it;
 	Database* database;
@@ -45,11 +42,12 @@ protected:
 	
 	string taxFile, templateFile, simpleTax;
 	vector<string> names;
-	int threadID;
+	int threadID, numLevels, numTaxa;
 	bool flip, flipped, shortcuts;
 	
 	int readTaxonomy(string);
 	vector<string> parseTax(string);
+    double getLogExpSum(vector<double>, int&);
 	MothurOut* m;
 	
 };
diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp
index 2e55673..36bccc2 100644
--- a/classifyseqscommand.cpp
+++ b/classifyseqscommand.cpp
@@ -21,9 +21,9 @@ vector<string> ClassifySeqsCommand::setParameters(){
         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
 		CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
 
-		CommandParameter psearch("search", "Multiple", "kmer-blast-suffix-distance", "kmer", "", "", "",false,false); parameters.push_back(psearch);
+		CommandParameter psearch("search", "Multiple", "kmer-blast-suffix-distance-align", "kmer", "", "", "",false,false); parameters.push_back(psearch);
 		CommandParameter pksize("ksize", "Number", "", "8", "", "", "",false,false); parameters.push_back(pksize);
-		CommandParameter pmethod("method", "Multiple", "bayesian-knn", "bayesian", "", "", "",false,false); parameters.push_back(pmethod);
+		CommandParameter pmethod("method", "Multiple", "wang-knn-zap", "wang", "", "", "",false,false); parameters.push_back(pmethod);
 		CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
 		CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
 		CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
@@ -55,11 +55,11 @@ string ClassifySeqsCommand::getHelpString(){
 		helpString += "The classify.seqs command reads a fasta file containing sequences and creates a .taxonomy file and a .tax.summary file.\n";
 		helpString += "The classify.seqs command parameters are reference, fasta, name, group, count, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend, numwanted and probs.\n";
 		helpString += "The reference, fasta and taxonomy parameters are required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
-		helpString += "The search parameter allows you to specify the method to find most similar template.  Your options are: suffix, kmer, blast and distance. The default is kmer.\n";
+		helpString += "The search parameter allows you to specify the method to find most similar template.  Your options are: suffix, kmer, blast, align and distance. The default is kmer.\n";
 		helpString += "The name parameter allows you add a names file with your fasta file, if you enter multiple fasta files, you must enter matching names files for them.\n";
 		helpString += "The group parameter allows you add a group file so you can have the summary totals broken up by group.\n";
         helpString += "The count parameter allows you add a count file so you can have the summary totals broken up by group.\n";
-		helpString += "The method parameter allows you to specify classification method to use.  Your options are: bayesian and knn. The default is bayesian.\n";
+		helpString += "The method parameter allows you to specify classification method to use.  Your options are: wang, knn and zap. The default is wang.\n";
 		helpString += "The ksize parameter allows you to specify the kmer size for finding most similar template to candidate.  The default is 8.\n";
 		helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
 #ifdef USE_MPI
@@ -72,8 +72,8 @@ string ClassifySeqsCommand::getHelpString(){
 		helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
 		helpString += "The numwanted parameter allows you to specify the number of sequence matches you want with the knn method.  The default is 10.\n";
 		helpString += "The cutoff parameter allows you to specify a bootstrap confidence threshold for your taxonomy.  The default is 0.\n";
-		helpString += "The probs parameter shuts off the bootstrapping results for the bayesian method. The default is true, meaning you want the bootstrapping to be shown.\n";
-		helpString += "The iters parameter allows you to specify how many iterations to do when calculating the bootstrap confidence score for your taxonomy with the bayesian method.  The default is 100.\n";
+		helpString += "The probs parameter shuts off the bootstrapping results for the wang and zap method. The default is true, meaning you want the bootstrapping to be shown.\n";
+		helpString += "The iters parameter allows you to specify how many iterations to do when calculating the bootstrap confidence score for your taxonomy with the wang method.  The default is 100.\n";
 		//helpString += "The flip parameter allows you shut off mothur's   The default is T.\n";
 		helpString += "The classify.seqs command should be in the following format: \n";
 		helpString += "classify.seqs(reference=yourTemplateFile, fasta=yourFastaFile, method=yourClassificationMethod, search=yourSearchmethod, ksize=yourKmerSize, taxonomy=yourTaxonomyFile, processors=yourProcessors) \n";
@@ -492,9 +492,6 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
 			//check for optional parameter and set defaults
 			// ...at some point should added some additional type checking...
 			string temp;
-			temp = validParameter.validFile(parameters, "ksize", false);		if (temp == "not found"){	temp = "8";				}
-			m->mothurConvert(temp, kmerSize); 
-			
 			temp = validParameter.validFile(parameters, "processors", false);	if (temp == "not found"){	temp = m->getProcessors();	}
 			m->setProcessors(temp);
 			m->mothurConvert(temp, processors); 
@@ -536,7 +533,13 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
 			
 			search = validParameter.validFile(parameters, "search", false);		if (search == "not found"){	search = "kmer";		}
 			
-			method = validParameter.validFile(parameters, "method", false);		if (method == "not found"){	method = "bayesian";	}
+			method = validParameter.validFile(parameters, "method", false);		if (method == "not found"){	method = "wang";	}
+            
+            temp = validParameter.validFile(parameters, "ksize", false);		if (temp == "not found"){	
+                temp = "8";	
+                if (method == "zap") { temp = "7"; }
+            }
+			m->mothurConvert(temp, kmerSize); 
 			
 			temp = validParameter.validFile(parameters, "match", false);		if (temp == "not found"){	temp = "1.0";			}
 			m->mothurConvert(temp, match);  
@@ -570,8 +573,13 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
 			m->mothurConvert(temp, iters); 
 
 			
-			if ((method == "bayesian") && (search != "kmer"))  { 
-				m->mothurOut("The bayesian method requires the kmer search." + search + "will be disregarded." ); m->mothurOutEndLine();
+			if ((method == "wang") && (search != "kmer"))  { 
+				m->mothurOut("The wang method requires the kmer search. " + search + " will be disregarded, and kmer will be used." ); m->mothurOutEndLine();
+				search = "kmer";
+			}
+            
+            if ((method == "zap") && ((search != "kmer") && (search != "align")))  { 
+				m->mothurOut("The zap method requires the kmer or align search. " + search + " will be disregarded, and kmer will be used." ); m->mothurOutEndLine();
 				search = "kmer";
 			}
 			
@@ -605,10 +613,16 @@ int ClassifySeqsCommand::execute(){
 	try {
 		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
         
-		if(method == "bayesian"){	classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts);		}
+        string outputMethodTag = method + ".";
+		if(method == "wang"){	classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts);	}
 		else if(method == "knn"){	classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand());				}
+        else if(method == "zap"){	
+            outputMethodTag = search + "_" + outputMethodTag;
+            if (search == "kmer") {   classify = new KmerTree(templateFileName, taxonomyFileName, kmerSize, cutoff); }
+            else {  classify = new AlignTree(templateFileName, taxonomyFileName, cutoff);  }
+        }
 		else {
-			m->mothurOut(search + " is not a valid method option. I will run the command using bayesian.");
+			m->mothurOut(search + " is not a valid method option. I will run the command using wang.");
 			m->mothurOutEndLine();
 			classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts);	
 		}
@@ -633,10 +647,10 @@ int ClassifySeqsCommand::execute(){
             if (RippedTaxName != "") { RippedTaxName +=  "."; }   
           
 			if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
-			string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + getOutputFileNameTag("taxonomy");
-			string newaccnosFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + getOutputFileNameTag("accnos");
+			string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + outputMethodTag + getOutputFileNameTag("taxonomy");
+			string newaccnosFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + outputMethodTag +getOutputFileNameTag("accnos");
 			string tempTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "taxonomy.temp";
-			string taxSummary = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + getOutputFileNameTag("taxsummary");
+			string taxSummary = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + outputMethodTag + getOutputFileNameTag("taxsummary");
 			
 			if ((method == "knn") && (search == "distance")) { 
 				string DistName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("matchdist");
@@ -798,12 +812,12 @@ int ClassifySeqsCommand::execute(){
                 if (hasCount) { 
                     ct = new CountTable();
                     ct->readTable(countfileNames[s]);
-                    taxaSum = new PhyloSummary(baseTName, ct);
+                    taxaSum = new PhyloSummary(taxonomyFileName, ct);
                     taxaSum->summarize(tempTaxonomyFile);
                 }else {
                     if (groupfile != "") {  group = groupfileNames[s]; groupMap = new GroupMap(group); groupMap->readMap(); }
                     
-                    taxaSum = new PhyloSummary(baseTName, groupMap);
+                    taxaSum = new PhyloSummary(taxonomyFileName, groupMap);
                     
                     if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; }  if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);	} delete classify; return 0; }
                     
diff --git a/classifyseqscommand.h b/classifyseqscommand.h
index 6d11d92..0cffec6 100644
--- a/classifyseqscommand.h
+++ b/classifyseqscommand.h
@@ -19,9 +19,11 @@
 #include "phylotree.h"
 #include "phylosummary.h"
 #include "knn.h"
+#include "kmertree.h"
+#include "aligntree.h"
 
 
-//KNN and Bayesian methods modeled from algorithms in
+//KNN and Wang methods modeled from algorithms in
 //NaÄ±Â¨ve Bayesian Classiï¬er for Rapid Assignment of rRNA Sequences 
 //into the New Bacterial Taxonomyô°â  
 //Qiong Wang,1 George M. Garrity,1,2 James M. Tiedje,1,2 and James R. Cole1* 
@@ -166,6 +168,11 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){
 		Classify* myclassify;
 		if(pDataArray->method == "bayesian"){	myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip, pDataArray->writeShortcuts);		}
 		else if(pDataArray->method == "knn"){	myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID);				}
+        else if(pDataArray->method == "zap"){	
+            outputMethodTag = search + "_" + outputMethodTag;
+            if (pDataArray->search == "kmer") {   myclassify = new KmerTree(pDataArray->templateFileName, pDataArray->taxonomyFileName, pDataArray->kmerSize, pDataArray->cutoff); }
+            else {  myclassify = new AlignTree(pDataArray->templateFileName, pDataArray->taxonomyFileName, pDataArray->cutoff);  }
+        }
 		else {
 			pDataArray->m->mothurOut(pDataArray->search + " is not a valid method option. I will run the command using bayesian.");
 			pDataArray->m->mothurOutEndLine();
diff --git a/kmernode.cpp b/kmernode.cpp
new file mode 100755
index 0000000..c087cac
--- /dev/null
+++ b/kmernode.cpp
@@ -0,0 +1,209 @@
+/*
+ *  kmerNode.cpp
+ *  bayesian
+ *
+ *  Created by Pat Schloss on 10/11/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "kmernode.h"
+
+
+/**********************************************************************************************************************/
+
+KmerNode::KmerNode(string s, int l, int n) : TaxonomyNode(s, l), kmerSize(n) {
+	try {
+        int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
+        
+        numPossibleKmers = power4s[kmerSize];
+        numUniqueKmers = 0;
+        
+        kmerVector.assign(numPossibleKmers, 0);
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerNode", "KmerNode");
+		exit(1);
+	}
+}
+
+/**********************************************************************************************************************/
+
+void KmerNode::loadSequence(vector<int>& kmerProfile){
+	try {
+        for(int i=0;i<numPossibleKmers;i++){
+            if (m->control_pressed) { break; }
+            if(kmerVector[i] == 0 && kmerProfile[i] != 0)	{	numUniqueKmers++;	}
+            
+            kmerVector[i] += kmerProfile[i];
+        }
+        
+        numSeqs++;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerNode", "loadSequence");
+		exit(1);
+	}
+}	
+
+/**********************************************************************************************************************/
+
+string KmerNode::getKmerBases(int kmerNumber){
+	try {
+        //	Here we convert the kmer number into the kmer in terms of bases.
+        //
+        //	Example:	Score = 915 (for a 6-mer)
+        //				Base6 = (915 / 4^0) % 4 = 915 % 4 = 3 => T	[T]
+        //				Base5 = (915 / 4^1) % 4 = 228 % 4 = 0 => A	[AT]
+        //				Base4 = (915 / 4^2) % 4 = 57 % 4 = 1 => C	[CAT]
+        //				Base3 = (915 / 4^3) % 4 = 14 % 4 = 2 => G	[GCAT]
+        //				Base2 = (915 / 4^4) % 4 = 3 % 4 = 3 => T	[TGCAT]
+        //				Base1 = (915 / 4^5) % 4 = 0 % 4 = 0 => A	[ATGCAT] -> this checks out with the previous method
+        
+        int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
+        
+        string kmer = "";
+        
+        if(kmerNumber == power4s[kmerSize]){//pow(4.,7)){	//	if the kmer number is the same as the maxKmer then it must
+            for(int i=0;i<kmerSize;i++){					//	have had an N in it and so we'll just call it N x kmerSize
+                kmer += 'N';
+            }
+        }
+        else{
+            for(int i=0;i<kmerSize;i++){
+                if (m->control_pressed) { return kmer; }
+                int nt = (int)(kmerNumber / (float)power4s[i]) % 4;		//	the '%' operator returns the remainder 
+                if(nt == 0)		{	kmer = 'A' + kmer;	}				//	from int-based division ]
+                else if(nt == 1){	kmer = 'C' + kmer;	}
+                else if(nt == 2){	kmer = 'G' + kmer;	}
+                else if(nt == 3){	kmer = 'T' + kmer;	}
+            }
+        }
+        return kmer;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerNode", "getKmerBases");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
+void KmerNode::addThetas(vector<int> newTheta, int newNumSeqs){
+	try {
+        for(int i=0;i<numPossibleKmers;i++){
+            if (m->control_pressed) { break; }
+            kmerVector[i] += newTheta[i];		
+        }
+        
+        //	if(alignLength == 0){
+        //		alignLength = (int)newTheta.size();
+        //		theta.resize(alignLength);
+        //		columnCounts.resize(alignLength);
+        //	}
+        //	
+        //	for(int i=0;i<alignLength;i++){	
+        //		theta[i].A += newTheta[i].A;		columnCounts[i] += newTheta[i].A;
+        //		theta[i].T += newTheta[i].T;		columnCounts[i] += newTheta[i].T;
+        //		theta[i].G += newTheta[i].G;		columnCounts[i] += newTheta[i].G;
+        //		theta[i].C += newTheta[i].C;		columnCounts[i] += newTheta[i].C;
+        //		theta[i].gap += newTheta[i].gap;	columnCounts[i] += newTheta[i].gap;
+        //	}
+        
+        numSeqs += newNumSeqs;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerNode", "addThetas");
+		exit(1);
+	}
+}
+
+/**********************************************************************************************************************/
+
+int KmerNode::getNumUniqueKmers(){
+    try {
+        if(numUniqueKmers == 0){
+            
+            for(int i=0;i<numPossibleKmers;i++){
+                if (m->control_pressed) { return numUniqueKmers; }
+                if(kmerVector[i] != 0){
+                    numUniqueKmers++;
+                }
+                
+            }
+            
+        }
+        
+        return numUniqueKmers;	
+        
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerNode", "getNumUniqueKmers");
+		exit(1);
+	}
+}
+
+/**********************************************************************************************************************/
+
+void KmerNode::printTheta(){
+	try {
+        m->mothurOut(name + "\n");
+        for(int i=0;i<numPossibleKmers;i++){
+            if(kmerVector[i] != 0){
+                m->mothurOut(getKmerBases(i) + '\t' + toString(kmerVector[i]) + "\n");
+            }
+        }
+        m->mothurOutEndLine();	
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerNode", "printTheta");
+		exit(1);
+	}
+	
+}
+/**************************************************************************************************/
+
+double KmerNode::getSimToConsensus(vector<int>& queryKmerProfile){
+	try {
+        double present = 0;
+        
+        for(int i=0;i<numPossibleKmers;i++){
+            if (m->control_pressed) { return present; }
+            if(queryKmerProfile[i] != 0 && kmerVector[i] != 0){
+                present++;
+            }
+        }	
+        
+        return present / double(queryKmerProfile.size() - kmerSize + 1);
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerNode", "getSimToConsensus");
+		exit(1);
+	}
+}
+
+/**********************************************************************************************************************/
+
+double KmerNode::getPxGivenkj_D_j(vector<int>& queryKmerProfile)	{	
+	try {
+        double sumLogProb = 0.0000;
+        double alpha = 1.0 / (double)totalSeqs;	//flat prior
+        //	double alpha = pow((1.0 / (double)numUniqueKmers), numSeqs)+0.0001;	//non-flat prior
+        
+        for(int i=0;i<numPossibleKmers;i++){
+            if (m->control_pressed) { return sumLogProb; }
+            if(queryKmerProfile[i] != 0){		//numUniqueKmers needs to be the value from Root;
+                sumLogProb += log((kmerVector[i] + alpha) / (numSeqs + numUniqueKmers * alpha));
+            }
+            
+        }
+        return sumLogProb;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerNode", "getPxGivenkj_D_j");
+		exit(1);
+	}
+
+}
+
+/**********************************************************************************************************************/
diff --git a/kmernode.h b/kmernode.h
new file mode 100755
index 0000000..e15fb1d
--- /dev/null
+++ b/kmernode.h
@@ -0,0 +1,45 @@
+#ifndef KMERNODE
+#define KMERNODE
+
+/*
+ *  kmerNode.h
+ *  bayesian
+ *
+ *  Created by Pat Schloss on 10/11/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+
+#include "taxonomynode.h"
+
+/**********************************************************************************************************************/
+
+class KmerNode : public TaxonomyNode {
+	
+public:
+	KmerNode(string, int, int);
+	void loadSequence(vector<int>&);
+	void printTheta();
+	double getPxGivenkj_D_j(vector<int>&);
+	double getSimToConsensus(vector<int>&);
+	void checkTheta(){};
+	void setNumUniqueKmers(int num)	{	numUniqueKmers = num;	}
+	int getNumUniqueKmers();
+	void addThetas(vector<int>, int);
+	vector<int> getTheta()	{	return kmerVector;	}
+
+
+private:
+	string getKmerBases(int);
+	int kmerSize;								//	value of k
+	int numPossibleKmers;						//	4^kmerSize
+	int numUniqueKmers;							//	number of unique kmers seen in a group ~ O_kj
+	int numKmers;								//	number of kmers in a sequence
+	vector<int> kmerVector;						//	counts of kmers across all sequences in a node
+};
+
+/**********************************************************************************************************************/
+
+#endif
+
diff --git a/kmertree.cpp b/kmertree.cpp
new file mode 100755
index 0000000..fbf2bfb
--- /dev/null
+++ b/kmertree.cpp
@@ -0,0 +1,386 @@
+//
+//  kmerTree.cpp
+//  pdsBayesian
+//
+//  Created by Patrick Schloss on 4/3/12.
+//  Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+#include "kmernode.h"
+#include "kmertree.h"
+
+/**************************************************************************************************/
+
+KmerTree::KmerTree(string referenceFileName, string taxonomyFileName, int k, int cutoff) : Classify(), confidenceThreshold(cutoff), kmerSize(k){
+	try {
+        KmerNode* newNode = new KmerNode("Root", 0, kmerSize);
+        tree.push_back(newNode);			//	the tree is stored as a vector of elements of type TaxonomyNode
+        
+        int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
+        numPossibleKmers = power4s[kmerSize];
+        
+        string refTaxonomy;
+        
+        readTaxonomy(taxonomyFileName);
+        
+        ifstream referenceFile;
+        m->openInputFile(referenceFileName, referenceFile);
+        bool error = false;
+        while(!referenceFile.eof()){
+            
+            if (m->control_pressed) { break; }
+            
+            Sequence seq(referenceFile);  m->gobble(referenceFile);
+            
+            if (seq.getName() != "") {
+                map<string, string>::iterator it = taxonomy.find(seq.getName());
+                
+                if (it != taxonomy.end()) {
+                    refTaxonomy = it->second;		//	lookup the taxonomy string for the current reference sequence
+                    vector<int> kmerProfile = ripKmerProfile(seq.getUnaligned());	//convert to kmer vector
+                    addTaxonomyToTree(seq.getName(), refTaxonomy, kmerProfile);
+                }else {
+                    m->mothurOut(seq.getName() + " is in your reference file, but not in your taxonomy file, please correct.\n"); error = true;
+                }
+            }
+        }
+        referenceFile.close();
+        
+        if (error) { m->control_pressed = true; }
+        
+        numTaxa = (int)tree.size();
+        numLevels = 0;
+        for(int i=0;i<numTaxa;i++){
+            int level = tree[i]->getLevel();
+            if(level > numLevels){	numLevels = level;	}
+        }
+        numLevels++;
+        
+        aggregateThetas();
+        
+        int dbSize = tree[0]->getNumSeqs();
+        
+        for(int i=0;i<numTaxa;i++){
+            tree[i]->checkTheta();
+            tree[i]->setNumUniqueKmers(tree[0]->getNumUniqueKmers());
+            tree[i]->setTotalSeqs(dbSize);
+        }
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerTree", "KmerTree");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
+KmerTree::~KmerTree(){
+	
+	for(int i=0;i<tree.size();i++){
+		delete tree[i];
+	}
+	
+}	
+/**********************************************************************************************************************/
+
+vector<int> KmerTree::ripKmerProfile(string sequence){
+    try {
+        //	assume all input sequences are unaligned
+        
+        int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
+        
+        int nKmers = (int)sequence.length() - kmerSize + 1;
+        
+        vector<int> kmerProfile(numPossibleKmers + 1, 0);
+        
+        for(int i=0;i<nKmers;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            int kmer = 0;
+            for(int j=0;j<kmerSize;j++){
+                if(toupper(sequence[j+i]) == 'A')		{	kmer += (0 * power4s[kmerSize-j-1]);	}
+                else if(toupper(sequence[j+i]) == 'C')	{	kmer += (1 * power4s[kmerSize-j-1]);	}
+                else if(toupper(sequence[j+i]) == 'G')	{	kmer += (2 * power4s[kmerSize-j-1]);	}
+                else if(toupper(sequence[j+i]) == 'U')	{	kmer += (3 * power4s[kmerSize-j-1]);	}
+                else if(toupper(sequence[j+i]) == 'T')	{	kmer += (3 * power4s[kmerSize-j-1]);	}
+                else									{	kmer = power4s[kmerSize]; j = kmerSize;	}
+            }
+            kmerProfile[kmer] = 1;
+        }
+        
+        return kmerProfile;	
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerTree", "ripKmerProfile");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
+int KmerTree::addTaxonomyToTree(string seqName, string taxonomy, vector<int>& sequence){
+	try {
+        KmerNode* newNode;
+        string taxonName = "";
+        int treePosition = 0;							//	the root is element 0
+        
+        
+        int level = 1;
+        
+        for(int i=0;i<taxonomy.length();i++){			//	step through taxonomy string...
+            
+            if (m->control_pressed) { break; }
+            if(taxonomy[i] == ';'){						//	looking for semicolons...
+                
+                if (taxonName == "") {  m->mothurOut(seqName + " has an error in the taxonomy.  This may be due to a ;;"); m->mothurOutEndLine(); m->control_pressed = true; }
+                
+                int newIndex = tree[treePosition]->getChildIndex(taxonName);//	look to see if your current node already
+                //	   has a child with the new taxonName
+                if(newIndex != -1)	{	treePosition = newIndex;	}		//	if you've seen it before, jump to that
+                else {														//	   position in the tree
+                    int newChildIndex = (int)tree.size();					//	otherwise, we'll have to create one...
+                    tree[treePosition]->makeChild(taxonName, newChildIndex);
+                    
+                    newNode = new KmerNode(taxonName, level, kmerSize);
+                    
+                    newNode->setParent(treePosition);
+                    
+                    tree.push_back(newNode);
+                    treePosition = newChildIndex;
+                }
+                
+                //	sequence data to that node to update that node's theta - seems slow...				
+                taxonName = "";								//	clear out the taxon name that we will build as we look 
+                level++;
+                
+            }												//	for a semicolon
+            else{
+                taxonName += taxonomy[i];					//	keep adding letters until we reach a semicolon
+            }
+        }
+        
+        tree[treePosition]->loadSequence(sequence);	//	now that we've gotten to the correct node, add the
+        
+        return 0;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerTree", "addTaxonomyToTree");
+		exit(1);
+	}
+	
+}
+
+/**************************************************************************************************/
+
+int KmerTree::aggregateThetas(){
+	try {
+        vector<vector<int> > levelMatrix(numLevels+1);
+        
+        for(int i=0;i<tree.size();i++){
+            if (m->control_pressed) { return 0; }
+            levelMatrix[tree[i]->getLevel()].push_back(i);
+        }
+        
+        for(int i=numLevels-1;i>0;i--) {
+            if (m->control_pressed) { return 0; }
+            
+            for(int j=0;j<levelMatrix[i].size();j++){
+                
+                KmerNode* holder = tree[levelMatrix[i][j]];
+                
+                tree[holder->getParent()]->addThetas(holder->getTheta(), holder->getNumSeqs());				
+            }
+        }
+        
+        return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "KmerTree", "aggregateThetas");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
+int KmerTree::getMinRiskIndexKmer(vector<int>& sequence, vector<int>& taxaIndices, vector<double>& probabilities){
+	try {
+        int numProbs = (int)probabilities.size();
+        
+        vector<double> G(numProbs, 0.2);	//a random sequence will, on average, be 20% similar to any other sequence; not sure that this holds up for kmers; whatever.
+        vector<double> risk(numProbs, 0);
+        
+        for(int i=1;i<numProbs;i++){ //use if you want the outlier group
+            if (m->control_pressed) { return 0; }
+            G[i] = tree[taxaIndices[i]]->getSimToConsensus(sequence);
+        }
+        
+        double minRisk = 1e6;
+        int minRiskIndex = 0;
+        
+        for(int i=0;i<numProbs;i++){
+            if (m->control_pressed) { return 0; }
+            for(int j=0;j<numProbs;j++){
+                if(i != j){
+                    risk[i] += probabilities[j] * G[j];
+                }			
+            }
+            
+            if(risk[i] < minRisk){
+                minRisk = risk[i];
+                minRiskIndex = i;
+            }
+        }
+        
+        return minRiskIndex;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "KmerTree", "getMinRiskIndexKmer");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+
+int KmerTree::sanityCheck(vector<vector<int> >& indices, vector<int>& maxIndices){
+	try {
+        int finalLevel = (int)indices.size()-1;
+        
+        for(int position=1;position<indices.size();position++){
+            if (m->control_pressed) { return 0; }
+            int predictedParent = tree[indices[position][maxIndices[position]]]->getParent();
+            int actualParent = indices[position-1][maxIndices[position-1]];
+            
+            if(predictedParent != actualParent){
+                finalLevel = position - 1;
+                return finalLevel;
+            }
+        }
+        return finalLevel;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "KmerTree", "sanityCheck");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
+string KmerTree::getTaxonomy(Sequence* thisSeq){
+	try {
+        string seqName = thisSeq->getName(); string querySequence = thisSeq->getAligned(); string taxonProbabilityString = "";
+        string unalignedSeq = thisSeq->getUnaligned();
+        
+        double logPOutlier = (querySequence.length() - kmerSize + 1) * log(1.0/(double)tree[0]->getNumUniqueKmers());
+        
+        vector<int> queryProfile = ripKmerProfile(unalignedSeq);	//convert to kmer vector
+        
+        vector<vector<double> > pXgivenKj_D_j(numLevels);
+        vector<vector<int> > indices(numLevels);
+        for(int i=0;i<numLevels;i++){
+            if (m->control_pressed) { return taxonProbabilityString; }
+            pXgivenKj_D_j[i].push_back(logPOutlier);
+            indices[i].push_back(-1);
+        }
+        
+        for(int i=0;i<numTaxa;i++){
+            if (m->control_pressed) { return taxonProbabilityString; }
+            pXgivenKj_D_j[tree[i]->getLevel()].push_back(tree[i]->getPxGivenkj_D_j(queryProfile));
+            indices[tree[i]->getLevel()].push_back(i);
+        }
+        
+        vector<double> sumLikelihood(numLevels, 0);
+        vector<double> bestPosterior(numLevels, 0);
+        vector<int> maxIndex(numLevels, 0);
+        int maxPosteriorIndex;
+        
+        //let's find the best level and taxa within that level
+        for(int i=0;i<numLevels;i++){ //go across all j's - from the root to genus
+            if (m->control_pressed) { return taxonProbabilityString; }
+            
+            int numTaxaInLevel = (int)indices[i].size();
+            
+            vector<double> posteriors(numTaxaInLevel, 0);		
+            sumLikelihood[i] = getLogExpSum(pXgivenKj_D_j[i], maxPosteriorIndex);
+            
+            maxPosteriorIndex = 0;
+            for(int j=0;j<numTaxaInLevel;j++){
+                posteriors[j] = exp(pXgivenKj_D_j[i][j] - sumLikelihood[i]);
+                if(posteriors[j] > posteriors[maxPosteriorIndex]){	
+                    maxPosteriorIndex = j;
+                }
+                
+            }
+            
+            maxIndex[i] = getMinRiskIndexKmer(queryProfile, indices[i], posteriors);
+            
+            maxIndex[i] = maxPosteriorIndex;
+            bestPosterior[i] = posteriors[maxIndex[i]];	
+        }
+        
+        //	vector<double> pX_level(numLevels, 0);
+        //	
+        //	for(int i=0;i<numLevels;i++){
+        //		pX_level[i] = pXgivenKj_D_j[i][maxIndex[i]] - tree[indices[i][maxIndex[i]]]->getNumSeqs();
+        //	}
+        //	
+        //	int max_pLevel_X_index = -1;
+        //	double pX_level_sum = getLogExpSum(pX_level, max_pLevel_X_index);
+        //	double max_pLevel_X = exp(pX_level[max_pLevel_X_index] - pX_level_sum);
+        //	
+        //	vector<double> pLevel_X(numLevels, 0);
+        //	for(int i=0;i<numLevels;i++){
+        //		pLevel_X[i] = exp(pX_level[i] - pX_level_sum);
+        //	}
+        
+        int saneDepth = sanityCheck(indices, maxIndex);
+        
+        
+        //	stringstream levelProbabilityOutput;
+        //	levelProbabilityOutput.setf(ios::fixed, ios::floatfield);
+        //	levelProbabilityOutput.setf(ios::showpoint);
+        
+        
+        //taxonProbabilityOutput << seqName << '\t';
+        //	taxonProbabilityOutput << seqName << '(' << max_pLevel_X_index << ';' << max_pLevel_X << ')' << '\t';
+        //	levelProbabilityOutput << seqName << '(' << max_pLevel_X_index << ';' << max_pLevel_X << ')' << '\t';
+        simpleTax = "";
+        int savedspot = 1;
+        taxonProbabilityString = "";
+        for(int i=1;i<=saneDepth;i++){
+            if (m->control_pressed) { return taxonProbabilityString; }
+            int confidenceScore = (int) (bestPosterior[i] * 100);
+            if (confidenceScore >= confidenceThreshold) {
+                if(indices[i][maxIndex[i]] != -1){
+                    taxonProbabilityString += tree[indices[i][maxIndex[i]]]->getName() + "(" + toString(confidenceScore) + ");";
+                    simpleTax += tree[indices[i][maxIndex[i]]]->getName() + ";";
+                    
+                    //			levelProbabilityOutput << tree[indices[i][maxIndex[i]]]->getName() << '(' << setprecision(6) << pLevel_X[i] << ");";
+                }
+                else{
+                    taxonProbabilityString += "unclassified" + '(' + toString(confidenceScore) + ");";
+                    //			levelProbabilityOutput << "unclassified" << '(' << setprecision(6) << pLevel_X[i] << ");";
+                    simpleTax += "unclassified;";
+                }
+            }else { break; }
+            savedspot = i;
+        }
+        
+        
+        
+        for(int i=savedspot+1;i<numLevels;i++){
+            if (m->control_pressed) { return taxonProbabilityString; }
+            taxonProbabilityString += "unclassified(0);";
+            simpleTax += "unclassified;";
+        }
+        
+        return taxonProbabilityString;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "KmerTree", "getTaxonomy");
+		exit(1);
+	}
+}
+
+
+/**************************************************************************************************/
+
diff --git a/kmertree.h b/kmertree.h
new file mode 100755
index 0000000..f7c10ef
--- /dev/null
+++ b/kmertree.h
@@ -0,0 +1,37 @@
+//
+//  kmerTree.h
+//  pdsBayesian
+//
+//  Created by Patrick Schloss on 4/3/12.
+//  Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+#ifndef pdsBayesian_kmerTree_h
+#define pdsBayesian_kmerTree_h
+
+#include "classify.h"
+
+class KmerNode;
+
+class KmerTree : public Classify {
+	
+public:
+	KmerTree(string, string, int, int);
+	~KmerTree();
+	
+    string getTaxonomy(Sequence*);
+
+private:
+    int addTaxonomyToTree(string, string, vector<int>&);
+	vector<int> ripKmerProfile(string);
+	int getMinRiskIndexKmer(vector<int>&, vector<int>&, vector<double>&);
+	int aggregateThetas();
+	int sanityCheck(vector<vector<int> >&, vector<int>&);
+
+	int kmerSize;
+	int numPossibleKmers, confidenceThreshold;
+	vector<KmerNode*> tree;
+
+};
+
+#endif
diff --git a/taxonomynode.cpp b/taxonomynode.cpp
new file mode 100755
index 0000000..b90bda1
--- /dev/null
+++ b/taxonomynode.cpp
@@ -0,0 +1,72 @@
+/*
+ *  taxonomynode.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 7/8/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+/**************************************************************************************************/
+
+#include "taxonomynode.h"
+
+/**************************************************************************************************/
+
+TaxonomyNode::TaxonomyNode(string n, int l): name(n), level(l){
+    m = MothurOut::getInstance();
+	parent = -1;
+	numChildren = 0;
+	numSeqs = 0;
+}
+
+/**************************************************************************************************/
+
+void TaxonomyNode::setName(string n)			{	name = n;					}
+
+/**************************************************************************************************/
+
+string TaxonomyNode::getName()					{	return name;				}
+
+/**************************************************************************************************/
+
+void TaxonomyNode::setParent(int p)				{	parent = p;					}
+
+/**************************************************************************************************/
+
+int TaxonomyNode::getParent()					{	return parent;				}
+
+/**************************************************************************************************/
+
+void TaxonomyNode::makeChild(string c, int i)	{	children[c] = i;			}
+
+
+/**************************************************************************************************/
+
+map<string, int> TaxonomyNode::getChildren()	{	return children;			}
+
+/**************************************************************************************************/
+
+int TaxonomyNode::getChildIndex(string c){
+	map<string, int>::iterator it = children.find(c);
+	if(it != children.end())	{	return it->second;			}
+	else						{	return -1;					}	
+}
+
+/**************************************************************************************************/
+
+int	TaxonomyNode::getNumKids()					{	return (int)children.size();		}
+
+/**************************************************************************************************/
+
+int	TaxonomyNode::getNumSeqs()					{	return numSeqs;				}
+
+/**************************************************************************************************/
+
+void TaxonomyNode::setTotalSeqs(int n)			{	totalSeqs = n;				}
+
+/**************************************************************************************************/
+
+int TaxonomyNode::getLevel()					{	return level;				}
+
+/**************************************************************************************************/
diff --git a/taxonomynode.h b/taxonomynode.h
new file mode 100755
index 0000000..08bad3e
--- /dev/null
+++ b/taxonomynode.h
@@ -0,0 +1,53 @@
+#ifndef TAXONOMYNODE
+#define TAXONOMYNODE
+
+/*
+ *  taxonomynode.h
+ *  
+ *
+ *  Created by Pat Schloss on 7/8/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+/**************************************************************************************************/
+
+#include "mothurout.h"
+/**************************************************************************************************/
+
+class TaxonomyNode {
+	
+public:
+	TaxonomyNode();
+	TaxonomyNode(string, int);
+	void setName(string);
+	string getName();
+
+
+	void setParent(int);
+	int getParent();
+	
+	void makeChild(string, int);
+	map<string, int> getChildren();
+	int getChildIndex(string);
+	int	getNumKids();
+	int getNumSeqs();
+	void setTotalSeqs(int);
+	int getLevel();
+	
+private:
+	int parent;
+	map<string, int> children;
+	int numChildren;
+	int level;
+	
+protected:
+    MothurOut* m;
+	int numSeqs;
+	int totalSeqs;
+	string name;
+};
+
+/**************************************************************************************************/
+
+#endif
-- 
2.39.2