From: westcott <westcott>
Date: Thu, 5 Nov 2009 14:40:15 +0000 (+0000)
Subject: started work on classify.seqs command. changed the database class so that it does... 
X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=63e089e0b3aad1741bab60119ed7ccc784dce347

started work on classify.seqs command. changed the database class so that it does not store the template sequences. created alignmentDB class that stores template seqs and uses database class for searching.  made changes to aligncommand and chimeracheckrdp to reflect the changes to database class.
---

diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj
index 252c02e..9b064e0 100644
--- a/Mothur.xcodeproj/project.pbxproj
+++ b/Mothur.xcodeproj/project.pbxproj
@@ -72,7 +72,6 @@
 		379293C30F2DE73400B9034A /* treemap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379293C20F2DE73400B9034A /* treemap.cpp */; };
 		379294700F2E191800B9034A /* parsimonycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3792946F0F2E191800B9034A /* parsimonycommand.cpp */; };
 		3792948A0F2E258500B9034A /* parsimony.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379294890F2E258500B9034A /* parsimony.cpp */; };
-		3799A9500FD6A58C00E33EDE /* distancedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3799A94C0FD6A58C00E33EDE /* distancedb.cpp */; };
 		3799A9510FD6A58C00E33EDE /* seqsummarycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3799A94E0FD6A58C00E33EDE /* seqsummarycommand.cpp */; };
 		379D3D510FF90E090068C1C0 /* chimeraseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379D3D500FF90E090068C1C0 /* chimeraseqscommand.cpp */; };
 		37AD4CE40F28AEA300AA2D49 /* sharedlistvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AD4CE30F28AEA300AA2D49 /* sharedlistvector.cpp */; };
@@ -169,6 +168,13 @@
 		A73329CF1083B3B3003B10C5 /* getlistcountcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */; };
 		A751ACEA1098B283003D0911 /* readcluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A751ACE91098B283003D0911 /* readcluster.cpp */; };
 		A75B887E104C16860083C454 /* ccode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75B887B104C16860083C454 /* ccode.cpp */; };
+		A7861478109F5FA00010AAAF /* classifyseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7861477109F5FA00010AAAF /* classifyseqscommand.cpp */; };
+		A787810310A06D5D0086103D /* classify.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A787810210A06D5D0086103D /* classify.cpp */; };
+		A787811510A074330086103D /* phylotype.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A787811410A074330086103D /* phylotype.cpp */; };
+		A78781BA10A0813E0086103D /* doTaxonomy.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A78781B810A0813D0086103D /* doTaxonomy.cpp */; };
+		A787821110A0AAE70086103D /* bayesian.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A787821010A0AAE70086103D /* bayesian.cpp */; };
+		A78782AB10A1B1CB0086103D /* alignmentdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A78782AA10A1B1CB0086103D /* alignmentdb.cpp */; };
+		A787844510A1EBDD0086103D /* knn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A787844410A1EBDD0086103D /* knn.cpp */; };
 		A7B04493106CC3E60046FC83 /* chimeraslayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */; };
 		A7B0450E106CEEC90046FC83 /* slayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B0450D106CEEC90046FC83 /* slayer.cpp */; };
 		A7E4A783106913F900688F62 /* getsharedotucommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E4A782106913F900688F62 /* getsharedotucommand.cpp */; };
@@ -328,8 +334,6 @@
 		379294880F2E258500B9034A /* parsimony.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parsimony.h; sourceTree = SOURCE_ROOT; };
 		379294890F2E258500B9034A /* parsimony.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parsimony.cpp; sourceTree = SOURCE_ROOT; };
 		3792948D0F2E271100B9034A /* treecalculator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treecalculator.h; sourceTree = SOURCE_ROOT; };
-		3799A94C0FD6A58C00E33EDE /* distancedb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = distancedb.cpp; sourceTree = SOURCE_ROOT; };
-		3799A94D0FD6A58C00E33EDE /* distancedb.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = distancedb.hpp; sourceTree = SOURCE_ROOT; };
 		3799A94E0FD6A58C00E33EDE /* seqsummarycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = seqsummarycommand.cpp; sourceTree = SOURCE_ROOT; };
 		3799A94F0FD6A58C00E33EDE /* seqsummarycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seqsummarycommand.h; sourceTree = SOURCE_ROOT; };
 		379D3D4F0FF90E090068C1C0 /* chimeraseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraseqscommand.h; sourceTree = SOURCE_ROOT; };
@@ -538,6 +542,20 @@
 		A751ACE91098B283003D0911 /* readcluster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readcluster.cpp; sourceTree = SOURCE_ROOT; };
 		A75B887B104C16860083C454 /* ccode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ccode.cpp; sourceTree = SOURCE_ROOT; };
 		A75B887C104C16860083C454 /* ccode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ccode.h; sourceTree = SOURCE_ROOT; };
+		A7861476109F5FA00010AAAF /* classifyseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classifyseqscommand.h; sourceTree = "<group>"; };
+		A7861477109F5FA00010AAAF /* classifyseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classifyseqscommand.cpp; sourceTree = "<group>"; };
+		A78780FE10A06B8B0086103D /* classify.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classify.h; sourceTree = SOURCE_ROOT; };
+		A787810210A06D5D0086103D /* classify.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classify.cpp; sourceTree = SOURCE_ROOT; };
+		A787811310A074330086103D /* phylotype.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylotype.h; sourceTree = SOURCE_ROOT; };
+		A787811410A074330086103D /* phylotype.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylotype.cpp; sourceTree = SOURCE_ROOT; };
+		A78781B810A0813D0086103D /* doTaxonomy.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = doTaxonomy.cpp; sourceTree = "<group>"; };
+		A78781B910A0813D0086103D /* doTaxonomy.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = doTaxonomy.h; sourceTree = "<group>"; };
+		A787820F10A0AAE70086103D /* bayesian.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bayesian.h; sourceTree = "<group>"; };
+		A787821010A0AAE70086103D /* bayesian.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bayesian.cpp; sourceTree = "<group>"; };
+		A78782A910A1B1CB0086103D /* alignmentdb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alignmentdb.h; sourceTree = "<group>"; };
+		A78782AA10A1B1CB0086103D /* alignmentdb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignmentdb.cpp; sourceTree = "<group>"; };
+		A787844310A1EBDD0086103D /* knn.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = knn.h; sourceTree = "<group>"; };
+		A787844410A1EBDD0086103D /* knn.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = knn.cpp; sourceTree = "<group>"; };
 		A7B04491106CC3E60046FC83 /* chimeraslayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraslayer.h; sourceTree = SOURCE_ROOT; };
 		A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeraslayer.cpp; sourceTree = SOURCE_ROOT; };
 		A7B0450C106CEEC90046FC83 /* slayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = slayer.h; sourceTree = SOURCE_ROOT; };
@@ -592,10 +610,9 @@
 				373C68DB0FC1C38D00137ACD /* blastalign.cpp */,
 				37D928A60F2133C0001D4494 /* calculators */,
 				374F2FE81006346600E97C66 /* chimera */,
+				A787811210A073F40086103D /* classifiers */,
 				37D927C20F21331F001D4494 /* cluster.hpp */,
 				37D927C10F21331F001D4494 /* cluster.cpp */,
-				A729ACE210849BBE00139801 /* hcluster.h */,
-				A729ACE310849BBE00139801 /* hcluster.cpp */,
 				37D928A90F2133E5001D4494 /* commands */,
 				37D927C60F21331F001D4494 /* collect.h */,
 				37D927C50F21331F001D4494 /* collect.cpp */,
@@ -619,6 +636,8 @@
 				37D927DF0F21331F001D4494 /* globaldata.cpp */,
 				373C68BB0FC1C1A600137ACD /* gotohoverlap.hpp */,
 				373C68BA0FC1C1A600137ACD /* gotohoverlap.cpp */,
+				A729ACE210849BBE00139801 /* hcluster.h */,
+				A729ACE310849BBE00139801 /* hcluster.cpp */,
 				375873ED0F7D646F0040F377 /* heatmap.h */,
 				375873EE0F7D646F0040F377 /* heatmap.cpp */,
 				378598720FDD4C1500EF9D03 /* heatmapsim.h */,
@@ -847,6 +866,8 @@
 				377326630FAF16E0007ABB8B /* consensuscommand.cpp */,
 				379D3D4F0FF90E090068C1C0 /* chimeraseqscommand.h */,
 				379D3D500FF90E090068C1C0 /* chimeraseqscommand.cpp */,
+				A7861476109F5FA00010AAAF /* classifyseqscommand.h */,
+				A7861477109F5FA00010AAAF /* classifyseqscommand.cpp */,
 				37B28F660F27590100808A62 /* deconvolutecommand.h */,
 				37B28F670F27590100808A62 /* deconvolutecommand.cpp */,
 				37C753CC0FB3415200DBD02E /* distancecommand.h */,
@@ -942,13 +963,13 @@
 				373C68A00FC1C07D00137ACD /* alignment.cpp */,
 				373C68A30FC1C07D00137ACD /* alignmentcell.hpp */,
 				373C68A20FC1C07D00137ACD /* alignmentcell.cpp */,
+				A78782A910A1B1CB0086103D /* alignmentdb.h */,
+				A78782AA10A1B1CB0086103D /* alignmentdb.cpp */,
 				373C69170FC1C8AF00137ACD /* blastdb.hpp */,
 				373C69160FC1C8AF00137ACD /* blastdb.cpp */,
 				37D927D40F21331F001D4494 /* database.hpp */,
 				37D927D30F21331F001D4494 /* database.cpp */,
 				37D927D50F21331F001D4494 /* datavector.hpp */,
-				3799A94D0FD6A58C00E33EDE /* distancedb.hpp */,
-				3799A94C0FD6A58C00E33EDE /* distancedb.cpp */,
 				37D927DC0F21331F001D4494 /* fastamap.h */,
 				37D927DB0F21331F001D4494 /* fastamap.cpp */,
 				375873EA0F7D64520040F377 /* fullmatrix.h */,
@@ -1010,6 +1031,23 @@
 			name = errorcheckor;
 			sourceTree = "<group>";
 		};
+		A787811210A073F40086103D /* classifiers */ = {
+			isa = PBXGroup;
+			children = (
+				A78780FE10A06B8B0086103D /* classify.h */,
+				A787810210A06D5D0086103D /* classify.cpp */,
+				A787820F10A0AAE70086103D /* bayesian.h */,
+				A787821010A0AAE70086103D /* bayesian.cpp */,
+				A78781B910A0813D0086103D /* doTaxonomy.h */,
+				A78781B810A0813D0086103D /* doTaxonomy.cpp */,
+				A787844310A1EBDD0086103D /* knn.h */,
+				A787844410A1EBDD0086103D /* knn.cpp */,
+				A787811310A074330086103D /* phylotype.h */,
+				A787811410A074330086103D /* phylotype.cpp */,
+			);
+			name = classifiers;
+			sourceTree = "<group>";
+		};
 		C6859E8C029090F304C91782 /* Documentation */ = {
 			isa = PBXGroup;
 			children = (
@@ -1204,7 +1242,6 @@
 				21E859D80FC4632E005E1A48 /* matrixoutputcommand.cpp in Sources */,
 				3749271D0FD58C840031C06B /* getsabundcommand.cpp in Sources */,
 				3749273F0FD5956B0031C06B /* getrabundcommand.cpp in Sources */,
-				3799A9500FD6A58C00E33EDE /* distancedb.cpp in Sources */,
 				3799A9510FD6A58C00E33EDE /* seqsummarycommand.cpp in Sources */,
 				371B30B40FD7EE67000414CA /* screenseqscommand.cpp in Sources */,
 				7E09C5140FDA79C5002ECAE5 /* reversecommand.cpp in Sources */,
@@ -1233,6 +1270,13 @@
 				A729ACD010848E6100139801 /* hclustercommand.cpp in Sources */,
 				A729ACE410849BBE00139801 /* hcluster.cpp in Sources */,
 				A751ACEA1098B283003D0911 /* readcluster.cpp in Sources */,
+				A7861478109F5FA00010AAAF /* classifyseqscommand.cpp in Sources */,
+				A787810310A06D5D0086103D /* classify.cpp in Sources */,
+				A787811510A074330086103D /* phylotype.cpp in Sources */,
+				A78781BA10A0813E0086103D /* doTaxonomy.cpp in Sources */,
+				A787821110A0AAE70086103D /* bayesian.cpp in Sources */,
+				A78782AB10A1B1CB0086103D /* alignmentdb.cpp in Sources */,
+				A787844510A1EBDD0086103D /* knn.cpp in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
diff --git a/aligncommand.cpp b/aligncommand.cpp
index 48c67ec..2ca450a 100644
--- a/aligncommand.cpp
+++ b/aligncommand.cpp
@@ -22,10 +22,6 @@
 #include "blastalign.hpp"
 #include "noalign.hpp"
 
-#include "kmerdb.hpp"
-#include "suffixdb.hpp"
-#include "blastdb.hpp"
-
 #include "nast.hpp"
 #include "nastreport.hpp"
 
@@ -126,7 +122,7 @@ void AlignCommand::help(){
 		mothurOut("The template and candidate parameters are required.\n");
 		mothurOut("The search parameter allows you to specify the method to find most similar template.  Your options are: suffix, kmer and blast. The default is kmer.\n");
 		mothurOut("The align parameter allows you to specify the alignment method to use.  Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n");
-		mothurOut("The ksize parameter allows you to specify the kmer size for finding most similar template to candidate.  The default is 7.\n");
+		mothurOut("The ksize parameter allows you to specify the kmer size for finding most similar template to candidate.  The default is 8.\n");
 		mothurOut("The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n");
 		mothurOut("The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n");
 		mothurOut("The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -1.0.\n");
@@ -149,19 +145,9 @@ int AlignCommand::execute(){
 	try {
 		if (abort == true) {	return 0;	}
 		
-		if(search == "kmer")			{	templateDB = new KmerDB(templateFileName, kmerSize);	}
-		else if(search == "suffix")		{	templateDB = new SuffixDB(templateFileName);			}
-		else if(search == "blast")		{	templateDB = new BlastDB(templateFileName, gapOpen, gapExtend, match, misMatch);	}
-		else {
-			mothurOut(search + " is not a valid search option. I will run the command using kmer, ksize=8.");
-			mothurOutEndLine();
-			kmerSize = 8;
-			
-			templateDB = new KmerDB(templateFileName, kmerSize);
-		}
-		
+		templateDB = new AlignmentDB(templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch);
 		int longestBase = templateDB->getLongestBase();
-		
+	
 		if(align == "gotoh")			{	alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);			}
 		else if(align == "needleman")	{	alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);				}
 		else if(align == "blast")		{	alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);		}
@@ -281,7 +267,7 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName){
 	
 			Sequence temp = templateDB->findClosestSequence(candidateSeq);
 			Sequence* templateSeq = &temp;
-	
+
 			report.setTemplate(templateSeq);
 			report.setSearchParameters(search, templateDB->getSearchScore());
 				
@@ -290,13 +276,13 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName){
 			report.setAlignmentParameters(align, alignment);
 	
 			report.setNastParameters(nast);
-
+	
 			alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl;
 				
 			report.print();
 			
 			delete candidateSeq;
-	
+		
 		}
 		
 		alignmentFile.close();
diff --git a/aligncommand.h b/aligncommand.h
index 50e9bff..c3c2441 100644
--- a/aligncommand.h
+++ b/aligncommand.h
@@ -14,6 +14,7 @@
 #include "command.hpp"
 #include "database.hpp"
 #include "alignment.hpp"
+#include "alignmentdb.h"
 
 class AlignCommand : public Command {
 	
@@ -32,7 +33,7 @@ private:
 	vector<int> processIDS;   //processid
 	vector<linePair*> lines;
 	
-	Database* templateDB;
+	AlignmentDB* templateDB;
 	Alignment* alignment;
 	
 	int driver(linePair*, string, string);
diff --git a/alignmentdb.cpp b/alignmentdb.cpp
new file mode 100644
index 0000000..493521e
--- /dev/null
+++ b/alignmentdb.cpp
@@ -0,0 +1,111 @@
+/*
+ *  alignmentdb.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 11/4/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "alignmentdb.h"
+#include "kmerdb.hpp"
+#include "suffixdb.hpp"
+#include "blastdb.hpp"
+
+
+/**************************************************************************************************/
+AlignmentDB::AlignmentDB(string fastaFileName, string method, int kmerSize, int gapOpen, int gapExtend, int match, int misMatch){		//	This assumes that the template database is in fasta format, may 
+	try {											//	need to alter this in the future?
+		longest = 0;
+		
+		ifstream fastaFile;
+		openInputFile(fastaFileName, fastaFile);
+		
+		mothurOutEndLine();
+		mothurOut("Reading in the " + fastaFileName + " template sequences...\t");	cout.flush();
+		
+		while (!fastaFile.eof()) {
+			Sequence temp(fastaFile);
+			
+			templateSequences.push_back(temp);
+			
+			//save longest base
+			if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length(); }
+			
+			gobble(fastaFile);
+		}
+		
+		numSeqs = templateSequences.size();
+		
+		fastaFile.close();
+		//all of this is elsewhere already!
+		
+		mothurOut("DONE.");
+		mothurOutEndLine();	cout.flush();
+		
+		//in case you delete the seqs and then ask for them
+		emptySequence = Sequence();
+		emptySequence.setName("no_match");
+		emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+		emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+		
+		bool needToGenerate = true;
+		string kmerDBName;
+		if(method == "kmer")			{	
+			search = new KmerDB(fastaFileName, kmerSize);			
+			
+			kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
+			ifstream kmerFileTest(kmerDBName.c_str());
+			
+			if(kmerFileTest){	needToGenerate = false;		}
+		}
+		else if(method == "suffix")		{	search = new SuffixDB(numSeqs);								}
+		else if(method == "blast")		{	search = new BlastDB(gapOpen, gapExtend, match, misMatch);	}
+		else {
+			mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
+			mothurOutEndLine();
+			search = new KmerDB(fastaFileName, 8);
+		}
+		
+		if (needToGenerate) {
+		
+			//add sequences to search 
+			for (int i = 0; i < templateSequences.size(); i++) {
+				search->addSequence(templateSequences[i]);
+			}
+			search->generateDB();
+			
+		}else if ((method == "kmer") && (!needToGenerate)) {
+			ifstream kmerFileTest(kmerDBName.c_str());
+			search->readKmerDB(kmerFileTest);	
+		}
+		
+		search->setNumSeqs(numSeqs);
+	}
+	catch(exception& e) {
+		errorOut(e, "AlignmentDB", "AlignmentDB");
+		exit(1);
+	}
+}
+/**************************************************************************************************/
+AlignmentDB::~AlignmentDB() {  delete search;	}
+/**************************************************************************************************/
+Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
+	try{
+	
+		vector<int> spot = search->findClosestSequences(seq, 1);
+
+		if (spot.size() != 0)	{		return templateSequences[spot[0]];	}
+		else					{		return emptySequence;				}
+		
+	}
+	catch(exception& e) {
+		errorOut(e, "AlignmentDB", "findClosestSequence");
+		exit(1);
+	}
+}
+/**************************************************************************************************/
+
+
+
+
diff --git a/alignmentdb.h b/alignmentdb.h
new file mode 100644
index 0000000..339f46f
--- /dev/null
+++ b/alignmentdb.h
@@ -0,0 +1,42 @@
+#ifndef ALIGNMENTDB_H
+#define ALIGNMENTDB_H
+
+/*
+ *  alignmentdb.h
+ *  Mothur
+ *
+ *  Created by westcott on 11/4/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+#include "sequence.hpp"
+#include "database.hpp"
+
+/**************************************************************************************************/
+
+class AlignmentDB {
+
+public:
+
+	AlignmentDB(string, string, int, int, int, int, int);  //reads fastafile passed in and stores sequences
+	~AlignmentDB();
+	
+	Sequence findClosestSequence(Sequence*);
+	float getSearchScore()  {  return search->getSearchScore();  }
+	int getLongestBase()	{  return longest;  }
+	
+private:
+	int numSeqs, longest;
+	float searchScore;
+	
+	Database* search;
+	vector<Sequence> templateSequences;
+	Sequence emptySequence;
+};
+
+/**************************************************************************************************/
+
+#endif
+
diff --git a/bayesian.cpp b/bayesian.cpp
new file mode 100644
index 0000000..caae212
--- /dev/null
+++ b/bayesian.cpp
@@ -0,0 +1,29 @@
+/*
+ *  bayesian.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 11/3/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "bayesian.h"
+
+/**************************************************************************************************/
+Bayesian::Bayesian(string tfile, string tempFile, string method, int kmerSize, int gapOpen, int gapExtend, int match, int misMatch) : 
+Classify(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch) {}
+/**************************************************************************************************/
+string Bayesian::getTaxonomy(Sequence* seq) {
+	try {
+		string tax;
+		
+				
+		return tax;	
+	}
+	catch(exception& e) {
+		errorOut(e, "Bayesian", "getTaxonomy");
+		exit(1);
+	}
+}
+/**************************************************************************************************/
+
diff --git a/bayesian.h b/bayesian.h
new file mode 100644
index 0000000..cbe949f
--- /dev/null
+++ b/bayesian.h
@@ -0,0 +1,34 @@
+#ifndef BAYESIAN_H
+#define BAYESIAN_H
+
+/*
+ *  bayesian.h
+ *  Mothur
+ *
+ *  Created by westcott on 11/3/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+#include "classify.h"
+
+/**************************************************************************************************/
+
+class Bayesian : public Classify {
+	
+public:
+	Bayesian(string, string, string, int, int, int, int, int);
+	~Bayesian() {};
+	
+	string getTaxonomy(Sequence*);
+	
+	
+private:
+	
+};
+
+/**************************************************************************************************/
+
+#endif
+
diff --git a/blastdb.cpp b/blastdb.cpp
index 0cba833..6b45a48 100644
--- a/blastdb.cpp
+++ b/blastdb.cpp
@@ -14,90 +14,110 @@
 
 /**************************************************************************************************/
 
-BlastDB::BlastDB(string fastaFileName, float gO, float gE, float m, float mM) : Database(fastaFileName), 
+BlastDB::BlastDB(float gO, float gE, float m, float mM) : Database(), 
 gapOpen(gO), gapExtend(gE), match(m), misMatch(mM) {
 	
 	globaldata = GlobalData::getInstance();
-	
-	mothurOut("Generating the temporary BLAST database...\t");	cout.flush();
+	count = 0;
 
 	int randNumber = rand();
 	dbFileName = toString(randNumber) + ".template.unaligned.fasta";
 	queryFileName = toString(randNumber) + ".candidate.unaligned.fasta";
 	blastFileName = toString(randNumber) + ".blast";
 
-
-	ofstream unalignedFastaFile;
-	openOutputFile(dbFileName, unalignedFastaFile);				
-	
-	for(int i=0;i<numSeqs;i++){									//	generating a fasta file with unaligned template
-		unalignedFastaFile << '>' << i << endl;					//	sequences, which will be input to formatdb
-		unalignedFastaFile << templateSequences[i].getUnaligned() << endl;
-	}
-	unalignedFastaFile.close();
-	
-	path = globaldata->argv;
-	path = path.substr(0, (path.find_last_of('m')));
-	
-	string formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName;	//	format the database, -o option gives us the ability
-	system(formatdbCommand.c_str());								//	to get the right sequence names, i think. -p F
-																	//	option tells formatdb that seqs are DNA, not prot
-	mothurOut("DONE."); mothurOutEndLine();	mothurOutEndLine(); cout.flush();
-	emptySequence = Sequence();
-	emptySequence.setName("no_match");
-	emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
-	emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
-
-
 }
 
 /**************************************************************************************************/
 
 BlastDB::~BlastDB(){
-	//for (int i = 0; i < templateSequences.size(); i++) {  delete templateSequences[i];   }
-	//templateSequences.clear();
-	//delete emptySequence;
-	
 	remove(queryFileName.c_str());				//	let's clean stuff up and remove the temp files
 	remove(dbFileName.c_str());					//	let's clean stuff up and remove the temp files
 	remove(blastFileName.c_str());				//	let's clean stuff up and remove the temp files
 }
-
 /**************************************************************************************************/
+//assumes you have added all the template sequences using the addSequence function and run generateDB.
+vector<int> BlastDB::findClosestSequences(Sequence* seq, int n) {
+	try{
+		vector<int> topMatches;
+		
+		ofstream queryFile;
+		openOutputFile(queryFileName, queryFile);
+		queryFile << '>' << seq->getName() << endl;
+		queryFile << seq->getUnaligned() << endl;
+		queryFile.close();
+				
+		//	the goal here is to quickly survey the database to find the closest match.  To do this we are using the default
+		//	wordsize used in megablast.  I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
+		//	long.  With this setting, it seems comparable in speed to the suffix tree approach.
+		
+		string blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -b 1 -m 8 -W 28 -v " + toString(n);
+		blastCommand += (" -i " + queryFileName + " -o " + blastFileName);
+		system(blastCommand.c_str());
+		
+		ifstream m8FileHandle;
+		openInputFile(blastFileName, m8FileHandle);
+		
+		string dummy;
+		int templateAccession;
+		gobble(m8FileHandle);
+		
+		while(!m8FileHandle.eof()){
+			m8FileHandle >> dummy >> templateAccession >> searchScore;
+			
+			//get rest of junk in line
+			while (!m8FileHandle.eof())	{	char c = m8FileHandle.get(); if (c == 10 || c == 13){	break;	}	} 
+			
+			gobble(m8FileHandle);
+			topMatches.push_back(templateAccession);
+		}
+		m8FileHandle.close();
+		
+		return topMatches;
+	}
+	catch(exception& e) {
+		errorOut(e, "BlastDB", "findClosestSequences");
+		exit(1);
+	}
 
-Sequence BlastDB::findClosestSequence(Sequence* candidate){
-
-	ofstream queryFile;
-	openOutputFile(queryFileName, queryFile);
-	queryFile << '>' << candidate->getName() << endl;
-	queryFile << candidate->getUnaligned() << endl;
-	queryFile.close();
+}
+/**************************************************************************************************/
+void BlastDB::addSequence(Sequence seq) {
+	try {
 	
+		ofstream unalignedFastaFile;
+		openOutputFileAppend(dbFileName, unalignedFastaFile);				
 	
-//	the goal here is to quickly survey the database to find the closest match.  To do this we are using the default
-//	wordsize used in megablast.  I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
-//	long.  With this setting, it seems comparable in speed to the suffix tree approach.
-
-	string blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -b 1 -m 8 -W 28";
-	blastCommand += (" -i " + queryFileName + " -o " + blastFileName);
-	system(blastCommand.c_str());
+		//	generating a fasta file with unaligned template
+		unalignedFastaFile << '>' << count << endl;					//	sequences, which will be input to formatdb
+		unalignedFastaFile << seq.getUnaligned() << endl;
+		unalignedFastaFile.close();
+		
+		count++;
+	}
+	catch(exception& e) {
+		errorOut(e, "BlastDB", "addSequence");
+		exit(1);
+	}
+}
+/**************************************************************************************************/
+void BlastDB::generateDB() {
+	try {
 	
-	ifstream m8FileHandle;
-	openInputFile(blastFileName, m8FileHandle);
+		mothurOut("Generating the temporary BLAST database...\t");	cout.flush();
+		
+		path = globaldata->argv;
+		path = path.substr(0, (path.find_last_of('m')));
 	
-	string dummy;
-	int templateAccession;
-	gobble(m8FileHandle);
-	if(!m8FileHandle.eof()){
-		m8FileHandle >> dummy >> templateAccession >> searchScore;
-		m8FileHandle.close();
-		return templateSequences[templateAccession];
+		string formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName;	//	format the database, -o option gives us the ability
+		system(formatdbCommand.c_str());								//	to get the right sequence names, i think. -p F
+																	//	option tells formatdb that seqs are DNA, not prot
+		mothurOut("DONE."); mothurOutEndLine();	mothurOutEndLine(); cout.flush();
 	}
-	else{
-		searchScore = 0.00;
-		return emptySequence;
+	catch(exception& e) {
+		errorOut(e, "BlastDB", "generateDB");
+		exit(1);
 	}
-	m8FileHandle.close();
 }
 
 /**************************************************************************************************/
+
diff --git a/blastdb.hpp b/blastdb.hpp
index f0f17f9..c718bb4 100644
--- a/blastdb.hpp
+++ b/blastdb.hpp
@@ -15,10 +15,14 @@
 #include "globaldata.hpp"
 
 class BlastDB : public Database {
+
 public:
-	BlastDB(string, float, float, float, float);
+	BlastDB(float, float, float, float);
 	~BlastDB();
-	Sequence findClosestSequence(Sequence*);
+	
+	void generateDB();
+	void addSequence(Sequence);
+	vector<int> findClosestSequences(Sequence*, int);
 
 private:
 	string dbFileName;
@@ -26,11 +30,11 @@ private:
 	string blastFileName;
 	string path;
 	
+	int count;
 	float gapOpen;
 	float gapExtend;
 	float match;
 	float misMatch;
-	Sequence emptySequence;
 	GlobalData* globaldata;
 };
 
diff --git a/chimeracheckrdp.cpp b/chimeracheckrdp.cpp
index acc3aa2..07fb8ab 100644
--- a/chimeracheckrdp.cpp
+++ b/chimeracheckrdp.cpp
@@ -77,11 +77,13 @@ void ChimeraCheckRDP::getChimeras() {
 	try {
 		
 		//read in query sequences and subject sequences
-		mothurOut("Reading sequences and template file... "); cout.flush();
+		mothurOutEndLine();
+		mothurOut("Reading query sequences... "); cout.flush();
 		querySeqs = readSeqs(fastafile);
+		mothurOut("Done."); 
 		//templateSeqs = readSeqs(templateFile);
-		templateDB = new KmerDB(templateFile, kmerSize);
-		mothurOut("Done."); mothurOutEndLine();
+		templateDB = new AlignmentDB(templateFile, "kmer", kmerSize, 0,0,0,0);
+		mothurOutEndLine();
 		
 		int numSeqs = querySeqs.size();
 		
diff --git a/chimeracheckrdp.h b/chimeracheckrdp.h
index f436718..fe46206 100644
--- a/chimeracheckrdp.h
+++ b/chimeracheckrdp.h
@@ -14,7 +14,7 @@
 #include "chimera.h"
 #include "kmer.hpp"
 #include "kmerdb.hpp"
-#include "database.hpp"
+#include "alignmentdb.h"
 
 /***********************************************************/
 //This class was created using the algorythms described in 
@@ -39,7 +39,7 @@ class ChimeraCheckRDP : public Chimera {
 		
 		vector<linePair*> lines;
 		vector<Sequence*> querySeqs;
-		Database* templateDB;
+		AlignmentDB* templateDB;
 		Kmer* kmer;
 		
 		vector< vector<sim> > IS;  //IS[0] is the vector of IS values for each window for querySeqs[0]
diff --git a/classify.cpp b/classify.cpp
new file mode 100644
index 0000000..605c1f8
--- /dev/null
+++ b/classify.cpp
@@ -0,0 +1,124 @@
+/*
+ *  classify.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 11/3/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "classify.h"
+#include "sequence.hpp"
+#include "kmerdb.hpp"
+#include "suffixdb.hpp"
+#include "blastdb.hpp"
+
+/**************************************************************************************************/
+
+Classify::Classify(string tfile, string tempFile, string method, int kmerSize, int gapOpen, int gapExtend, int match, int misMatch) : taxFile(tfile), templateFile(tempFile) {		
+	try {											
+		
+		readTaxonomy(taxFile);
+		
+		int numSeqs = 0;
+		//need to know number of template seqs for suffixdb
+		if (method == "suffix") {
+			ifstream inFASTA;
+			openInputFile(tempFile, inFASTA);
+			numSeqs = count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+			inFASTA.close();
+		}
+
+		mothurOut("Generating search database...    "); cout.flush();
+				
+		bool needToGenerate = true;
+		string kmerDBName;
+		if(method == "kmer")			{	
+			database = new KmerDB(tempFile, kmerSize);			
+			
+			kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
+			ifstream kmerFileTest(kmerDBName.c_str());
+			if(kmerFileTest){	needToGenerate = false;		}
+		}
+		else if(method == "suffix")		{	database = new SuffixDB(numSeqs);								}
+		else if(method == "blast")		{	database = new BlastDB(gapOpen, gapExtend, match, misMatch);	}
+		else {
+			mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
+			mothurOutEndLine();
+			database = new KmerDB(tempFile, 8);
+		}
+		
+		if (needToGenerate) {
+			ifstream fastaFile;
+			openInputFile(tempFile, fastaFile);
+		
+			while (!fastaFile.eof()) {
+				Sequence temp(fastaFile);
+				gobble(fastaFile);
+			
+				names.push_back(temp.getName());
+				database->addSequence(temp);	
+			}
+			fastaFile.close();
+
+			database->generateDB();
+			
+		}else if ((method == "kmer") && (!needToGenerate)) {	
+			ifstream kmerFileTest(kmerDBName.c_str());
+			database->readKmerDB(kmerFileTest);	
+			
+			ifstream fastaFile;
+			openInputFile(tempFile, fastaFile);
+		
+			while (!fastaFile.eof()) {
+				Sequence temp(fastaFile);
+				gobble(fastaFile);
+			
+				names.push_back(temp.getName());
+			}
+			fastaFile.close();
+		}
+		
+		database->setNumSeqs(names.size());
+		
+		mothurOut("DONE."); mothurOutEndLine();
+
+	}
+	catch(exception& e) {
+		errorOut(e, "Classify", "Classify");
+		exit(1);
+	}
+}
+/**************************************************************************************************/
+
+void Classify::readTaxonomy(string file) {
+	try {
+	
+		ifstream inTax;
+		openInputFile(file, inTax);
+	
+		mothurOutEndLine();
+		mothurOut("Reading in the " + file + " taxonomy...\t");	cout.flush();
+		
+		string name, taxInfo;
+		//read template seqs and save
+		while (!inTax.eof()) {
+			inTax >> name >> taxInfo;
+			
+			taxonomy[name] = taxInfo;
+		
+			gobble(inTax);
+		}
+		inTax.close();
+	
+		mothurOut("DONE.");
+		mothurOutEndLine();	cout.flush();
+	
+	}
+	catch(exception& e) {
+		errorOut(e, "Classify", "readTaxonomy");
+		exit(1);
+	}
+}
+/**************************************************************************************************/
+
diff --git a/classify.h b/classify.h
new file mode 100644
index 0000000..2926800
--- /dev/null
+++ b/classify.h
@@ -0,0 +1,50 @@
+#ifndef CLASSIFY_H
+#define CLASSIFY_H
+
+/*
+ *  classify.h
+ *  Mothur
+ *
+ *  Created by westcott on 11/3/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+/* This class is a parent to phylotyp, bayesian, knn.  */
+
+#include "mothur.h"
+#include "database.hpp"
+
+
+
+class Sequence;
+
+/**************************************************************************************************/
+
+class Classify {
+
+public:
+	Classify(string, string, string, int, int, int, int, int);
+	Classify(){};
+	
+	virtual ~Classify(){};
+	virtual string getTaxonomy(Sequence*) = 0;
+	
+protected:
+
+	map<string, string> taxonomy;  //name maps to taxonomy
+	map<string, string>::iterator it;
+	Database* database;
+	
+	string taxFile, templateFile;
+	vector<string> names;
+	
+	void readTaxonomy(string);
+		
+};
+
+/**************************************************************************************************/
+
+#endif
+
diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp
new file mode 100644
index 0000000..69ef712
--- /dev/null
+++ b/classifyseqscommand.cpp
@@ -0,0 +1,360 @@
+/*
+ *  classifyseqscommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 11/2/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "classifyseqscommand.h"
+#include "sequence.hpp"
+#include "phylotype.h"
+#include "bayesian.h"
+#include "doTaxonomy.h"
+#include "knn.h"
+
+//**********************************************************************************************************************
+
+ClassifySeqsCommand::ClassifySeqsCommand(string option){
+	try {
+		//		globaldata = GlobalData::getInstance();
+		abort = false;
+		
+		//allow user to run help
+		if(option == "help") { help(); abort = true; }
+		
+		else {
+			
+			//valid paramters for this command
+			string AlignArray[] =  {"template","fasta","search","ksize","method","processors","taxonomy","match","mismatch","gapopen","gapextend","numwanted"};
+			vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+			
+			OptionParser parser(option);
+			map<string, string> parameters = parser.getParameters(); 
+			
+			ValidParameters validParameter;
+			
+			//check to make sure all parameters are valid for command
+			for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+				if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+			}
+			
+			//check for required parameters
+			templateFileName = validParameter.validFile(parameters, "template", true);
+			if (templateFileName == "not found") { 
+				mothurOut("template is a required parameter for the classify.seqs command."); 
+				mothurOutEndLine();
+				abort = true; 
+			}
+			else if (templateFileName == "not open") { abort = true; }	
+			
+			fastaFileName = validParameter.validFile(parameters, "fasta", true);
+			if (fastaFileName == "not found") { 
+				mothurOut("fasta is a required parameter for the classify.seqs command."); 
+				mothurOutEndLine();
+				abort = true; 
+			}
+			else if (fastaFileName == "not open") { abort = true; }	
+			
+			taxonomyFileName = validParameter.validFile(parameters, "taxonomy", true);
+			if (taxonomyFileName == "not found") { 
+				mothurOut("taxonomy is a required parameter for the classify.seqs command."); 
+				mothurOutEndLine();
+				abort = true; 
+			}
+			else if (taxonomyFileName == "not open") { abort = true; }	
+
+			
+			//check for optional parameter and set defaults
+			// ...at some point should added some additional type checking...
+			string temp;
+			temp = validParameter.validFile(parameters, "ksize", false);		if (temp == "not found"){	temp = "8";				}
+			convert(temp, kmerSize); 
+			
+			temp = validParameter.validFile(parameters, "processors", false);	if (temp == "not found"){	temp = "1";				}
+			convert(temp, processors); 
+			
+			search = validParameter.validFile(parameters, "search", false);		if (search == "not found"){	search = "kmer";		}
+			
+			method = validParameter.validFile(parameters, "method", false);		if (method == "not found"){	method = "phylotype";	}
+			
+			temp = validParameter.validFile(parameters, "match", false);		if (temp == "not found"){	temp = "1.0";			}
+			convert(temp, match);  
+			
+			temp = validParameter.validFile(parameters, "mismatch", false);		if (temp == "not found"){	temp = "-1.0";			}
+			convert(temp, misMatch);  
+			
+			temp = validParameter.validFile(parameters, "gapopen", false);		if (temp == "not found"){	temp = "-2.0";			}
+			convert(temp, gapOpen);  
+			
+			temp = validParameter.validFile(parameters, "gapextend", false);	if (temp == "not found"){	temp = "-1.0";			}
+			convert(temp, gapExtend); 
+			
+			temp = validParameter.validFile(parameters, "numwanted", false);	if (temp == "not found"){	temp = "10";			}
+			convert(temp, numWanted);
+
+		}
+		
+	}
+	catch(exception& e) {
+		errorOut(e, "ClassifySeqsCommand", "ClassifySeqsCommand");
+		exit(1);
+	}
+}
+
+//**********************************************************************************************************************
+
+ClassifySeqsCommand::~ClassifySeqsCommand(){	
+
+	if (abort == false) {
+		for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+	}
+}
+
+//**********************************************************************************************************************
+
+void ClassifySeqsCommand::help(){
+	try {
+		mothurOut("The classify.seqs command reads a fasta file containing sequences and creates a .taxonomy file and a .tax.summary file.\n");
+		mothurOut("The classify.seqs command parameters are template, fasta, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend and numwanted.\n");
+		mothurOut("The template, fasta and taxonomy parameters are required.\n");
+		mothurOut("The search parameter allows you to specify the method to find most similar template.  Your options are: suffix, kmer and blast. The default is kmer.\n");
+		mothurOut("The method parameter allows you to specify classification method to use.  Your options are: phylotype, bayesian and knn. The default is phylotype.\n");
+		mothurOut("The ksize parameter allows you to specify the kmer size for finding most similar template to candidate.  The default is 8.\n");
+		mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
+		mothurOut("The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n");
+		mothurOut("The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n");
+		mothurOut("The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -1.0.\n");
+		mothurOut("The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -2.0.\n");
+		mothurOut("The numwanted parameter allows you to specify the number of sequence matches you want with the knn method.  The default is 10.\n");
+		mothurOut("The classify.seqs command should be in the following format: \n");
+		mothurOut("classify.seqs(template=yourTemplateFile, fasta=yourFastaFile, method=yourClassificationMethod, search=yourSearchmethod, ksize=yourKmerSize, taxonomy=yourTaxonomyFile, processors=yourProcessors) \n");
+		mothurOut("Example classify.seqs(fasta=amazon.fasta, template=core.filtered, method=phylotype, search=gotoh, ksize=8, processors=2)\n");
+		mothurOut("The .taxonomy file consists of 2 columns: 1 = your sequence name, 2 = the taxonomy for your sequence. \n");
+		mothurOut("The .tax.summary is a summary of the different taxonomies represented in your fasta file. \n");
+		mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
+	}
+	catch(exception& e) {
+		errorOut(e, "ClassifySeqsCommand", "help");
+		exit(1);
+	}
+}
+
+
+//**********************************************************************************************************************
+
+int ClassifySeqsCommand::execute(){
+	try {
+		if (abort == true) {	return 0;	}
+		
+		int start = time(NULL);
+		
+		if(method == "phylotype"){	classify = new PhyloType(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch);					}
+		//else if(method == "bayesian")		{	classify = new Bayesian(taxonomyFileName, templateFileName, search);		}
+		else if(method == "knn")			{	classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted);				}
+		else {
+			mothurOut(search + " is not a valid method option. I will run the command using phylotype.");
+			mothurOutEndLine();
+			classify = new PhyloType(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch);
+		}
+
+		int numFastaSeqs = 0;
+		
+		string newTaxonomyFile = getRootName(fastaFileName) + "taxonomy";
+		string taxSummary = getRootName(fastaFileName) + "tax.summary";
+		
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+		if(processors == 1){
+			ifstream inFASTA;
+			openInputFile(fastaFileName, inFASTA);
+			numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+			inFASTA.close();
+			
+			lines.push_back(new linePair(0, numFastaSeqs));
+			
+			driver(lines[0], newTaxonomyFile);
+			
+		}
+		else{
+			vector<int> positions;
+			processIDS.resize(0);
+			
+			ifstream inFASTA;
+			openInputFile(fastaFileName, inFASTA);
+			
+			string input;
+			while(!inFASTA.eof()){
+				input = getline(inFASTA);
+				if (input.length() != 0) {
+					if(input[0] == '>'){	int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);	}
+				}
+			}
+			inFASTA.close();
+			
+			numFastaSeqs = positions.size();
+			
+			int numSeqsPerProcessor = numFastaSeqs / processors;
+			
+			for (int i = 0; i < processors; i++) {
+				int startPos = positions[ i * numSeqsPerProcessor ];
+				if(i == processors - 1){
+					numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
+				}
+				lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+			}
+			createProcesses(newTaxonomyFile); 
+			
+			rename((newTaxonomyFile + toString(processIDS[0]) + ".temp").c_str(), newTaxonomyFile.c_str());
+			
+			for(int i=1;i<processors;i++){
+				appendTaxFiles((newTaxonomyFile + toString(processIDS[i]) + ".temp"), newTaxonomyFile);
+				remove((newTaxonomyFile + toString(processIDS[i]) + ".temp").c_str());
+			}
+			
+		}
+#else
+		ifstream inFASTA;
+		openInputFile(candidateFileName, inFASTA);
+		numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+		inFASTA.close();
+		
+		lines.push_back(new linePair(0, numFastaSeqs));
+		
+		driver(lines[0], newTaxonomyFile);
+#endif
+		
+		delete classify;
+		
+		//make taxonomy tree from new taxonomy file 
+		ifstream inTaxonomy;
+		openInputFile(newTaxonomyFile, inTaxonomy);
+		
+		string accession, taxaList;
+		PhyloTree taxaBrowser;
+		
+		//read in users taxonomy file and add sequences to tree
+		while(!inTaxonomy.eof()){
+			inTaxonomy >> accession >> taxaList;
+			
+			taxaBrowser.addSeqToTree(accession, taxaList);
+			
+			gobble(inTaxonomy);
+		}
+		inTaxonomy.close();
+		
+		taxaBrowser.assignHeirarchyIDs(0);
+		
+		ofstream outTaxTree;
+		openOutputFile(taxSummary, outTaxTree);
+		
+		taxaBrowser.print(outTaxTree);
+		
+		mothurOutEndLine();
+		mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences.");
+		mothurOutEndLine();
+		mothurOutEndLine();
+		
+		return 0;
+	}
+	catch(exception& e) {
+		errorOut(e, "ClassifySeqsCommand", "execute");
+		exit(1);
+	}
+}
+/**************************************************************************************************/
+
+void ClassifySeqsCommand::createProcesses(string taxFileName) {
+	try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+		int process = 0;
+		//		processIDS.resize(0);
+		
+		//loop through and create all the processes you want
+		while (process != processors) {
+			int pid = fork();
+			
+			if (pid > 0) {
+				processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+				process++;
+			}else if (pid == 0){
+				driver(lines[process], taxFileName + toString(getpid()) + ".temp");
+				exit(0);
+			}else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
+		}
+		
+		//force parent to wait until all the processes are done
+		for (int i=0;i<processors;i++) { 
+			int temp = processIDS[i];
+			wait(&temp);
+		}
+#endif		
+	}
+	catch(exception& e) {
+		errorOut(e, "ClassifySeqsCommand", "createProcesses");
+		exit(1);
+	}
+}
+/**************************************************************************************************/
+
+void ClassifySeqsCommand::appendTaxFiles(string temp, string filename) {
+	try{
+		
+		ofstream output;
+		ifstream input;
+		openOutputFileAppend(filename, output);
+		openInputFile(temp, input);
+		
+		while(char c = input.get()){
+			if(input.eof())		{	break;			}
+			else				{	output << c;	}
+		}
+		
+		input.close();
+		output.close();
+	}
+	catch(exception& e) {
+		errorOut(e, "ClassifySeqsCommand", "appendTaxFiles");
+		exit(1);
+	}
+}
+
+//**********************************************************************************************************************
+
+int ClassifySeqsCommand::driver(linePair* line, string taxFName){
+	try {
+		ofstream outTax;
+		openOutputFile(taxFName, outTax);
+	
+		ifstream inFASTA;
+		openInputFile(fastaFileName, inFASTA);
+
+		inFASTA.seekg(line->start);
+		
+		string taxonomy;
+
+		for(int i=0;i<line->numSeqs;i++){
+			
+			Sequence* candidateSeq = new Sequence(inFASTA);
+
+			taxonomy = classify->getTaxonomy(candidateSeq);
+			
+			if (taxonomy != "bad seq") {
+				outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
+			}
+							
+			delete candidateSeq;
+		}
+		
+		inFASTA.close();
+		outTax.close();
+		
+		return 1;
+	}
+	catch(exception& e) {
+		errorOut(e, "ClassifySeqsCommand", "driver");
+		exit(1);
+	}
+}
+
+/**************************************************************************************************/
diff --git a/classifyseqscommand.h b/classifyseqscommand.h
new file mode 100644
index 0000000..a77b7d1
--- /dev/null
+++ b/classifyseqscommand.h
@@ -0,0 +1,48 @@
+#ifndef CLASSIFYSEQSCOMMAND_H
+#define CLASSIFYSEQSCOMMAND_H
+
+/*
+ *  classifyseqscommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 11/2/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+#include "command.hpp"
+#include "alignment.hpp"
+#include "classify.h"
+
+class ClassifySeqsCommand : public Command {
+	
+public:
+	ClassifySeqsCommand(string);	
+	~ClassifySeqsCommand();
+	int execute(); 
+	void help();	
+	
+private:
+	struct linePair {
+		int start;
+		int numSeqs;
+		linePair(int i, int j) : start(i), numSeqs(j) {}
+	};
+	vector<int> processIDS;   //processid
+	vector<linePair*> lines;
+	
+	Classify* classify;
+	
+	string fastaFileName, templateFileName, distanceFileName, search, method, taxonomyFileName;
+	int processors, kmerSize, numWanted;
+	float match, misMatch, gapOpen, gapExtend;
+	bool abort;
+	
+	int driver(linePair*, string);
+	void appendTaxFiles(string, string);
+	void createProcesses(string); 
+};
+
+#endif
+
diff --git a/commandfactory.cpp b/commandfactory.cpp
index 5f07abb..030079e 100644
--- a/commandfactory.cpp
+++ b/commandfactory.cpp
@@ -58,6 +58,7 @@
 #include "getsharedotucommand.h"
 #include "getlistcountcommand.h"
 #include "hclustercommand.h"
+#include "classifyseqscommand.h"
 
 /***********************************************************/
 
@@ -114,8 +115,8 @@ CommandFactory::CommandFactory(){
 	commands["get.sharedotu"]		= "get.sharedotu";
 	commands["get.listcount"]		= "get.listcount";
 	commands["quit"]				= "quit"; 
-	
 	commands["hcluster"]			= "hcluster"; 
+	commands["classify.seqs"]		= "classify.seqs"; 
 
 }
 /***********************************************************/
@@ -180,8 +181,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
 		else if(commandName == "align.check")			{	command = new AlignCheckCommand(optionString);			}
 		else if(commandName == "get.sharedotu")			{	command = new GetSharedOTUCommand(optionString);		}
 		else if(commandName == "get.listcount")			{	command = new GetListCountCommand(optionString);		}
-		
 		else if(commandName == "hcluster")				{	command = new HClusterCommand(optionString);			}
+		else if(commandName == "classify.seqs")			{	command = new ClassifySeqsCommand(optionString);		}
 		else											{	command = new NoCommand(optionString);					}
 
 		return command;
diff --git a/database.cpp b/database.cpp
index f84c9e1..5f82bc1 100644
--- a/database.cpp
+++ b/database.cpp
@@ -13,73 +13,13 @@
 
 /**************************************************************************************************/
 
-Database::Database(string fastaFileName){		//	This assumes that the template database is in fasta format, may 
-												//	need to alter this in the future?
+Database::Database(){		
 	longest = 0;
-	
-	ifstream fastaFile;
-	openInputFile(fastaFileName, fastaFile);
-	
-	mothurOutEndLine();
-	mothurOut("Reading in the " + fastaFileName + " template sequences...\t");	cout.flush();
-
-	//all of this is elsewhere already!
-	//numSeqs=count(istreambuf_iterator<char>(fastaFile),istreambuf_iterator<char>(), '>');	//	count the number of
-	//fastaFile.seekg(0);																		//	sequences
-	
-	//templateSequences.resize(numSeqs);
-	
-	/*string seqName, sequence;
-	for(int i=0;i<numSeqs;i++){
-		fastaFile >> seqName;
-		seqName = seqName.substr(1);
-		char letter;
-		string aligned;
-		
-		while(fastaFile && (letter=fastaFile.get()) != '>'){
-			if(isprint(letter)){
-				letter = toupper(letter);
-				if(letter == 'U'){letter = 'T';}
-				aligned += letter;
-			}
-		}
-		templateSequences[i] = Sequence(seqName, aligned);
-		
-		//necessary because Sequence constructor by default sets whatever it reads to unaligned
-		//the setUnaligned function remove gap char.
-		templateSequences[i].setUnaligned(templateSequences[i].getUnaligned());
-		
-		if (templateSequences[i].getUnaligned().length() > longest)  { longest = templateSequences[i].getUnaligned().length(); }
-		
-		fastaFile.putback(letter);
-	}*/
-	
-	while (!fastaFile.eof()) {
-		Sequence temp(fastaFile);
-		
-		templateSequences.push_back(temp);
-		
-		//save longest base
-		if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length(); }
-		
-		gobble(fastaFile);
-	}
-	
-	numSeqs = templateSequences.size();
-	
-	fastaFile.close();
-	//all of this is elsewhere already!
-	
-	mothurOut("DONE.");
-	mothurOutEndLine();	cout.flush();
-
+	numSeqs = 0;
 }
 /**************************************************************************************************/
 
-Database::~Database(){														
-
-		templateSequences.clear();
-}
+Database::~Database(){}
 
 /**************************************************************************************************/
 
@@ -91,5 +31,3 @@ float Database::getSearchScore()	{	return searchScore;		}	//	we're assuming that
 int Database::getLongestBase()	{	return longest+1;		}	
 
 /**************************************************************************************************/
-
-
diff --git a/database.hpp b/database.hpp
index 01393c1..01916d1 100644
--- a/database.hpp
+++ b/database.hpp
@@ -11,24 +11,51 @@
  */
 
 
-/* This class is a parent to blastdb, distancedb, kmerdb, suffixdb.  Which are used to convert a squencedb object into that form. */
+/* This class is a parent to blastdb, kmerdb, suffixdb.  */
 
 #include "mothur.h"
+#include "sequence.hpp"
 
-class Sequence;
+/**************************************************************************************************/
+struct seqMatch {  //used to select top n matches
+		int seq;
+		int match;
+		seqMatch(int s, int m) : seq(s), match(m) {}
+};
+/**************************************************************************************************/
+inline bool compareSeqMatches (seqMatch member, seqMatch member2){ //sorts largest to smallest
+	if(member.match > member2.match){
+		return true;   }   
+	else{
+		return false; 
+	}
+}
+/**************************************************************************************************/
+inline bool compareSeqMatchesReverse (seqMatch member, seqMatch member2){ //sorts largest to smallest
+	if(member.match < member2.match){
+		return true;   }   
+	else{
+		return false; 
+	}
+}
 
+/**************************************************************************************************/
 class Database {
+
 public:
-	Database(string);
+	Database();
 	virtual ~Database();
-	virtual Sequence findClosestSequence(Sequence*) = 0;
+	virtual void generateDB() = 0; 
+	virtual void addSequence(Sequence) = 0;  //add sequence to search engine
+	virtual vector<int> findClosestSequences(Sequence*, int) = 0;  // returns indexes of n closest sequences to query
 	virtual float getSearchScore();
 	virtual int getLongestBase(); 
+	virtual void readKmerDB(ifstream&){};
+	virtual void setNumSeqs(int i) {	numSeqs = i; 	}
 	
 protected:
 	int numSeqs, longest;
 	float searchScore;
-	vector<Sequence> templateSequences;
 };
-
+/**************************************************************************************************/
 #endif
diff --git a/distancedb.cpp b/distancedb.cpp
index 30f9359..d6ec258 100644
--- a/distancedb.cpp
+++ b/distancedb.cpp
@@ -54,7 +54,8 @@ Sequence DistanceDB::findClosestSequence(Sequence* candidateSeq){
 	searchScore = 100. * simAccession.simScore;
 	searchIndex++;
 //	return templateSequences[closestMatchIndexNumber];
-	return templateSequences[simAccession.indexNumber];
+	if (templateValid) {  return templateSequences[simAccession.indexNumber];	}
+	else {	return emptySequence;	}
 	
 }
 
diff --git a/distancedb.hpp b/distancedb.hpp
index 8da1d10..b0d46c6 100644
--- a/distancedb.hpp
+++ b/distancedb.hpp
@@ -16,8 +16,9 @@
 class DistanceDB : public Database {
 	
 public:
+
 	DistanceDB(string, string);
-	Sequence findClosestSequence(Sequence*);
+	vector<int> findClosestSequences(Sequence*, int);
 	
 private:
 
@@ -26,8 +27,8 @@ private:
 		int indexNumber;
 		float simScore;
 	};
+	
 	vector<hit> mostSimSequenceVector;
-//	ifstream inputData;
 	int searchIndex;
 };
 
diff --git a/doTaxonomy.cpp b/doTaxonomy.cpp
new file mode 100644
index 0000000..e664764
--- /dev/null
+++ b/doTaxonomy.cpp
@@ -0,0 +1,125 @@
+/*
+ *  doTaxonomy.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 6/17/09.
+ *  Copyright 2009 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "doTaxonomy.h"
+
+/**************************************************************************************************/
+
+PhyloTree::PhyloTree(){
+	
+	numNodes = 1;
+	numSeqs = 0;
+	tree.push_back(TaxNode("Root"));
+	tree[0].heirarchyID = "0";
+}
+
+/**************************************************************************************************/
+
+string PhyloTree::getNextTaxon(string& heirarchy){
+	
+	string currentLevel = "";
+	if(heirarchy != ""){
+		currentLevel=heirarchy.substr(0,heirarchy.find_first_of(';'));
+		heirarchy=heirarchy.substr(heirarchy.find_first_of(';')+1);
+	}
+	return currentLevel;
+	
+}
+
+/**************************************************************************************************/
+
+void PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
+
+	numSeqs++;
+
+	map<string, int>::iterator childPointer;
+
+	int currentNode = 0;
+	int level = 1;
+	
+	tree[0].accessions.push_back(seqName);
+	string taxon;// = getNextTaxon(seqTaxonomy);
+
+	while(seqTaxonomy != ""){
+
+		level++;
+
+//somehow the parent is getting one too many accnos
+//use print to reassign the taxa id
+		taxon = getNextTaxon(seqTaxonomy);
+
+		childPointer = tree[currentNode].children.find(taxon);
+
+		if(childPointer != tree[currentNode].children.end()){	//if the node already exists, move on
+			currentNode = childPointer->second;
+			tree[currentNode].accessions.push_back(seqName);
+		}
+		else{											//otherwise, create it
+			tree.push_back(TaxNode(taxon));
+			numNodes++;
+			tree[currentNode].children[taxon] = numNodes-1;
+
+//			int numChildren = tree[currentNode].children.size();
+//			string heirarchyID = tree[currentNode].heirarchyID;
+//			tree[currentNode].accessions.push_back(seqName);
+			
+			currentNode = tree[currentNode].children[taxon];
+			tree[currentNode].accessions.push_back(seqName);
+
+//			tree[currentNode].level = level;
+//			tree[currentNode].childNumber = numChildren;
+//			tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
+		}
+
+	}
+}
+
+/**************************************************************************************************/
+
+void PhyloTree::assignHeirarchyIDs(int index){
+	
+	map<string,int>::iterator it;
+	int counter = 1;
+	
+	for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
+		tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
+		counter++;
+		tree[it->second].level = tree[index].level + 1;
+		assignHeirarchyIDs(it->second);
+
+	}
+	
+}
+
+/**************************************************************************************************/
+
+void PhyloTree::print(ofstream& out){
+	
+	out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
+	print(0, out);
+
+	
+}
+
+/**************************************************************************************************/
+
+void PhyloTree::print(int i, ofstream& out){
+	
+	map<string,int>::iterator it;
+	for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
+		out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
+		print(it->second, out);
+	}
+
+}
+
+/**************************************************************************************************/
+
+
+	
diff --git a/doTaxonomy.h b/doTaxonomy.h
new file mode 100644
index 0000000..c1ec3ad
--- /dev/null
+++ b/doTaxonomy.h
@@ -0,0 +1,42 @@
+/*
+ *  doTaxonomy.h
+ *  
+ *
+ *  Created by Pat Schloss on 6/17/09.
+ *  Copyright 2009 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+
+/**************************************************************************************************/
+
+struct TaxNode {
+	vector<string> accessions;
+	map<string, int> children;
+	int parent, childNumber, level;
+	string name, heirarchyID;
+	
+	TaxNode(string n) : name(n), level(0), parent(-1) {		}
+	TaxNode(){}
+};
+
+/**************************************************************************************************/
+
+class PhyloTree {
+
+public:
+	PhyloTree();
+	void addSeqToTree(string, string);
+	void assignHeirarchyIDs(int);
+	void print(ofstream&);
+private:
+	string getNextTaxon(string&);
+	vector<TaxNode> tree;
+	void print(int, ofstream&);
+	int numNodes;
+	int numSeqs;
+};
+
+/**************************************************************************************************/
+
diff --git a/hcluster.cpp b/hcluster.cpp
index 2d980d7..7613ecb 100644
--- a/hcluster.cpp
+++ b/hcluster.cpp
@@ -235,7 +235,7 @@ void HCluster::updateArrayandLinkTable() {
 			}
 //cout << "here3" << endl;			
 			linkTable[colSpot].erase(size);
-			//linkTable.erase(linkTable.begin()+rowSpot);  //delete row
+			linkTable.erase(linkTable.begin()+rowSpot);  //delete row
 	//printInfo();		
 			//update activerows
 			activeLinks.erase(smallRow);
@@ -243,9 +243,9 @@ void HCluster::updateArrayandLinkTable() {
 			activeLinks[size] = colSpot;
 			
 			//adjust everybody elses spot since you deleted - time vs. space
-			//for (it = activeLinks.begin(); it != activeLinks.end(); it++) {
-				//if (it->second > rowSpot) {  activeLinks[it->first]--;	}
-			//}
+			for (it = activeLinks.begin(); it != activeLinks.end(); it++) {
+				if (it->second > rowSpot) {  activeLinks[it->first]--;	}
+			}
 			
 //cout << "here4" << endl;
 	
diff --git a/kmerdb.cpp b/kmerdb.cpp
index 86dba09..bf8679e 100644
--- a/kmerdb.cpp
+++ b/kmerdb.cpp
@@ -29,27 +29,17 @@
 
 /**************************************************************************************************/
 
-KmerDB::KmerDB(string fastaFileName, int kSize) : Database(fastaFileName), kmerSize(kSize) {
+KmerDB::KmerDB(string fastaFileName, int kSize) : Database(), kmerSize(kSize) {
 	try { 
 	
-		string kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
-		ifstream kmerFileTest(kmerDBName.c_str());
+		kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
 		
 		int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
+		count = 0;
 		
 		maxKmer = power4s[kmerSize];
 		kmerLocations.resize(maxKmer+1);
 		
-		if(!kmerFileTest){		//	if we can open the kmer db file, then read it in...
-			mothurOut("Generating the " + kmerDBName + " database...\t");	cout.flush();
-			generateKmerDB(kmerDBName);	
-		}
-		else{					//	...otherwise generate it.
-			mothurOut("Reading in the " + kmerDBName + " database...\t");	cout.flush();
-			readKmerDB(kmerDBName, kmerFileTest);
-		}
-		mothurOut("DONE."); mothurOutEndLine();	mothurOutEndLine(); cout.flush();
-		
 	}
 	catch(exception& e) {
 		errorOut(e, "KmerDB", "KmerDB");
@@ -59,27 +49,21 @@ KmerDB::KmerDB(string fastaFileName, int kSize) : Database(fastaFileName), kmerS
 }
 /**************************************************************************************************/
 
-KmerDB::~KmerDB(){														
-	
-		//for (int i = 0; i < templateSequences.size(); i++) {  delete templateSequences[i]; }
-		// templateSequences.clear(); 
-}
+KmerDB::~KmerDB(){}
 
 /**************************************************************************************************/
 
-Sequence KmerDB::findClosestSequence(Sequence* candidateSeq){
+vector<int> KmerDB::findClosestSequences(Sequence* candidateSeq, int num){
 	try {
-	
+		vector<int> topMatches;
 		Kmer kmer(kmerSize);
-		
 		searchScore = 0;
-		int maxSequence = 0;
 		
 		vector<int> matches(numSeqs, 0);						//	a record of the sequences with shared kmers
 		vector<int> timesKmerFound(kmerLocations.size()+1, 0);	//	a record of the kmers that we have already found
 		
 		int numKmers = candidateSeq->getNumBases() - kmerSize + 1;	
-		
+	
 		for(int i=0;i<numKmers;i++){
 			int kmerNumber = kmer.getKmerNumber(candidateSeq->getUnaligned(), i);		//	go through the query sequence and get a kmer number
 			if(timesKmerFound[kmerNumber] == 0){				//	if we haven't seen it before...
@@ -89,46 +73,36 @@ Sequence KmerDB::findClosestSequence(Sequence* candidateSeq){
 			}
 			timesKmerFound[kmerNumber] = 1;						//	ok, we've seen the kmer now
 		}
-		
-		for(int i=0;i<numSeqs;i++){								//	find the db sequence that shares the most kmers with
-			if(matches[i] > searchScore){					//	the query sequence
-				searchScore = matches[i];
-				maxSequence = i;
-			}
+	
+		vector<seqMatch> seqMatches;
+		for(int i=0;i<numSeqs;i++){		
+			seqMatch temp(i, matches[i]);
+			seqMatches.push_back(temp);
 		}
 		
-		searchScore = 100 * searchScore / (float) numKmers;		//	return the Sequence object corresponding to the db
+		//sorts putting largest matches first
+		sort(seqMatches.begin(), seqMatches.end(), compareSeqMatches);
 		
-		return templateSequences[maxSequence];					//	sequence with the most shared kmers.
+		searchScore = seqMatches[0].match;
+		searchScore = 100 * searchScore / (float) numKmers;		//	return the Sequence object corresponding to the db
+	
+		//save top matches
+		for (int i = 0; i < num; i++) {
+			topMatches.push_back(seqMatches[i].seq);
+		}
 		
+		return topMatches;		
 	}
 	catch(exception& e) {
-		errorOut(e, "KmerDB", "findClosestSequence");
+		errorOut(e, "KmerDB", "findClosestSequences");
 		exit(1);
 	}	
 }
 
 /**************************************************************************************************/
 
-void KmerDB::generateKmerDB(string kmerDBName){
+void KmerDB::generateDB(){
 	try {
-	
-		Kmer kmer(kmerSize);
-		
-		for(int i=0;i<numSeqs;i++){								//	for all of the template sequences...
-			
-			string seq = templateSequences[i].getUnaligned();	//	...take the unaligned sequence...
-			int numKmers = seq.length() - kmerSize + 1;
-			
-			vector<int> seenBefore(maxKmer+1,0);
-			for(int j=0;j<numKmers;j++){						//	...step though the sequence and get each kmer...
-				int kmerNumber = kmer.getKmerNumber(seq, j);
-				if(seenBefore[kmerNumber] == 0){
-					kmerLocations[kmerNumber].push_back(i);		//	...insert the sequence index into kmerLocations for
-				}												//	the appropriate kmer number
-				seenBefore[kmerNumber] = 1;
-			}													
-		}
 		
 		ofstream kmerFile;										//	once we have the kmerLocations folder print it out
 		openOutputFile(kmerDBName, kmerFile);					//	to a file
@@ -144,24 +118,47 @@ void KmerDB::generateKmerDB(string kmerDBName){
 		
 	}
 	catch(exception& e) {
-		errorOut(e, "KmerDB", "generateKmerDB");
+		errorOut(e, "KmerDB", "generateDB");
 		exit(1);
 	}	
 	
 }
-
 /**************************************************************************************************/
-
-void KmerDB::readKmerDB(string kmerDBName, ifstream& kmerDBFile){
+void KmerDB::addSequence(Sequence seq) {
 	try {
+		Kmer kmer(kmerSize);
+		
+		string unaligned = seq.getUnaligned();	//	...take the unaligned sequence...
+		int numKmers = unaligned.length() - kmerSize + 1;
+			
+		vector<int> seenBefore(maxKmer+1,0);
+		for(int j=0;j<numKmers;j++){						//	...step though the sequence and get each kmer...
+			int kmerNumber = kmer.getKmerNumber(unaligned, j);
+			if(seenBefore[kmerNumber] == 0){
+				kmerLocations[kmerNumber].push_back(count);		//	...insert the sequence index into kmerLocations for
+			}												//	the appropriate kmer number
+			seenBefore[kmerNumber] = 1;
+		}													
 	
+		count++;
+	}
+	catch(exception& e) {
+		errorOut(e, "KmerDB", "addSequence");
+		exit(1);
+	}	
+}
+/**************************************************************************************************/
+
+void KmerDB::readKmerDB(ifstream& kmerDBFile){
+	try {
+					
 		kmerDBFile.seekg(0);									//	start at the beginning of the file
 		
 		string seqName;
 		int seqNumber;
-		
+
 		for(int i=0;i<maxKmer;i++){
-			int numValues;	
+			int numValues = 0;	
 			kmerDBFile >> seqName >> numValues;
 			
 			for(int j=0;j<numValues;j++){						//	for each kmer number get the...
diff --git a/kmerdb.hpp b/kmerdb.hpp
index d50c8b2..b64412d 100644
--- a/kmerdb.hpp
+++ b/kmerdb.hpp
@@ -27,13 +27,17 @@ class KmerDB : public Database {
 public:
 	KmerDB(string, int);
 	~KmerDB();
-	Sequence findClosestSequence(Sequence*);
-
+	
+	void generateDB();
+	void addSequence(Sequence);
+	vector<int> findClosestSequences(Sequence*, int);
+	void readKmerDB(ifstream&);
+	
 private:
-	void generateKmerDB(string);
-	void readKmerDB(string, ifstream&);
+	
 	int kmerSize;
-	int maxKmer;
+	int maxKmer, count;
+	string kmerDBName;
 	vector<vector<int> > kmerLocations;
 };
 
diff --git a/knn.cpp b/knn.cpp
new file mode 100644
index 0000000..6d57fb1
--- /dev/null
+++ b/knn.cpp
@@ -0,0 +1,109 @@
+/*
+ *  knn.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 11/4/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "knn.h"
+
+/**************************************************************************************************/
+Knn::Knn(string tfile, string tempFile, string method, int kmerSize, int gapOpen, int gapExtend, int match, int misMatch, int n) 
+: Classify(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch), num(n)  {}
+/**************************************************************************************************/
+string Knn::getTaxonomy(Sequence* seq) {
+	try {
+		string tax;
+		
+		//use database to find closest seq
+		vector<int> closest = database->findClosestSequences(seq, num);
+		
+		vector<string> closestNames;
+		for (int i = 0; i < closest.size(); i++) {
+			//find that sequences taxonomy in map
+			it = taxonomy.find(names[closest[i]]);
+		
+			//is this sequence in the taxonomy file
+			if (it == taxonomy.end()) { //error not in file
+				mothurOut("Error: sequence " + names[closest[i]] + " is not in the taxonomy file.  It will be eliminated as a match to sequence " + seq->getName() + "."); mothurOutEndLine();
+			}else{   closestNames.push_back(it->first);	}
+		}
+		
+		if (closestNames.size() == 0) {
+			mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. " + seq->getName() + " will be disregarded."); mothurOutEndLine();
+			tax = "bad seq";
+		}else{
+			tax = findCommonTaxonomy(closestNames);
+			if (tax == "") { mothurOut("There are no common levels for sequence " + seq->getName() + ". " + seq->getName() + " will be disregarded."); mothurOutEndLine(); tax = "bad seq"; }
+		}
+		
+		return tax;	
+	}
+	catch(exception& e) {
+		errorOut(e, "Knn", "getTaxonomy");
+		exit(1);
+	}
+}
+/**************************************************************************************************/
+string Knn::findCommonTaxonomy(vector<string> closest)  {
+	try {
+		vector< vector<string> > taxons;  //taxon[0] = vector of taxonomy info for closest[0].
+										//so if closest[0] taxonomy is Bacteria;Alphaproteobacteria;Rhizobiales;Azorhizobium_et_rel.;Methylobacterium_et_rel.;Bosea;
+										//taxon[0][0] = Bacteria, taxon[0][1] = Alphaproteobacteria....
+										
+		taxons.resize(closest.size());
+		int smallest = 100;
+		
+		for (int i = 0; i < closest.size(); i++) {
+		
+			string tax = taxonomy[closest[i]];  //we know its there since we checked in getTaxonomy
+		
+			tax = tax.substr(0, tax.length()-1);  //get rid of last ';'
+	
+			//parse taxonomy
+			string individual;
+			while (tax.find_first_of(';') != -1) {
+				individual = tax.substr(0,tax.find_first_of(';'));
+				tax = tax.substr(tax.find_first_of(';')+1, tax.length());
+				taxons[i].push_back(individual);
+			
+			}
+			//get last one
+			taxons[i].push_back(tax);
+		
+			//figure out who has the shortest taxonomy info. so you can start comparing there
+			if (taxons[i].size() < smallest) {
+				smallest = taxons[i].size();
+			}
+		}
+	
+		//start at the highest level all the closest seqs have
+		string common = "";
+		for (int i = (smallest-1); i >= 0; i--) {
+
+			string thistax = taxons[0][i];
+			int num = 0;
+			for (int j = 1; j < taxons.size(); j++) {
+				if (taxons[j][i] != thistax) { break; }
+				num = j;
+			}
+		
+			if (num == (taxons.size()-1)) { //they all match at this level
+				for (int k = 0; k <= i; k++) {
+					common += taxons[0][k] + ';';
+				}
+				break;
+			}
+		}
+	
+		return common;
+	}
+	catch(exception& e) {
+		errorOut(e, "Knn", "findCommonTaxonomy");
+		exit(1);
+	}
+}
+/**************************************************************************************************/
+
diff --git a/knn.h b/knn.h
new file mode 100644
index 0000000..15a33b0
--- /dev/null
+++ b/knn.h
@@ -0,0 +1,35 @@
+#ifndef KNN_H
+#define KNN_H
+
+/*
+ *  knn.h
+ *  Mothur
+ *
+ *  Created by westcott on 11/4/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+ 
+#include "mothur.h"
+#include "classify.h"
+
+/**************************************************************************************************/
+
+class Knn : public Classify {
+	
+public:
+	Knn(string, string, string, int, int, int, int, int, int);
+	~Knn() {};
+	
+	string getTaxonomy(Sequence*);
+	
+private:
+	int num;
+	string findCommonTaxonomy(vector<string>);
+};
+
+/**************************************************************************************************/
+
+#endif
+
+
diff --git a/nast.cpp b/nast.cpp
index f247037..4848eb2 100644
--- a/nast.cpp
+++ b/nast.cpp
@@ -40,12 +40,12 @@ Nast::Nast(Alignment* method, Sequence* cand, Sequence* temp) : alignment(method
 void Nast::pairwiseAlignSeqs(){	//	Here we call one of the pairwise alignment methods to align our unaligned candidate
 								//	and template sequences
 	try {
-		
+
 		alignment->align(candidateSeq->getUnaligned(), templateSeq->getUnaligned());
-	
+		
 		string candAln = alignment->getSeqAAln();
 		string tempAln = alignment->getSeqBAln();
-
+		
 		if(candAln == ""){
 
 			candidateSeq->setPairwise("");
diff --git a/phylotype.cpp b/phylotype.cpp
new file mode 100644
index 0000000..f7af97a
--- /dev/null
+++ b/phylotype.cpp
@@ -0,0 +1,42 @@
+/*
+ *  phylotype.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 11/3/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "phylotype.h"
+
+/**************************************************************************************************/
+PhyloType::PhyloType(string tfile, string tempFile, string method, int kmerSize, int gapOpen, int gapExtend, int match, int misMatch) 
+: Classify(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch)  {}
+/**************************************************************************************************/
+string PhyloType::getTaxonomy(Sequence* seq) {
+	try {
+		string tax;
+		
+		//use database to find closest seq
+		vector<int> closest = database->findClosestSequences(seq, 1);
+		
+		//find that sequences taxonomy in map
+		it = taxonomy.find(names[closest[0]]);
+		
+		//is this sequence in the taxonomy file
+		if (it == taxonomy.end()) { //error not in file
+			mothurOut("Error: sequence " + names[closest[0]] + " is not in the taxonomy file.  It is the closest match to sequence " + seq->getName() + ". " + seq->getName() + " will be disregarded."); mothurOutEndLine();
+			tax = "bad seq";
+		}else {
+			tax = it->second;
+		}
+		
+		return tax;	
+	}
+	catch(exception& e) {
+		errorOut(e, "PhyloType", "getTaxonomy");
+		exit(1);
+	}
+}
+/**************************************************************************************************/
+
diff --git a/phylotype.h b/phylotype.h
new file mode 100644
index 0000000..ee649e3
--- /dev/null
+++ b/phylotype.h
@@ -0,0 +1,34 @@
+#ifndef PHYLOTYPE_H
+#define PHYLOTYPE_H
+
+/*
+ *  phylotype.h
+ *  Mothur
+ *
+ *  Created by westcott on 11/3/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+#include "classify.h"
+
+/**************************************************************************************************/
+
+class PhyloType : public Classify {
+	
+public:
+	PhyloType(string, string, string, int, int, int, int, int);
+	~PhyloType() {};
+	
+	string getTaxonomy(Sequence*);
+	
+	
+private:
+	
+};
+
+/**************************************************************************************************/
+
+#endif
+
diff --git a/suffixdb.cpp b/suffixdb.cpp
index 5bcd8e9..39a9b54 100644
--- a/suffixdb.cpp
+++ b/suffixdb.cpp
@@ -22,41 +22,58 @@
 
 /**************************************************************************************************/
 
-SuffixDB::SuffixDB(string fastaFileName) : Database(fastaFileName) {
-
+SuffixDB::SuffixDB(int numSeqs) : Database() {
 	suffixForest.resize(numSeqs);
-	mothurOut("Generating the suffix tree database...\t");	cout.flush();
-	for(int i=0;i<numSeqs;i++){								//	The parent class' constructor generates the vector of
-		suffixForest[i].loadSequence(templateSequences[i]);	//	template Sequence objects.  Here each of these objects
-	}														//	is used to generate a suffix tree, aka the suffix forest
-	mothurOut("DONE."); mothurOutEndLine();	mothurOutEndLine(); cout.flush();
-
+	count = 0;
 }
-
 /**************************************************************************************************/
-
-Sequence SuffixDB::findClosestSequence(Sequence* candidateSeq){
-
-	int minValue = 2000;
-	int closestSequenceNo = 0;
-	string processedSeq = candidateSeq->convert2ints();		//	the candidate sequence needs to be a string of ints
-	for(int i=0;i<suffixForest.size();i++){					//	scan through the forest and see what the minimum
-		int count = suffixForest[i].countSuffixes(processedSeq, minValue);	//	return score is and keep track of the
-		if(count == minValue){								//	template sequence index that corresponds to that score
-			closestSequenceNo = i;
+//assumes sequences have been added using addSequence
+vector<int> SuffixDB::findClosestSequences(Sequence* candidateSeq, int num){
+	try {
+		vector<int> topMatches;
+		string processedSeq = candidateSeq->convert2ints();		//	the candidate sequence needs to be a string of ints
+		
+		vector<seqMatch> seqMatches;
+		for(int i=0;i<suffixForest.size();i++){					//	scan through the forest and see what the minimum
+			int count = suffixForest[i].countSuffixes(processedSeq);	//	return score is and keep track of the
+			seqMatch temp(i, count);
+			seqMatches.push_back(temp);
+		}
+		
+		//sorts putting smallest matches first
+		sort(seqMatches.begin(), seqMatches.end(), compareSeqMatchesReverse);
+		
+		searchScore = seqMatches[0].match;
+		searchScore = 100 * (1. - searchScore / (float)processedSeq.length());
+		
+		//save top matches
+		for (int i = 0; i < num; i++) {
+			topMatches.push_back(seqMatches[i].seq);
 		}
+
+		//	return the Sequence object that has the minimum score
+		return topMatches;
+	}
+	catch(exception& e) {
+		errorOut(e, "SuffixDB", "findClosestSequences");
+		exit(1);
+	}	
+}
+/**************************************************************************************************/
+//adding the sequences generates the db
+void SuffixDB::addSequence(Sequence seq) {
+	try {
+		suffixForest[count].loadSequence(seq);		
+		count++;
+	}
+	catch(exception& e) {
+		errorOut(e, "SuffixDB", "addSequence");
+		exit(1);
 	}
-	searchScore = 100 * (1. - minValue / (float)processedSeq.length());
-	return templateSequences[closestSequenceNo];			//	return the Sequence object that has the minimum score
-	
 }
-
 /**************************************************************************************************/
 
 SuffixDB::~SuffixDB(){														
-	
 	for (int i = (suffixForest.size()-1); i >= 0; i--) {  suffixForest.pop_back();  }
-	// templateSequences.clear();
-
 }
 /**************************************************************************************************/
diff --git a/suffixdb.hpp b/suffixdb.hpp
index f211aa7..1baa99e 100644
--- a/suffixdb.hpp
+++ b/suffixdb.hpp
@@ -26,12 +26,16 @@ class SuffixTree;
 class SuffixDB : public Database {
 	
 public:
-	SuffixDB(string);
+	SuffixDB(int);
 	~SuffixDB();
-	Sequence findClosestSequence(Sequence*);
+	
+	void generateDB() {}; //adding sequences generates the db
+	void addSequence(Sequence);
+	vector<int> findClosestSequences(Sequence*, int);
 
 private:
 	vector<SuffixTree> suffixForest;
+	int count;
 };
 
 #endif
diff --git a/suffixtree.cpp b/suffixtree.cpp
index 53a6d3f..2842b36 100644
--- a/suffixtree.cpp
+++ b/suffixtree.cpp
@@ -125,7 +125,46 @@ int SuffixTree::countSuffixes(string compareSequence, int& minValue){	//	here we
 	return numSuffixes;											//	change the value and return the number of suffixes
 	
 }
+//********************************************************************************************************************
 
+int SuffixTree::countSuffixes(string compareSequence){	//	here we count the number of suffix parts 
+															//	we need to rewrite a user supplied sequence.  if the 
+	int numSuffixes = 0;									//	count exceeds the supplied minValue, bail out.  The
+	int seqLength = compareSequence.length();				//	time complexity should be O(L)
+	int position = 0;
+	
+	int presentNode = 0;
+	
+	while(position < seqLength){		//	while the position in the query sequence isn't at the end...
+		
+		int newNode = nodeVector[presentNode]->getChild(compareSequence[position]);	//	see if the current node has a
+																//	child that matches the next character in the query
+		if(newNode == -1){										
+			if(presentNode == 0){	position++;		}			//	if not, go back to the root and increase the count
+			numSuffixes++;										//	by one.
+			presentNode = 0;
+		}
+		else{													//	if there is, move to that node and see how far down
+			presentNode = newNode;								//	it we can get
+			
+			for(int i=nodeVector[newNode]->getStartCharPos(); i<=nodeVector[newNode]->getEndCharPos(); i++){
+				if(compareSequence[position] == sequence[i]){
+					position++;									//	as long as the query and branch agree, keep going
+				}
+				else{
+					numSuffixes++;								//	if there is a mismatch, increase the number of 
+					presentNode = 0;							//	suffixes and go back to the root
+					break;
+				}
+			}
+		}
+		//	if we get all the way through the node we'll go to the top of the while loop and find the child node
+		//	that corresponds to what we are interested in		
+	}
+	numSuffixes--;												//	the method puts an extra count on numSuffixes
+	
+	return numSuffixes;											//	change the value and return the number of suffixes
+}
 //********************************************************************************************************************
 
 void SuffixTree::canonize(){	//	if you have to ask how this works, you don't really want to know and this really
diff --git a/suffixtree.hpp b/suffixtree.hpp
index b85cee9..ab262ba 100644
--- a/suffixtree.hpp
+++ b/suffixtree.hpp
@@ -42,6 +42,7 @@ public:
 	string getSeqName();
 	void print();	
 	int countSuffixes(string, int&);
+	int countSuffixes(string);	
 
 private:	
 	void addPrefix(int);