From 526a868606faa50caf86e7399f7554c0335b39e5 Mon Sep 17 00:00:00 2001
From: westcott <westcott>
Date: Mon, 18 May 2009 17:37:53 +0000
Subject: [PATCH] added alignment code

---
 Mothur.xcodeproj/project.pbxproj | 102 ++++++++-
 aligncommand.cpp                 | 151 +++++++++++++
 aligncommand.h                   |  35 +++
 alignment.cpp                    | 163 ++++++++++++++
 alignment.hpp                    |  53 +++++
 alignmentcell.cpp                |  18 ++
 alignmentcell.hpp                |  30 +++
 blastalign.cpp                   | 156 +++++++++++++
 blastalign.hpp                   |  34 +++
 blastdb.cpp                      |  95 ++++++++
 blastdb.hpp                      |  34 +++
 commandfactory.cpp               |   2 +
 database.cpp                     |  32 +--
 database.hpp                     |   7 +-
 distancecommand.cpp              |   9 +-
 distancedb.cpp                   |  65 ++++++
 distancedb.hpp                   |  36 +++
 errorchecking.cpp                |  32 ++-
 errorchecking.h                  |   2 +-
 globaldata.cpp                   |  45 +++-
 globaldata.hpp                   |  12 +-
 gotohoverlap.cpp                 |  90 ++++++++
 gotohoverlap.hpp                 |  43 ++++
 kmer.cpp                         |  70 ++++--
 kmer.hpp                         |   4 +-
 kmerdb.cpp                       | 102 +++++----
 kmerdb.hpp                       |   9 +
 nast.cpp                         | 362 +++++++++++++++++++++++++++++++
 nast.hpp                         |  49 +++++
 nastreport.cpp                   |  92 ++++++++
 nastreport.hpp                   |  51 +++++
 needlemanoverlap.cpp             |  92 ++++++++
 needlemanoverlap.hpp             |  43 ++++
 noalign.cpp                      |  25 +++
 noalign.hpp                      |  31 +++
 overlap.cpp                      |  83 +++++++
 overlap.hpp                      |  37 ++++
 rarefact.cpp                     |  16 +-
 sequence.cpp                     |  45 ++++
 sequence.hpp                     |   9 +-
 suffixdb.cpp                     |  56 +++++
 suffixdb.hpp                     |  36 +++
 suffixnodes.cpp                  |  96 ++++++++
 suffixnodes.hpp                  |  77 +++++++
 suffixtree.cpp                   | 247 +++++++++++++++++++++
 suffixtree.hpp                   |  69 ++++++
 validcommands.cpp                |   1 +
 validparameter.cpp               |   4 +-
 48 files changed, 2841 insertions(+), 111 deletions(-)
 create mode 100644 aligncommand.cpp
 create mode 100644 aligncommand.h
 create mode 100644 alignment.cpp
 create mode 100644 alignment.hpp
 create mode 100644 alignmentcell.cpp
 create mode 100644 alignmentcell.hpp
 create mode 100644 blastalign.cpp
 create mode 100644 blastalign.hpp
 create mode 100644 blastdb.cpp
 create mode 100644 blastdb.hpp
 create mode 100644 distancedb.cpp
 create mode 100644 distancedb.hpp
 create mode 100644 gotohoverlap.cpp
 create mode 100644 gotohoverlap.hpp
 create mode 100644 nast.cpp
 create mode 100644 nast.hpp
 create mode 100644 nastreport.cpp
 create mode 100644 nastreport.hpp
 create mode 100644 needlemanoverlap.cpp
 create mode 100644 needlemanoverlap.hpp
 create mode 100644 noalign.cpp
 create mode 100644 noalign.hpp
 create mode 100644 overlap.cpp
 create mode 100644 overlap.hpp
 create mode 100644 suffixdb.cpp
 create mode 100644 suffixdb.hpp
 create mode 100644 suffixnodes.cpp
 create mode 100644 suffixnodes.hpp
 create mode 100644 suffixtree.cpp
 create mode 100644 suffixtree.hpp
diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj
index 7beb6cb..7bd433f 100644
--- a/Mothur.xcodeproj/project.pbxproj
+++ b/Mothur.xcodeproj/project.pbxproj
@@ -14,6 +14,20 @@
 		372E12700F26365B0095CF7E /* readotucommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E126F0F26365B0095CF7E /* readotucommand.cpp */; };
 		372E12960F263D5A0095CF7E /* readdistcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12950F263D5A0095CF7E /* readdistcommand.cpp */; };
 		372E12ED0F264D320095CF7E /* commandfactory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12EC0F264D320095CF7E /* commandfactory.cpp */; };
+		373C68A40FC1C07D00137ACD /* alignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C68A00FC1C07D00137ACD /* alignment.cpp */; };
+		373C68A50FC1C07D00137ACD /* alignmentcell.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C68A20FC1C07D00137ACD /* alignmentcell.cpp */; };
+		373C68BC0FC1C1A600137ACD /* gotohoverlap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C68BA0FC1C1A600137ACD /* gotohoverlap.cpp */; };
+		373C68C50FC1C25F00137ACD /* overlap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C68C30FC1C25F00137ACD /* overlap.cpp */; };
+		373C68CF0FC1C2D900137ACD /* needlemanoverlap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C68CD0FC1C2D900137ACD /* needlemanoverlap.cpp */; };
+		373C68DD0FC1C38D00137ACD /* blastalign.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C68DB0FC1C38D00137ACD /* blastalign.cpp */; };
+		373C68E50FC1C4A500137ACD /* noalign.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C68E30FC1C4A500137ACD /* noalign.cpp */; };
+		373C68F90FC1C6A800137ACD /* suffixdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C68F30FC1C6A800137ACD /* suffixdb.cpp */; };
+		373C68FA0FC1C6A800137ACD /* suffixnodes.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C68F50FC1C6A800137ACD /* suffixnodes.cpp */; };
+		373C68FB0FC1C6A800137ACD /* suffixtree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C68F70FC1C6A800137ACD /* suffixtree.cpp */; };
+		373C69180FC1C8AF00137ACD /* blastdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C69160FC1C8AF00137ACD /* blastdb.cpp */; };
+		373C691F0FC1C98600137ACD /* nast.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C691D0FC1C98600137ACD /* nast.cpp */; };
+		373C692B0FC1C9EB00137ACD /* nastreport.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C69290FC1C9EB00137ACD /* nastreport.cpp */; };
+		373C69340FC1CA9E00137ACD /* distancedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C69320FC1CA9E00137ACD /* distancedb.cpp */; };
 		374610780F40645300460C57 /* unifracweightedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 374610770F40645300460C57 /* unifracweightedcommand.cpp */; };
 		3746107E0F4064D100460C57 /* weighted.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3746107D0F4064D100460C57 /* weighted.cpp */; };
 		374610830F40652400460C57 /* unweighted.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 374610820F40652400460C57 /* unweighted.cpp */; };
@@ -45,6 +59,7 @@
 		378C1B0A0FB0644E004D63F5 /* sequencedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AFD0FB0644D004D63F5 /* sequencedb.cpp */; };
 		378C1B0B0FB0644E004D63F5 /* sharedjackknife.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AFF0FB0644D004D63F5 /* sharedjackknife.cpp */; };
 		378C1B0C0FB0644E004D63F5 /* sharedmarczewski.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1B010FB0644D004D63F5 /* sharedmarczewski.cpp */; };
+		378DC5CF0FBDE1C8003B8607 /* aligncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378DC5CE0FBDE1C8003B8607 /* aligncommand.cpp */; };
 		379293C30F2DE73400B9034A /* treemap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379293C20F2DE73400B9034A /* treemap.cpp */; };
 		379294700F2E191800B9034A /* parsimonycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3792946F0F2E191800B9034A /* parsimonycommand.cpp */; };
 		3792948A0F2E258500B9034A /* parsimony.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379294890F2E258500B9034A /* parsimony.cpp */; };
@@ -168,6 +183,34 @@
 		372E12940F263D5A0095CF7E /* readdistcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readdistcommand.h; sourceTree = "<group>"; };
 		372E12950F263D5A0095CF7E /* readdistcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readdistcommand.cpp; sourceTree = "<group>"; };
 		372E12EC0F264D320095CF7E /* commandfactory.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = commandfactory.cpp; sourceTree = "<group>"; };
+		373C68A00FC1C07D00137ACD /* alignment.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignment.cpp; sourceTree = "<group>"; };
+		373C68A10FC1C07D00137ACD /* alignment.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = alignment.hpp; sourceTree = "<group>"; };
+		373C68A20FC1C07D00137ACD /* alignmentcell.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignmentcell.cpp; sourceTree = "<group>"; };
+		373C68A30FC1C07D00137ACD /* alignmentcell.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = alignmentcell.hpp; sourceTree = "<group>"; };
+		373C68BA0FC1C1A600137ACD /* gotohoverlap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = gotohoverlap.cpp; sourceTree = "<group>"; };
+		373C68BB0FC1C1A600137ACD /* gotohoverlap.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = gotohoverlap.hpp; sourceTree = "<group>"; };
+		373C68C30FC1C25F00137ACD /* overlap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = overlap.cpp; sourceTree = "<group>"; };
+		373C68C40FC1C25F00137ACD /* overlap.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = overlap.hpp; sourceTree = "<group>"; };
+		373C68CD0FC1C2D900137ACD /* needlemanoverlap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = needlemanoverlap.cpp; sourceTree = "<group>"; };
+		373C68CE0FC1C2D900137ACD /* needlemanoverlap.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = needlemanoverlap.hpp; sourceTree = "<group>"; };
+		373C68DB0FC1C38D00137ACD /* blastalign.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = blastalign.cpp; sourceTree = "<group>"; };
+		373C68DC0FC1C38D00137ACD /* blastalign.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = blastalign.hpp; sourceTree = "<group>"; };
+		373C68E30FC1C4A500137ACD /* noalign.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = noalign.cpp; sourceTree = "<group>"; };
+		373C68E40FC1C4A500137ACD /* noalign.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = noalign.hpp; sourceTree = "<group>"; };
+		373C68F30FC1C6A800137ACD /* suffixdb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = suffixdb.cpp; sourceTree = "<group>"; };
+		373C68F40FC1C6A800137ACD /* suffixdb.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = suffixdb.hpp; sourceTree = "<group>"; };
+		373C68F50FC1C6A800137ACD /* suffixnodes.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = suffixnodes.cpp; sourceTree = "<group>"; };
+		373C68F60FC1C6A800137ACD /* suffixnodes.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = suffixnodes.hpp; sourceTree = "<group>"; };
+		373C68F70FC1C6A800137ACD /* suffixtree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = suffixtree.cpp; sourceTree = "<group>"; };
+		373C68F80FC1C6A800137ACD /* suffixtree.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = suffixtree.hpp; sourceTree = "<group>"; };
+		373C69160FC1C8AF00137ACD /* blastdb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = blastdb.cpp; sourceTree = "<group>"; };
+		373C69170FC1C8AF00137ACD /* blastdb.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = blastdb.hpp; sourceTree = "<group>"; };
+		373C691D0FC1C98600137ACD /* nast.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = nast.cpp; sourceTree = "<group>"; };
+		373C691E0FC1C98600137ACD /* nast.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = nast.hpp; sourceTree = "<group>"; };
+		373C69290FC1C9EB00137ACD /* nastreport.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = nastreport.cpp; sourceTree = "<group>"; };
+		373C692A0FC1C9EB00137ACD /* nastreport.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = nastreport.hpp; sourceTree = "<group>"; };
+		373C69320FC1CA9E00137ACD /* distancedb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = distancedb.cpp; sourceTree = "<group>"; };
+		373C69330FC1CA9E00137ACD /* distancedb.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = distancedb.hpp; sourceTree = "<group>"; };
 		374610760F40645300460C57 /* unifracweightedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = unifracweightedcommand.h; sourceTree = "<group>"; };
 		374610770F40645300460C57 /* unifracweightedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = unifracweightedcommand.cpp; sourceTree = "<group>"; };
 		3746107C0F4064D100460C57 /* weighted.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = weighted.h; sourceTree = "<group>"; };
@@ -233,6 +276,8 @@
 		378C1B000FB0644D004D63F5 /* sharedjackknife.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedjackknife.h; sourceTree = "<group>"; };
 		378C1B010FB0644D004D63F5 /* sharedmarczewski.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedmarczewski.cpp; sourceTree = "<group>"; };
 		378C1B020FB0644D004D63F5 /* sharedmarczewski.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedmarczewski.h; sourceTree = "<group>"; };
+		378DC5CD0FBDE1C8003B8607 /* aligncommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aligncommand.h; sourceTree = "<group>"; };
+		378DC5CE0FBDE1C8003B8607 /* aligncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligncommand.cpp; sourceTree = "<group>"; };
 		379293C10F2DE73400B9034A /* treemap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treemap.h; sourceTree = "<group>"; };
 		379293C20F2DE73400B9034A /* treemap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treemap.cpp; sourceTree = "<group>"; };
 		3792946E0F2E191800B9034A /* parsimonycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parsimonycommand.h; sourceTree = "<group>"; };
@@ -465,6 +510,8 @@
 			isa = PBXGroup;
 			children = (
 				37D927BA0F21331F001D4494 /* averagelinkage.cpp */,
+				373C68DC0FC1C38D00137ACD /* blastalign.hpp */,
+				373C68DB0FC1C38D00137ACD /* blastalign.cpp */,
 				37D928A60F2133C0001D4494 /* calculators */,
 				37D927C20F21331F001D4494 /* cluster.hpp */,
 				37D927C10F21331F001D4494 /* cluster.cpp */,
@@ -479,8 +526,6 @@
 				37D927D10F21331F001D4494 /* commandoptionparser.hpp */,
 				37D927D00F21331F001D4494 /* commandoptionparser.cpp */,
 				37D927D20F21331F001D4494 /* completelinkage.cpp */,
-				37D927D40F21331F001D4494 /* database.hpp */,
-				37D927D30F21331F001D4494 /* database.cpp */,
 				7E4130F70F8E58FA00381DD0 /* dlibshuff.h */,
 				7E4130F60F8E58FA00381DD0 /* dlibshuff.cpp */,
 				37D927D60F21331F001D4494 /* display.h */,
@@ -491,21 +536,29 @@
 				37D927DD0F21331F001D4494 /* fileoutput.cpp */,
 				37D927E00F21331F001D4494 /* globaldata.hpp */,
 				37D927DF0F21331F001D4494 /* globaldata.cpp */,
+				373C68BB0FC1C1A600137ACD /* gotohoverlap.hpp */,
+				373C68BA0FC1C1A600137ACD /* gotohoverlap.cpp */,
 				375873ED0F7D646F0040F377 /* heatmap.h */,
 				375873EE0F7D646F0040F377 /* heatmap.cpp */,
 				37D927E60F21331F001D4494 /* inputdata.h */,
 				37D927E50F21331F001D4494 /* inputdata.cpp */,
-				37D927EA0F21331F001D4494 /* kmer.hpp */,
-				37D927E90F21331F001D4494 /* kmer.cpp */,
-				37D927EC0F21331F001D4494 /* kmerdb.hpp */,
-				37D927EB0F21331F001D4494 /* kmerdb.cpp */,
 				7E412F420F8D213C00381DD0 /* libshuff.h */,
 				7E412FE90F8D3E2C00381DD0 /* libshuff.cpp */,
 				37E3ED800F4D9D0D00B5DF02 /* mothur.h */,
 				37D927EF0F21331F001D4494 /* mothur.cpp */,
 				37D927F10F21331F001D4494 /* nameassignment.hpp */,
 				37D927F00F21331F001D4494 /* nameassignment.cpp */,
+				373C691E0FC1C98600137ACD /* nast.hpp */,
+				373C691D0FC1C98600137ACD /* nast.cpp */,
+				373C692A0FC1C9EB00137ACD /* nastreport.hpp */,
+				373C69290FC1C9EB00137ACD /* nastreport.cpp */,
+				373C68CE0FC1C2D900137ACD /* needlemanoverlap.hpp */,
+				373C68CD0FC1C2D900137ACD /* needlemanoverlap.cpp */,
+				373C68E40FC1C4A500137ACD /* noalign.hpp */,
+				373C68E30FC1C4A500137ACD /* noalign.cpp */,
 				37D927F60F21331F001D4494 /* observable.h */,
+				373C68C40FC1C25F00137ACD /* overlap.hpp */,
+				373C68C30FC1C25F00137ACD /* overlap.cpp */,
 				37D927FC0F21331F001D4494 /* progress.hpp */,
 				37D927FB0F21331F001D4494 /* progress.cpp */,
 				37D928040F21331F001D4494 /* raredisplay.h */,
@@ -672,6 +725,8 @@
 			isa = PBXGroup;
 			children = (
 				37D927CD0F21331F001D4494 /* command.hpp */,
+				378DC5CD0FBDE1C8003B8607 /* aligncommand.h */,
+				378DC5CE0FBDE1C8003B8607 /* aligncommand.cpp */,
 				37C1D9710F86506E0059E3F0 /* binsequencecommand.h */,
 				37C1D9720F86506E0059E3F0 /* binsequencecommand.cpp */,
 				21DDC0190F97A8FE0060691C /* bootstrapsharedcommand.h */,
@@ -743,13 +798,27 @@
 		37D928AC0F213420001D4494 /* containers */ = {
 			isa = PBXGroup;
 			children = (
+				373C68A10FC1C07D00137ACD /* alignment.hpp */,
+				373C68A00FC1C07D00137ACD /* alignment.cpp */,
+				373C68A30FC1C07D00137ACD /* alignmentcell.hpp */,
+				373C68A20FC1C07D00137ACD /* alignmentcell.cpp */,
+				373C69170FC1C8AF00137ACD /* blastdb.hpp */,
+				373C69160FC1C8AF00137ACD /* blastdb.cpp */,
+				37D927D40F21331F001D4494 /* database.hpp */,
+				37D927D30F21331F001D4494 /* database.cpp */,
 				37D927D50F21331F001D4494 /* datavector.hpp */,
+				373C69330FC1CA9E00137ACD /* distancedb.hpp */,
+				373C69320FC1CA9E00137ACD /* distancedb.cpp */,
 				37D927DC0F21331F001D4494 /* fastamap.h */,
 				37D927DB0F21331F001D4494 /* fastamap.cpp */,
 				375873EA0F7D64520040F377 /* fullmatrix.h */,
 				375873EB0F7D64520040F377 /* fullmatrix.cpp */,
 				37D927E20F21331F001D4494 /* groupmap.h */,
 				37D927E10F21331F001D4494 /* groupmap.cpp */,
+				37D927EA0F21331F001D4494 /* kmer.hpp */,
+				37D927E90F21331F001D4494 /* kmer.cpp */,
+				37D927EC0F21331F001D4494 /* kmerdb.hpp */,
+				37D927EB0F21331F001D4494 /* kmerdb.cpp */,
 				37D927EE0F21331F001D4494 /* listvector.hpp */,
 				37D927ED0F21331F001D4494 /* listvector.cpp */,
 				37D927F80F21331F001D4494 /* ordervector.hpp */,
@@ -772,6 +841,12 @@
 				37D928320F21331F001D4494 /* sharedsabundvector.cpp */,
 				37D928450F21331F001D4494 /* sparsematrix.hpp */,
 				37D928440F21331F001D4494 /* sparsematrix.cpp */,
+				373C68F40FC1C6A800137ACD /* suffixdb.hpp */,
+				373C68F30FC1C6A800137ACD /* suffixdb.cpp */,
+				373C68F60FC1C6A800137ACD /* suffixnodes.hpp */,
+				373C68F50FC1C6A800137ACD /* suffixnodes.cpp */,
+				373C68F80FC1C6A800137ACD /* suffixtree.hpp */,
+				373C68F70FC1C6A800137ACD /* suffixtree.cpp */,
 				37AD4DB90F28E2FE00AA2D49 /* tree.h */,
 				37AD4DBA0F28E2FE00AA2D49 /* tree.cpp */,
 				379293C10F2DE73400B9034A /* treemap.h */,
@@ -979,6 +1054,21 @@
 				378C1B0C0FB0644E004D63F5 /* sharedmarczewski.cpp in Sources */,
 				37C753CE0FB3415200DBD02E /* distancecommand.cpp in Sources */,
 				379643ED0FB9B5A80081FDB6 /* readseqs.cpp in Sources */,
+				378DC5CF0FBDE1C8003B8607 /* aligncommand.cpp in Sources */,
+				373C68A40FC1C07D00137ACD /* alignment.cpp in Sources */,
+				373C68A50FC1C07D00137ACD /* alignmentcell.cpp in Sources */,
+				373C68BC0FC1C1A600137ACD /* gotohoverlap.cpp in Sources */,
+				373C68C50FC1C25F00137ACD /* overlap.cpp in Sources */,
+				373C68CF0FC1C2D900137ACD /* needlemanoverlap.cpp in Sources */,
+				373C68DD0FC1C38D00137ACD /* blastalign.cpp in Sources */,
+				373C68E50FC1C4A500137ACD /* noalign.cpp in Sources */,
+				373C68F90FC1C6A800137ACD /* suffixdb.cpp in Sources */,
+				373C68FA0FC1C6A800137ACD /* suffixnodes.cpp in Sources */,
+				373C68FB0FC1C6A800137ACD /* suffixtree.cpp in Sources */,
+				373C69180FC1C8AF00137ACD /* blastdb.cpp in Sources */,
+				373C691F0FC1C98600137ACD /* nast.cpp in Sources */,
+				373C692B0FC1C9EB00137ACD /* nastreport.cpp in Sources */,
+				373C69340FC1CA9E00137ACD /* distancedb.cpp in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
diff --git a/aligncommand.cpp b/aligncommand.cpp
new file mode 100644
index 0000000..81f8e6a
--- /dev/null
+++ b/aligncommand.cpp
@@ -0,0 +1,151 @@
+/*
+ *  aligncommand.cpp
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 5/15/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ *	This version of nast does everything I think that the greengenes nast server does and then some.  I have added the 
+ *	feature of allowing users to define their database, kmer size for searching, alignment penalty values and alignment 
+ *	method.  This latter feature is perhaps most significant.  nastPlus enables a user to use either a Needleman-Wunsch 
+ *	(non-affine gap penalty) or Gotoh (affine gap penalty) pairwise alignment algorithm.  This is significant because it
+ *	allows for a global alignment and not the local alignment provided by bLAst.  Furthermore, it has the potential to
+ *	provide a better alignment because of the banding method employed by blast (I'm not sure about this).
+ *
+ *	to compile type:
+ *		make
+ *
+ *	for basic instructions on how to run nastPlus type:
+ *		./nastPlus
+ */
+
+#include "aligncommand.h"
+#include "sequence.hpp"
+
+#include "alignment.hpp"
+#include "gotohoverlap.hpp"
+#include "needlemanoverlap.hpp"
+#include "blastalign.hpp"
+#include "noalign.hpp"
+
+#include "database.hpp"
+#include "kmerdb.hpp"
+#include "suffixdb.hpp"
+#include "blastdb.hpp"
+#include "distancedb.hpp"
+
+#include "nast.hpp"
+#include "nastreport.hpp"
+
+
+//**********************************************************************************************************************
+AlignCommand::AlignCommand(){
+	try {
+		globaldata = GlobalData::getInstance();
+		candidateFileName = globaldata->inputFileName;
+		templateFileName = globaldata->getTemplateFile();
+		openInputFile(candidateFileName, in);
+		convert(globaldata->getKSize(), kmerSize);
+		convert(globaldata->getMatch(), match);
+		convert(globaldata->getMismatch(), misMatch);
+		convert(globaldata->getGapopen(), gapOpen);
+		convert(globaldata->getGapextend(), gapExtend);
+		distanceFileName = "????";
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the AlignCommand class Function AlignCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the AlignCommand class function AlignCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}	
+}
+
+//**********************************************************************************************************************
+
+AlignCommand::~AlignCommand(){
+	
+}
+
+//**********************************************************************************************************************
+
+int AlignCommand::execute(){
+	try {
+		srand( (unsigned)time( NULL ) );  //needed to assign names to temporary files
+		
+		Database* templateDB;
+		if(globaldata->getSearch() == "kmer")			{	templateDB = new KmerDB(templateFileName, kmerSize);								}
+		else if(globaldata->getSearch() == "suffix")	{	templateDB = new SuffixDB(templateFileName);										}
+		else if(globaldata->getSearch() == "blast")		{	templateDB = new BlastDB(templateFileName, gapOpen, gapExtend, match, misMatch);	}
+		else if(globaldata->getSearch() == "distance")	{	templateDB = new DistanceDB(templateFileName, distanceFileName);					}
+		else {	cout << globaldata->getSearch() << " is not a valid search option. I will run the command using suffix." << endl;
+				templateDB = new SuffixDB(templateFileName);		}
+	
+		Alignment* alignment;
+		if(globaldata->getAlign() == "gotoh")					{	alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, 3000);	}
+		else if(globaldata->getAlign() == "needleman")			{	alignment = new NeedlemanOverlap(gapOpen, match, misMatch, 3000);			}
+		else if(globaldata->getAlign() == "blast")				{	alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);		}
+		else if(globaldata->getAlign() == "noalign")			{	alignment = new NoAlign();													}
+		else {	cout << globaldata->getAlign() << " is not a valid alignment option. I will run the command using blast." << endl;
+				alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);	}
+				
+		int numFastaSeqs=count(istreambuf_iterator<char>(in),istreambuf_iterator<char>(), '>');
+		in.seekg(0);
+	
+		string candidateAligngmentFName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + globaldata->getSearch() + '.' + globaldata->getAlign() + ".nast.align";
+		ofstream candidateAlignmentFile;
+		openOutputFile(candidateAligngmentFName, candidateAlignmentFile);
+
+		string candidateReportFName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + globaldata->getSearch() + '.' + globaldata->getAlign() + ".nast.report";
+		NastReport report(candidateReportFName);
+
+		cout << "We are going to align the " << numFastaSeqs << " sequences in " << candidateFileName << "..." << endl;
+		cout.flush();
+	
+		int start = time(NULL);
+
+		for(int i=0;i<numFastaSeqs;i++){
+
+			Sequence* candidateSeq = new Sequence(in);
+			report.setCandidate(candidateSeq);
+
+			Sequence* templateSeq = templateDB->findClosestSequence(candidateSeq);
+			report.setTemplate(templateSeq);
+			report.setSearchParameters(globaldata->getSearch(), templateDB->getSearchScore());
+				
+			Nast nast(alignment, candidateSeq, templateSeq);
+			report.setAlignmentParameters(globaldata->getAlign(), alignment);
+			report.setNastParameters(nast);
+
+			candidateAlignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl;
+			candidateAlignmentFile.flush();
+
+			if((i+1) % 100 == 0){
+				cout << "It has taken " << time(NULL) - start << " secs to align " << i+1 << " sequences" << endl;
+			}
+			report.print();
+		
+			delete candidateSeq;		
+		}
+		
+		cout << "It took " << time(NULL) - start << " secs to align " << numFastaSeqs << " sequences" << endl;
+		cout << endl;
+
+		delete templateDB;
+		delete alignment;
+
+				
+		return 0;
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the AlignCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the AlignCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}	
+}
+
+//**********************************************************************************************************************
\ No newline at end of file
diff --git a/aligncommand.h b/aligncommand.h
new file mode 100644
index 0000000..cf30a6b
--- /dev/null
+++ b/aligncommand.h
@@ -0,0 +1,35 @@
+#ifndef ALIGNCOMMAND_H
+#define ALIGNCOMMAND_H
+
+/*
+ *  aligncommand.h
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 5/15/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+#include "globaldata.hpp"
+
+class AlignCommand : public Command {
+	
+	public:
+		AlignCommand();	
+		~AlignCommand();
+		int execute();	
+	
+	private:
+		GlobalData* globaldata;
+		string candidateFileName, templateFileName, distanceFileName;
+		int kmerSize;
+		float match, misMatch, gapOpen, gapExtend;
+		ofstream out;
+		ifstream in;
+
+};
+
+
+
+#endif
\ No newline at end of file
diff --git a/alignment.cpp b/alignment.cpp
new file mode 100644
index 0000000..6db3b3d
--- /dev/null
+++ b/alignment.cpp
@@ -0,0 +1,163 @@
+/*
+ *  alignment.cpp
+ *
+ *  Created by Pat Schloss on 12/15/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *  This is a class for an abstract datatype for classes that implement various types of alignment	algorithms.
+ *	As of 12/18/08 these included alignments based on blastn, needleman-wunsch, and the	Gotoh algorithms
+ *  
+ */
+
+using namespace std;
+
+#include <string>
+#include <vector>
+
+#include "alignmentcell.hpp"
+#include "alignment.hpp"
+
+#include <iostream>
+
+/**************************************************************************************************/
+
+Alignment::Alignment() {	/*	do nothing	*/	}
+
+/**************************************************************************************************/
+
+Alignment::Alignment(int A) : nCols(A), nRows(A) {
+	
+	alignment.resize(nRows);			//	For the Gotoh and Needleman-Wunsch we initialize the dynamic programming
+	for(int i=0;i<nRows;i++){			//	matrix by initializing a matrix that is A x A.  By default we will set A
+		alignment[i].resize(nCols);		//	at 2000 for 16S rRNA gene sequences
+	}	
+	
+}
+
+/**************************************************************************************************/
+
+void Alignment::traceBack(){			//	This traceback routine is used by the dynamic programming algorithms
+										//	to fill the values of seqAaln and seqBaln
+	seqAaln = "";
+	seqBaln = "";
+	int row = lB-1;
+	int column = lA-1;
+//	seqAstart = 1;
+//	seqAend = column;
+	
+	AlignmentCell currentCell = alignment[row][column];	//	Start the traceback from the bottom-right corner of the
+														//	matrix
+	
+	if(currentCell.prevCell == 'x'){	seqAaln = seqBaln = "NOALIGNMENT";		}//If there's an 'x' in the bottom-
+	else{												//	right corner bail out because it means nothing got aligned
+		while(currentCell.prevCell != 'x'){				//	while the previous cell isn't an 'x', keep going...
+			
+			if(currentCell.prevCell == 'u'){			//	if the pointer to the previous cell is 'u', go up in the
+				seqAaln = '-' + seqAaln;				//	matrix.  this indicates that we need to insert a gap in
+				seqBaln = seqB[row] + seqBaln;			//	seqA and a base in seqB
+				currentCell = alignment[--row][column];
+			}
+			else if(currentCell.prevCell == 'l'){		//	if the pointer to the previous cell is 'l', go to the left
+				seqBaln = '-' + seqBaln;				//	in the matrix.  this indicates that we need to insert a gap
+				seqAaln = seqA[column] + seqAaln;		//	in seqB and a base in seqA
+				currentCell = alignment[row][--column];
+			}
+			else{
+				seqAaln = seqA[column] + seqAaln;		//	otherwise we need to go diagonally up and to the left,
+				seqBaln = seqB[row] + seqBaln;			//	here we add a base to both alignments
+				currentCell = alignment[--row][--column];
+			}
+		}
+	}
+	
+	pairwiseLength = seqAaln.length();
+	seqAstart = 1;	seqAend = 0;
+	seqBstart = 1;	seqBend = 0;
+	
+	for(int i=0;i<seqAaln.length();i++){
+		if(seqAaln[i] != '-' && seqBaln[i] == '-')		{	seqAstart++;	}
+		else if(seqAaln[i] == '-' && seqBaln[i] != '-')	{	seqBstart++;	}
+		else											{	break;			}
+	}
+	
+	pairwiseLength -= (seqAstart + seqBstart - 2);
+	
+	for(int i=seqAaln.length()-1; i>=0;i--){
+		if(seqAaln[i] != '-' && seqBaln[i] == '-')		{	seqAend++;		}
+		else if(seqAaln[i] == '-' && seqBaln[i] != '-')	{	seqBend++;		}
+		else											{	break;			}
+	}
+	pairwiseLength -= (seqAend + seqBend);
+
+	seqAend = seqA.length() - seqAend - 1;
+	seqBend = seqB.length() - seqBend - 1;
+
+}
+
+/**************************************************************************************************/
+
+string Alignment::getSeqAAln(){
+	return seqAaln;										//	this is called to get the alignment of seqA
+}
+
+/**************************************************************************************************/
+
+string Alignment::getSeqBAln(){
+	return seqBaln;										//	this is called to get the alignment of seqB							
+}
+
+/**************************************************************************************************/
+
+int Alignment::getCandidateStartPos(){
+	return seqAstart;									//	this is called to report the quality of the alignment
+}
+
+/**************************************************************************************************/
+
+int Alignment::getCandidateEndPos(){
+	return seqAend;										//	this is called to report the quality of the alignment
+}
+
+/**************************************************************************************************/
+
+int Alignment::getTemplateStartPos(){
+	return seqBstart;									//	this is called to report the quality of the alignment
+}
+
+/**************************************************************************************************/
+
+int Alignment::getTemplateEndPos(){
+	return seqBend;										//	this is called to report the quality of the alignment
+}
+
+/**************************************************************************************************/
+
+int Alignment::getPairwiseLength(){
+	return pairwiseLength;								//	this is the pairwise alignment length
+}
+
+/**************************************************************************************************/
+
+//int Alignment::getLongestTemplateGap(){
+//
+//	int length = seqBaln.length();
+//	int longGap = 0;
+//	int gapLength = 0;
+//	
+//	int start = seqAstart;
+//	if(seqAstart < seqBstart){	start = seqBstart;	}
+//	for(int i=seqAstart;i<length;i++){
+//		if(seqBaln[i] == '-'){
+//			gapLength++;
+//		}
+//		else{
+//			if(gapLength > 0){
+//				if(gapLength > longGap){	longGap = gapLength;	}
+//			}
+//			gapLength = 0;
+//		}
+//	}
+//	return longGap;
+//}
+
+/**************************************************************************************************/
diff --git a/alignment.hpp b/alignment.hpp
new file mode 100644
index 0000000..3367812
--- /dev/null
+++ b/alignment.hpp
@@ -0,0 +1,53 @@
+#ifndef DPALIGNMENT_H
+#define DPALIGNMENT_H
+
+/*
+ *  dpalignment.h
+ *  
+ *
+ *  Created by Pat Schloss on 12/15/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *  This is a class for an abstract datatype for classes that implement various types of alignment	algorithms.
+ *	As of 12/18/08 these included alignments based on blastn, needleman-wunsch, and the	Gotoh algorithms
+ * 
+ */
+using namespace std;
+
+#include "mothur.h"
+#include "alignmentcell.hpp"
+
+/**************************************************************************************************/
+
+class Alignment {
+	
+public:
+	Alignment(int);
+	Alignment();
+	virtual void align(string, string) = 0;
+	
+	float getAlignmentScore();
+	string getSeqAAln();
+	string getSeqBAln();
+	int getCandidateStartPos();
+	int getCandidateEndPos();
+	int getTemplateStartPos();
+	int getTemplateEndPos();
+
+	int getPairwiseLength();
+//	int getLongestTemplateGap();
+
+protected:
+	void traceBack();
+	string seqA, seqAaln;
+	string seqB, seqBaln;
+	int seqAstart, seqAend;
+	int seqBstart, seqBend;
+	int pairwiseLength;
+	int nRows, nCols, lA, lB;
+	vector<vector<AlignmentCell> > alignment;
+};
+
+/**************************************************************************************************/
+
+#endif
diff --git a/alignmentcell.cpp b/alignmentcell.cpp
new file mode 100644
index 0000000..544c218
--- /dev/null
+++ b/alignmentcell.cpp
@@ -0,0 +1,18 @@
+/*
+ *  alignmentcell.cpp
+ *
+ *  Created by Pat Schloss on 12/15/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	This class is pretty basic.  Each AlignmentCell object contains a pointer to the previous cell and different values
+ *	used to calcualte the alignment.  Initially everything is set to zero and all pointers are set to 'x'
+ *  
+ */
+
+#include "alignmentcell.hpp"
+
+//********************************************************************************************************************
+
+AlignmentCell::AlignmentCell() : prevCell('x'), cValue(0.0000), dValue(0.0000), iValue(0.0000) {};
+
+//********************************************************************************************************************
diff --git a/alignmentcell.hpp b/alignmentcell.hpp
new file mode 100644
index 0000000..c309048
--- /dev/null
+++ b/alignmentcell.hpp
@@ -0,0 +1,30 @@
+#ifndef ALIGNMENTCELL_H
+#define ALIGNMENTCELL_H
+
+/*
+ *  alignmentcell.hpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/15/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	This class is pretty basic.  Each AlignmentCell object contains a pointer to the previous cell and different values
+ *	used to calcualte the alignment.  Initially everything is set to zero and all pointers are set to 'x'
+ *  
+ */
+
+//********************************************************************************************************************
+
+class AlignmentCell {
+	
+public:
+	AlignmentCell();
+	char prevCell;
+	float cValue;
+	float dValue;
+	float iValue;
+};
+
+//********************************************************************************************************************
+
+#endif
diff --git a/blastalign.cpp b/blastalign.cpp
new file mode 100644
index 0000000..0a1350f
--- /dev/null
+++ b/blastalign.cpp
@@ -0,0 +1,156 @@
+/*
+ *  blastalign.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/16/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	This is a basic alignment method that gets the blast program to do the heavy lifting.  In the future, we should
+ *	probably incorporate NCBI's library so that we don't have to call on a user-supplied executable.  This is a child
+ *	of the Alignment class, which requires a constructor and align method.
+ *
+ */
+
+using namespace std;
+
+#include "alignment.hpp"
+#include "blastalign.hpp"
+
+
+//**************************************************************************************************/
+
+BlastAlignment::BlastAlignment(float go, float ge, float m, float mm) : 
+			match(m),				//	This is the score to award for two nucleotides matching (match >= 0)
+			mismatch(mm)			//	This is the penalty to assess for a mismatch (mismatch <= 0)
+{
+	gapOpen = abs(go);				//	This is the penalty to assess for opening a gap (gapOpen >= 0)
+	gapExtend = abs(ge);				//	This is the penalty to assess for extending a gap (gapExtend >= 0)
+		
+	int randNumber = rand();
+	candidateFileName = toString(randNumber) + ".candidate";
+	templateFileName = toString(randNumber) + ".template";
+	blastFileName = toString(randNumber) + ".pairwise";
+}
+
+//**************************************************************************************************/
+
+BlastAlignment::~BlastAlignment(){		//	The desctructor should clean up by removing the temporary 
+	remove(candidateFileName.c_str());	//	files used to run bl2seq
+	remove(templateFileName.c_str());
+	remove(blastFileName.c_str());
+}
+
+//**************************************************************************************************/
+
+void BlastAlignment::align(string seqA, string seqB){	//Use blastn to align the two sequences
+
+	ofstream candidateFile(candidateFileName.c_str());	//	Write the sequence to be aligned to a temporary candidate seq file
+	candidateFile << ">candidate" << endl << seqA << endl;
+	candidateFile.close();
+	
+	ofstream templateFile(templateFileName.c_str());	//	Write the unaligned template sequence to a temporary candidate seq file
+	templateFile << ">template" << endl << seqB << endl;
+	templateFile.close();
+	
+	//	The blastCommand assumes that we have DNA sequences (blastn) and that they are fairly similar (-e 0.001) and
+	//	that we don't want to apply any kind of complexity filtering (-F F)
+	string blastCommand = "~/Pipeline/src/cpp/production/blast/bin/bl2seq -p blastn -i " + candidateFileName + " -j " + templateFileName + " -e 0.0001 -F F -o " + blastFileName + " -W 11";
+	blastCommand += " -r " + toString(match) + " -q " + toString(mismatch);
+	blastCommand +=	" -G " + toString(gapOpen) + " -E " + toString(gapExtend);
+	
+	system(blastCommand.c_str());	//	Here we assume that "bl2seq" is in the users path or in the same folder as
+									//	this executable
+	setPairwiseSeqs();
+}
+
+/**************************************************************************************************/
+
+void BlastAlignment::setPairwiseSeqs(){	//	This method call assigns the blast generated alignment
+															//	to the pairwise entry in the Sequence class for the 
+															//	candidate and template Sequence objects
+	ifstream blastFile;
+	openInputFile(blastFileName, blastFile);
+	
+	seqAaln = "";
+	seqBaln = "";
+	
+	int candidateLength, templateLength;
+	char d;
+	
+	string candidateName, templateName;
+	
+	while(d=blastFile.get() != '='){};
+	blastFile >> candidateName;					//	Get the candidate sequence name from flatfile
+	
+	while(d=blastFile.get() != '('){};
+	blastFile >> candidateLength;				//	Get the candidate sequence length from flatfile
+	
+	while(d=blastFile.get()){
+		if(d == '>'){
+			blastFile >> templateName;			//	Get the template sequence name from flatfile
+			break;
+		}
+		else if(d == '*'){									//	We go here if there is no significant match
+			
+			seqAstart = 0;
+			seqBstart = 0;
+			seqAend = 0;
+			seqBend = 0;
+			pairwiseLength = 0;
+			
+//			string dummy;
+//			while(dummy != "query:"){	cout << dummy << endl;blastFile >> dummy;	}
+//			blastFile >> seqBend;
+//			cout << seqBend << endl;
+//			for(int i=0;i<seqBend;i++){
+//				seqAaln += 'Z';
+//				seqBaln += 'X';
+//			}
+//			pairwiseLength = 0;
+			return;
+		}
+	}
+	
+	while(d=blastFile.get() != '='){};
+	blastFile >> templateLength;				//	Get the template sequence length from flatfile
+		
+	while(d=blastFile.get() != 'Q'){};			//	Suck up everything else until we get to the start of the alignment
+	int queryStart, sbjctStart, queryEnd, sbjctEnd;
+	string queryLabel, sbjctLabel, query, sbjct;
+
+	blastFile >> queryLabel;	queryLabel = 'Q' + queryLabel;
+
+	
+	while(queryLabel == "Query:"){
+		blastFile >> queryStart >> query >> queryEnd;
+		
+		while(d=blastFile.get() != 'S'){};
+		
+		blastFile >> sbjctLabel >> sbjctStart >> sbjct >> sbjctEnd;
+		
+		if(seqAaln == ""){
+			seqAstart = queryStart;
+			seqBstart = sbjctStart;
+		}
+
+		seqAaln += query;					//	concatenate each line of the sequence to what we already have
+		seqBaln += sbjct;					//	for the query and template (subject) sequence
+		
+		blastFile >> queryLabel;
+	}
+	seqAend = queryEnd;
+	seqBend = sbjctEnd;
+	pairwiseLength = seqAaln.length();
+
+	for(int i=1;i<seqBstart;i++){				//	Since the alignments don't always start at (1, 1), we need to pad
+		seqAaln = 'Z' + seqAaln;				//	the sequences so that they start at the same point
+		seqBaln = 'X' + seqBaln;
+	}
+	
+	for(int i=seqBend+1;i<=templateLength;i++){	//	since the sequences don't necessarily end at the same point, we
+		seqAaln += 'Z';							//	again need ot pad the sequences so that they extend to the length
+		seqBaln += 'X';							//	of the template sequence
+	}
+}
+
+//**************************************************************************************************/
diff --git a/blastalign.hpp b/blastalign.hpp
new file mode 100644
index 0000000..0f755d8
--- /dev/null
+++ b/blastalign.hpp
@@ -0,0 +1,34 @@
+/*
+ *  blastalign.hpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/16/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	This is a basic alignment method that gets the blast program to do the heavy lifting.  In the future, we should
+ *	probably incorporate NCBI's library so that we don't have to call on a user-supplied executable.  This is a child
+ *	of the Alignment class, which requires a constructor and align method.
+ *
+ */
+ 
+#include "mothur.h"
+
+class BlastAlignment : public Alignment {
+
+public:
+	BlastAlignment(float, float, float, float);
+	~BlastAlignment();
+	void align(string, string);
+private:
+
+	string candidateFileName;
+	string templateFileName;
+	string blastFileName;
+
+	void setPairwiseSeqs();
+	float match;
+	float mismatch;
+	float gapOpen;
+	float gapExtend;
+};
+
diff --git a/blastdb.cpp b/blastdb.cpp
new file mode 100644
index 0000000..ef8f737
--- /dev/null
+++ b/blastdb.cpp
@@ -0,0 +1,95 @@
+/*
+ *  blastdb.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/22/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+using namespace std;
+
+
+#include "database.hpp"
+#include "sequence.hpp"
+#include "blastdb.hpp"
+
+/**************************************************************************************************/
+
+BlastDB::BlastDB(string fastaFileName, float gO, float gE, float m, float mM) : Database(fastaFileName), 
+gapOpen(gO), gapExtend(gE), match(m), misMatch(mM) {
+
+	cout << "Generating the temporary BLAST database...\t";	cout.flush();
+
+	int randNumber = rand();
+	dbFileName = toString(randNumber) + ".template.unaligned.fasta";
+	queryFileName = toString(randNumber) + ".candidate.unaligned.fasta";
+	blastFileName = toString(randNumber) + ".blast";
+
+
+	ofstream unalignedFastaFile;
+	openOutputFile(dbFileName, unalignedFastaFile);				
+	
+	for(int i=0;i<numSeqs;i++){									//	generating a fasta file with unaligned template
+		unalignedFastaFile << '>' << i << endl;					//	sequences, which will be input to formatdb
+		unalignedFastaFile << templateSequences[i]->getUnaligned() << endl;
+	}
+	unalignedFastaFile.close();
+	
+	string formatdbCommand = "~/Pipeline/src/cpp/production/blast/bin/formatdb -p F -o T -i " + dbFileName;	//	format the database, -o option gives us the ability
+	system(formatdbCommand.c_str());								//	to get the right sequence names, i think. -p F
+																	//	option tells formatdb that seqs are DNA, not prot
+	cout << "DONE." << endl << endl;	cout.flush();
+	emptySequence = new Sequence();
+	emptySequence->setName("no_match");
+	emptySequence->setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+	emptySequence->setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+
+
+}
+
+/**************************************************************************************************/
+
+BlastDB::~BlastDB(){
+	remove(queryFileName.c_str());				//	let's clean stuff up and remove the temp files
+	remove(dbFileName.c_str());					//	let's clean stuff up and remove the temp files
+	remove(blastFileName.c_str());				//	let's clean stuff up and remove the temp files
+}
+
+/**************************************************************************************************/
+
+Sequence* BlastDB::findClosestSequence(Sequence* candidate){
+
+	ofstream queryFile;
+	openOutputFile(queryFileName, queryFile);
+	queryFile << '>' << candidate->getName() << endl;
+	queryFile << candidate->getUnaligned() << endl;
+	queryFile.close();
+	
+	
+//	the goal here is to quickly survey the database to find the closest match.  To do this we are using the default
+//	wordsize used in megablast.  I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
+//	long.  With this setting, it seems comparable in speed to the suffix tree approach.
+
+	string blastCommand = "~/Pipeline/src/cpp/production/blast/bin/blastall -p blastn -d " + dbFileName + " -b 1 -m 8 -W 28";
+	blastCommand += (" -i " + queryFileName + " -o " + blastFileName);
+	system(blastCommand.c_str());
+	
+	ifstream m8FileHandle;
+	openInputFile(blastFileName, m8FileHandle);
+	
+	string dummy;
+	int templateAccession;
+	gobble(m8FileHandle);
+	if(!m8FileHandle.eof()){
+		m8FileHandle >> dummy >> templateAccession >> searchScore;
+		m8FileHandle.close();
+		return templateSequences[templateAccession];
+	}
+	else{
+		searchScore = 0.00;
+		return emptySequence;
+	}
+}
+
+/**************************************************************************************************/
diff --git a/blastdb.hpp b/blastdb.hpp
new file mode 100644
index 0000000..a952897
--- /dev/null
+++ b/blastdb.hpp
@@ -0,0 +1,34 @@
+#ifndef BLASTDB_HPP
+#define BLASTDB_HPP
+
+
+/*
+ *  blastdb.hpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/22/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+
+class BlastDB : public Database {
+public:
+	BlastDB(string, float, float, float, float);
+	~BlastDB();
+	Sequence* findClosestSequence(Sequence*);
+
+private:
+	string dbFileName;
+	string queryFileName;
+	string blastFileName;
+	
+	float gapOpen;
+	float gapExtend;
+	float match;
+	float misMatch;
+	Sequence* emptySequence;
+};
+
+#endif
diff --git a/commandfactory.cpp b/commandfactory.cpp
index 2e8aca7..d41e4a8 100644
--- a/commandfactory.cpp
+++ b/commandfactory.cpp
@@ -41,6 +41,7 @@
 #include "bootstrapsharedcommand.h"
 #include "concensuscommand.h"
 #include "distancecommand.h"
+#include "aligncommand.h"
 
 
 /***********************************************************/
@@ -93,6 +94,7 @@ Command* CommandFactory::getCommand(string commandName){
 		else if(commandName == "bootstrap.shared")		{   command = new BootSharedCommand();			}
 		else if(commandName == "concensus")				{   command = new ConcensusCommand();			}
 		else if(commandName == "dist.seqs")				{   command = new DistanceCommand();			}
+		else if(commandName == "align.seqs")			{   command = new AlignCommand();				}
 		else											{	command = new NoCommand();					}
 
 		return command;
diff --git a/database.cpp b/database.cpp
index 933bed4..8c080db 100644
--- a/database.cpp
+++ b/database.cpp
@@ -15,24 +15,23 @@ using namespace std;
 
 /**************************************************************************************************/
 
-Database::Database(string fastaFileName){
-	
-	ifstream fastaFile(fastaFileName.c_str());
-	if(!fastaFile) {
-		cerr << "Error: Could not open " << fastaFileName << endl;
-		exit(1);
-	}
+Database::Database(string fastaFileName){		//	This assumes that the template database is in fasta format, may 
+												//	need to alter this in the future?
+
+	ifstream fastaFile;
+	openInputFile(fastaFileName, fastaFile);
+
 	cout << endl << "Reading in the " << fastaFileName << " template sequences...\t";	cout.flush();
 
-	numSeqs=count(istreambuf_iterator<char>(fastaFile),istreambuf_iterator<char>(), '>');
-	fastaFile.seekg(0);
+	numSeqs=count(istreambuf_iterator<char>(fastaFile),istreambuf_iterator<char>(), '>');	//	count the number of
+	fastaFile.seekg(0);																		//	sequences
 	
 	templateSequences.resize(numSeqs);
 	
 	string seqName, sequence;
 	for(int i=0;i<numSeqs;i++){
-		templateSequences[i] = new Sequence();
-		
+		templateSequences[i] = new Sequence();		//	We're storing the aligned template sequences as a vector of
+													//	pointers to Sequence objects
 		fastaFile >> seqName;
 		templateSequences[i]->setName(seqName);
 		
@@ -42,12 +41,13 @@ Database::Database(string fastaFileName){
 		while(fastaFile && (letter=fastaFile.get()) != '>'){
 			if(isprint(letter)){
 				letter = toupper(letter);
+				if(letter == 'U'){letter = 'T';}
 				aligned += letter;
 			}
 		}
-		templateSequences[i]->setAligned(aligned);
-		templateSequences[i]->setUnaligned(aligned);
-		fastaFile.putback(letter);
+		templateSequences[i]->setAligned(aligned);	//	Obviously, we need the fully aligned template sequence
+		templateSequences[i]->setUnaligned(aligned);//	We will also need the unaligned sequence for pairwise alignments
+		fastaFile.putback(letter);					//	and database searches
 	}
 	
 	fastaFile.close();
@@ -56,3 +56,7 @@ Database::Database(string fastaFileName){
 }
 
 /**************************************************************************************************/
+
+float Database::getSearchScore()	{	return searchScore;		}	//	we're assuming that the search is already done
+
+/**************************************************************************************************/
diff --git a/database.hpp b/database.hpp
index e80bfb5..09fea85 100644
--- a/database.hpp
+++ b/database.hpp
@@ -12,14 +12,17 @@
 
 #include "mothur.h"
 
+class Sequence;
+
 class Database {
 public:
 	Database(string);
 	virtual Sequence* findClosestSequence(Sequence*) = 0;
-	
+	virtual float getSearchScore();
 protected:
 	int numSeqs;
-	vector<Sequence*> templateSequences;	
+	float searchScore;
+	vector<Sequence*> templateSequences;
 };
 
 #endif
diff --git a/distancecommand.cpp b/distancecommand.cpp
index e5878b3..7d1f6a0 100644
--- a/distancecommand.cpp
+++ b/distancecommand.cpp
@@ -88,11 +88,12 @@ int DistanceCommand::execute(){
 		
 		remove(distFile.c_str());
 		
-	//	#	if defined (WIN_VERSION)
+		//#	if defined (_WIN32)
+			//figure out how to implement the fork and wait commands in windows
 		//	driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);
-	//	#	endif
+		//#	endif
 		
-		#	if defined (__APPLE__) || (__MACH__)
+		#	if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 			if(processors == 1){
 				driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);	
 			}	
@@ -168,6 +169,8 @@ int DistanceCommand::execute(){
 				wait(NULL);
 			}
 			wait(NULL);
+		#	else
+			driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);
 		#	endif
 	
 		delete distCalculator;
diff --git a/distancedb.cpp b/distancedb.cpp
new file mode 100644
index 0000000..ce74854
--- /dev/null
+++ b/distancedb.cpp
@@ -0,0 +1,65 @@
+/*
+ *  distancedb.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/29/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+
+using namespace std;
+
+
+#include "database.hpp"
+#include "sequence.hpp"
+#include "distancedb.hpp"
+
+
+/**************************************************************************************************/
+
+DistanceDB::DistanceDB(string fastaFileName, string distanceFileName) : Database(fastaFileName) {
+	
+	ifstream inputData;
+	openInputFile(distanceFileName, inputData);
+	int numCandSeqs=count(istreambuf_iterator<char>(inputData),istreambuf_iterator<char>(), '\n');	//	count the number of
+	inputData.seekg(0);																		//	sequences
+
+	hit closestMatch;
+	string candidateSeqName;
+	string junk;
+	
+	mostSimSequenceVector.resize(numCandSeqs);
+	
+	for(int i=0;i<numCandSeqs;i++){
+		inputData >> candidateSeqName >> closestMatch.seqName >> closestMatch.indexNumber >> closestMatch.simScore;
+//		getline(inputData, junk);	
+		mostSimSequenceVector[i] = closestMatch;
+	}
+	cout << numCandSeqs << endl;
+	searchIndex = 0;
+	inputData.close();
+}
+
+/**************************************************************************************************/
+
+Sequence* DistanceDB::findClosestSequence(Sequence* candidateSeq){
+	
+	hit simAccession = mostSimSequenceVector[searchIndex];
+//	string candidateSeqName, closestMatchSeqName, junk;
+//	int closestMatchIndexNumber;
+//	float closestMatchSimScore;
+//	
+//	inputData >> candidateSeqName >> closestMatchSeqName >> closestMatchIndexNumber >> closestMatchSimScore;
+//	getline(inputData, junk);	
+
+//	searchScore = 100. * closestMatchSimScore;
+
+	searchScore = 100. * simAccession.simScore;
+	searchIndex++;
+//	return templateSequences[closestMatchIndexNumber];
+	return templateSequences[simAccession.indexNumber];
+	
+}
+
+/**************************************************************************************************/
diff --git a/distancedb.hpp b/distancedb.hpp
new file mode 100644
index 0000000..dc4c9a6
--- /dev/null
+++ b/distancedb.hpp
@@ -0,0 +1,36 @@
+#ifndef DISTANCEDB_HPP
+#define DISTANCEDB_HPP
+
+/*
+ *  distancedb.hpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/29/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+
+using namespace std;
+
+#include "mothur.h"
+
+class DistanceDB : public Database {
+	
+public:
+	DistanceDB(string, string);
+	Sequence* findClosestSequence(Sequence*);
+	
+private:
+
+	struct hit{
+		string seqName;
+		int indexNumber;
+		float simScore;
+	};
+	vector<hit> mostSimSequenceVector;
+//	ifstream inputData;
+	int searchIndex;
+};
+
+#endif
diff --git a/errorchecking.cpp b/errorchecking.cpp
index 8aaa8b0..dfe9607 100644
--- a/errorchecking.cpp
+++ b/errorchecking.cpp
@@ -118,7 +118,14 @@ bool ErrorCheck::checkInput(string input) {
 				if (parameter == "scale" )			{ scale = value;	}
 				if (parameter == "ends" )			{ ends = value; }
 				if (parameter == "processors" )		{ processors = value;	}
-
+				if (parameter == "template")		{ templatefile = value;	}
+				if (parameter == "search")			{ search = value;		}
+				if (parameter == "ksize")			{ ksize = value;		}
+				if (parameter == "align")		    { align = value;		}
+				if (parameter == "match")			{ match = value;		}
+				if (parameter == "mismatch")		{ mismatch = value;	    }
+				if (parameter == "gapopen")			{ gapopen = value;		}
+				if (parameter == "gapextend" )		{ gapextend = value;	}
 			}
 			
 			//gets the last parameter and value
@@ -160,6 +167,14 @@ bool ErrorCheck::checkInput(string input) {
 				if (parameter == "scale" )			{ scale = value;	}
 				if (parameter == "ends" )			{ ends = value; }
 				if (parameter == "processors" )		{ processors = value;	}
+				if (parameter == "template")		{ templatefile = value;	}
+				if (parameter == "search")			{ search = value;		}
+				if (parameter == "ksize")			{ ksize = value;		}
+				if (parameter == "align")		    { align = value;		}
+				if (parameter == "match")			{ match = value;		}
+				if (parameter == "mismatch")		{ mismatch = value;	    }
+				if (parameter == "gapopen")			{ gapopen = value;		}
+				if (parameter == "gapextend" )		{ gapextend = value;	}
 
 			}
 		}
@@ -246,9 +261,11 @@ bool ErrorCheck::checkInput(string input) {
 			}
 		}
 		
-		if ((commandName == "filter.seqs") || (commandName == "dist.seqs")) { 
+		if ((commandName == "filter.seqs") || (commandName == "dist.seqs") || (commandName == "align.seqs")) { 
 			if ((fastafile == "") && (nexusfile == "") && (clustalfile == "") && (phylipfile == "")) {
-				 cout << "You must read either a fasta, nexus, clustal, or phylip file before you can use the filter.seqs command." << endl; return false; 
+				 cout << "You must enter either a fasta, nexus, clustal, or phylip file before you can use the filter.seqs, dist.seqs or align.seqs command." << endl; return false; 
+			}else if ((commandName == "align.seqs") && (templatefile == "")) {
+				cout << "You must enter a template to use the align.seqs command." << endl; return false; 
 			}
 			validateSeqsFiles();
 		}
@@ -558,9 +575,15 @@ void ErrorCheck::validateSeqsFiles() {
 					errorFree = false;
 				}
 			}
-
+		}else if (templatefile != "") {
+			ableToOpen = openInputFile(templatefile, filehandle);
+			filehandle.close();
+			if (ableToOpen == 1) { //unable to open
+				errorFree = false;
+			}
 		}
 		
+		
 	}
 	catch(exception& e) {
 		cout << "Standard Error: " << e.what() << " has occurred in the ErrorCheck class Function validateSeqsFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
@@ -642,6 +665,7 @@ void ErrorCheck::clear() {
 	fastafile       =   "";
 	nexusfile       =   "";
 	clustalfile     =   "";
+	templatefile	=	"";
 	line			=	"";
 	label			=	"";
 	method			=   "furthest";
diff --git a/errorchecking.h b/errorchecking.h
index 8de9234..d6388f7 100644
--- a/errorchecking.h
+++ b/errorchecking.h
@@ -36,7 +36,7 @@ class ErrorCheck {
 		void refresh();
 		string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, nexusfile, clustalfile, treefile, sharedfile, cutoff, format; 
 		string precision, method, fileroot, label, line, iters, jumble, freq, single, rarefaction, shared, summary, randomtree, abund, sorted, trump, soft, filter, scale, ends, processors;
-
+		string templatefile, search, ksize, align, match, mismatch, gapopen, gapextend;
 		string commandName, optionText;
 		bool errorFree;
 
diff --git a/globaldata.cpp b/globaldata.cpp
index 4dfcbc1..b4239f3 100644
--- a/globaldata.cpp
+++ b/globaldata.cpp
@@ -85,10 +85,14 @@ void GlobalData::parseGlobalData(string commandString, string optionText){
 				if (key == "scale")			{ scale = value;		}
 				if (key == "ends" )			{ ends = value;			}
 				if (key == "processors" )	{ processors = value;	}
-				
-
-				
-
+				if (key == "template")		{ templatefile = value;	}
+				if (key == "search")		{ search = value;		}
+				if (key == "ksize")			{ ksize = value;		}
+				if (key == "align")		    { align = value;		}
+				if (key == "match")			{ match = value;		}
+				if (key == "mismatch")		{ mismatch = value;	    }
+				if (key == "gapopen")		{ gapopen = value;		}
+				if (key == "gapextend" )	{ gapextend = value;	}
 				
 				if (key == "line") {//stores lines to be used in a set
 					lines.clear();
@@ -151,7 +155,14 @@ void GlobalData::parseGlobalData(string commandString, string optionText){
 			if (key == "scale")			{ scale = value;		}
 			if (key == "ends" )			{ ends = value;			}
 			if (key == "processors" )	{ processors = value;	}
-
+			if (key == "template")		{ templatefile = value;	}
+			if (key == "search")		{ search = value;		}
+			if (key == "ksize")			{ ksize = value;		}
+			if (key == "align")		    { align = value;		}
+			if (key == "match")			{ match = value;		}
+			if (key == "mismatch")		{ mismatch = value;	    }
+			if (key == "gapopen")		{ gapopen = value;		}
+			if (key == "gapextend" )	{ gapextend = value;	}
 
 			if (key == "line") {//stores lines to be used in a vector
 				lines.clear();
@@ -289,6 +300,14 @@ string GlobalData::getFilter()			{   return filter;		}
 string GlobalData::getScale()			{	return scale;		}
 string GlobalData::getEnds()			{   return ends;		}
 string GlobalData::getProcessors()		{	return processors;	}
+string GlobalData::getTemplateFile()	{	return templatefile;}
+string GlobalData::getSearch()			{	return search;		}
+string GlobalData::getKSize()			{	return ksize;		}
+string GlobalData::getAlign()			{	return align;		}
+string GlobalData::getMatch()			{	return match;		}
+string GlobalData::getMismatch()		{	return mismatch;	}
+string GlobalData::getGapopen()			{	return gapopen;		}
+string GlobalData::getGapextend()		{	return gapextend;	}
 
 void GlobalData::setListFile(string file)		{	listfile = file;	inputFileName = file;}
 void GlobalData::setGroupFile(string file)		{	groupfile = file;	}
@@ -334,6 +353,7 @@ void GlobalData::clear() {
 	clustalfile		=   "";
 	treefile		=	"";
 	sharedfile		=	"";
+	templatefile	=	"";
 	cutoff			=	"10.00";
 	format			=	"";
 	precision		=	"100";
@@ -357,6 +377,14 @@ void GlobalData::clear() {
 	scale			=	"log10";
 	ends			=   "T";  //yes
 	processors		=	"1";
+	search			=	"suffix";
+	ksize			=	"7";
+	align			=	"blast";
+	match			=	"1.0";
+	mismatch		=	"-1.0";
+	gapopen			=	"-5.0";
+	gapextend		=	"-2.0";
+	
 
 }
 
@@ -379,6 +407,13 @@ void GlobalData::reset() {
 	form			=	"integral";
 	ends			=   "T";
 	processors		=	"1";
+	search			=	"suffix";
+	ksize			=	"7";
+	align			=	"blast";
+	match			=	"1.0";
+	mismatch		=	"-1.0";
+	gapopen			=	"-5.0";
+	gapextend		=	"-2.0";
 }
 /*******************************************************/
 
diff --git a/globaldata.hpp b/globaldata.hpp
index 3a469f0..cb9aff6 100644
--- a/globaldata.hpp
+++ b/globaldata.hpp
@@ -77,7 +77,14 @@ public:
 	string getSorted();
 	string getEnds();
 	string getProcessors();
-
+	string getTemplateFile();
+	string getSearch();
+	string getKSize();
+	string getAlign();
+	string getMatch();
+	string getMismatch();
+	string getGapopen();
+	string getGapextend();
 	string getTrump();
 	string getSoft();
 	string getFilter();
@@ -116,7 +123,8 @@ public:
 private:
 
 	string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, nexusfile, clustalfile, treefile, sharedfile, line, label, randomtree, groups;
-	string cutoff, format, precision, method, fileroot, iters, jumble, freq, calc, abund, step, form, sorted, trump, soft, filter, scale, ends, processors;
+	string cutoff, format, precision, method, fileroot, iters, jumble, freq, calc, abund, step, form, sorted, trump, soft, filter, scale, ends, processors, templatefile, search, ksize, align, match;
+	string mismatch, gapopen, gapextend;
 
 
 	static GlobalData* _uniqueInstance;
diff --git a/gotohoverlap.cpp b/gotohoverlap.cpp
new file mode 100644
index 0000000..57d884b
--- /dev/null
+++ b/gotohoverlap.cpp
@@ -0,0 +1,90 @@
+/*
+ *  gotohoverlap.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/15/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	This class is an Alignment child class that implements the Gotoh pairwise alignment algorithm as described in:
+ *		
+ *		Gotoh O. 1982.  An improved algorithm for matching biological sequences.  J. Mol. Biol.  162:705-8.
+ *		Myers, EW & Miller, W.  1988.  Optimal alignments in linear space.  Comput Appl Biosci. 4:11-7.
+ *
+ *	This method is nice because it allows for an affine gap penalty to be assessed, which is analogous to what is used
+ *	in blast and is an alternative to Needleman-Wunsch, which only charges the same penalty for each gap position.
+ *	Because this method typically has problems at the ends when two sequences do not full overlap, we employ a separate
+ *	method to fix the ends (see Overlap class documentation)
+ *
+ */
+
+using namespace std;
+
+#include "alignmentcell.hpp"
+#include "overlap.hpp"
+#include "alignment.hpp"
+#include "gotohoverlap.hpp"
+
+/**************************************************************************************************/
+
+GotohOverlap::GotohOverlap(float gO, float gE, float m, float mm, int r) :
+	gapOpen(gO), gapExtend(gE), match(m), mismatch(mm), Alignment(r) {
+	
+	for(int i=1;i<nCols;i++){				//	we initialize the dynamic programming matrix by setting the pointers in
+		alignment[0][i].prevCell = 'l';		//	the first row to the left
+		alignment[0][i].cValue = 0;
+		alignment[0][i].dValue = 0;
+	}
+	
+	for(int i=1;i<nRows;i++){				//	we initialize the dynamic programming matrix by setting the pointers in
+		alignment[i][0].prevCell = 'u';		//	the first column upward
+		alignment[i][0].cValue = 0;
+		alignment[i][0].iValue = 0;
+	}
+}
+
+/**************************************************************************************************/
+
+void GotohOverlap::align(string A, string B){
+	
+	seqA = ' ' + A;	lA = seqA.length();		//	the algorithm requires that the first character be a dummy value
+	seqB = ' ' + B;	lB = seqB.length();		//	the algorithm requires that the first character be a dummy value
+	
+	for(int i=1;i<lB;i++){					//	the recursion here is shown in Webb and Miller, Fig. 1A.  Note that 
+		for(int j=1;j<lA;j++){				//	if we need to conserve on space we should see Fig. 1B, which is linear
+											//	in space, which I think is unnecessary
+			float diagonal;
+			if(seqB[i] == seqA[j])	{	diagonal = alignment[i-1][j-1].cValue + match;		}
+			else					{	diagonal = alignment[i-1][j-1].cValue + mismatch;	}
+			
+			alignment[i][j].iValue = max(alignment[i][j-1].iValue, alignment[i][j-1].cValue + gapOpen) + gapExtend;
+			alignment[i][j].dValue = max(alignment[i-1][j].dValue, alignment[i-1][j].cValue + gapOpen) + gapExtend;
+			
+			if(alignment[i][j].iValue > alignment[i][j].dValue){
+				if(alignment[i][j].iValue > diagonal){
+					alignment[i][j].cValue = alignment[i][j].iValue;
+					alignment[i][j].prevCell = 'l';
+				}
+				else{
+					alignment[i][j].cValue = diagonal;
+					alignment[i][j].prevCell = 'd';
+				}
+			}
+			else{
+				if(alignment[i][j].dValue > diagonal){
+					alignment[i][j].cValue = alignment[i][j].dValue;
+					alignment[i][j].prevCell = 'u';
+				}
+				else{
+					alignment[i][j].cValue = diagonal;
+					alignment[i][j].prevCell = 'd';
+				}
+			}
+			
+		}
+	}
+	Overlap over;
+	over.setOverlap(alignment, lA, lB, 0);	//	Fix the gaps at the ends of the sequences
+	traceBack();							//	Construct the alignment and set seqAaln and seqBaln
+}
+
+/**************************************************************************************************/
diff --git a/gotohoverlap.hpp b/gotohoverlap.hpp
new file mode 100644
index 0000000..bf3c636
--- /dev/null
+++ b/gotohoverlap.hpp
@@ -0,0 +1,43 @@
+#ifndef GOTOHOVERLAP_H
+#define GOTOHOVERLAP_H
+
+/*
+ *  gotohoverlap.h
+ *  
+ *
+ *  Created by Pat Schloss on 12/15/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	This class is an Alignment child class that implements the Gotoh pairwise alignment algorithm as described in:
+ *		
+ *		Gotoh O. 1982.  An improved algorithm for matching biological sequences.  J. Mol. Biol.  162:705-8.
+ *		Myers, EW & Miller, W.  1988.  Optimal alignments in linear space.  Comput Appl Biosci. 4:11-7.
+ *
+ *	This method is nice because it allows for an affine gap penalty to be assessed, which is analogous to what is used
+ *	in blast and is an alternative to Needleman-Wunsch, which only charges the same penalty for each gap position.
+ *	Because this method typically has problems at the ends when two sequences do not full overlap, we employ a separate
+ *	method to fix the ends (see Overlap class documentation)
+ *
+ */
+
+#include "mothur.h"
+#include "alignment.hpp"
+
+/**************************************************************************************************/
+
+class GotohOverlap : public Alignment {
+	
+public:
+	GotohOverlap(float, float, float, float, int);
+	void align(string, string);
+	
+private:
+	float gapOpen;
+	float gapExtend;
+	float match;
+	float mismatch;
+};
+
+/**************************************************************************************************/
+
+#endif
diff --git a/kmer.cpp b/kmer.cpp
index 875cec5..dcb6475 100644
--- a/kmer.cpp
+++ b/kmer.cpp
@@ -14,28 +14,29 @@ using namespace std;
 
 /**************************************************************************************************/
 
-Kmer::Kmer(int size) : kmerSize(size) {
-	
-	int power4s[9] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536 };
-	maxKmer = power4s[kmerSize]+1;// (int)pow(4.,k)+1;
+Kmer::Kmer(int size) : kmerSize(size) {	//	The constructor sets the size of the kmer
 	
+	int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
+																		//	No reason to waste the time of recalculating
+	maxKmer = power4s[kmerSize]+1;// (int)pow(4.,k)+1;					//	powers of 4 everytime through.  We need an
+																		//	extra kmer if we get a non-ATGCU base
 }
 
 /**************************************************************************************************/
 
-string Kmer::getKmerString(string sequence){
-	int length = sequence.length();
-	int nKmers = length - kmerSize + 1;
+string Kmer::getKmerString(string sequence){	//	Calculate kmer for each position in the sequence, count the freq
+	int length = sequence.length();				//	of each kmer, and convert it to an ascii character with base '!'.
+	int nKmers = length - kmerSize + 1;			//	Export the string of characters as a string
 	vector<int> counts(maxKmer, 0);
 	
-	for(int i=0;i<nKmers;i++){
-		int kmerNumber = getKmerNumber(sequence, i);
+	for(int i=0;i<nKmers;i++){					//	Go though sequence and get the number between 0 and maxKmer for that
+		int kmerNumber = getKmerNumber(sequence, i);//	kmer.  Increase the count for the kmer in the counts vector
 		counts[kmerNumber]++;
 	}
 	
-	string kmerString = "";
-	for(int i=0;i<maxKmer;i++){
-		kmerString += getASCII(counts[i]);
+	string kmerString = "";						
+	for(int i=0;i<maxKmer;i++){					//	Scan through the vector of counts and convert each element of the 
+		kmerString += getASCII(counts[i]);		//	vector to an ascii character that is equal to or greater than '!'
 	}		
 	
 	return kmerString;	
@@ -44,8 +45,24 @@ string Kmer::getKmerString(string sequence){
 /**************************************************************************************************/
 
 int Kmer::getKmerNumber(string sequence, int index){
-	int power4s[9] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536 };
+	
+//	Here we convert a kmer to a number between 0 and maxKmer.  For example, AAAA would equal 0 and TTTT would equal 255.
+//	If there's an N in the kmer, it is set to 256 (if we are looking at 4mers).  The largest we can look at are 8mers,
+//	this could be easily increased.
+//	
+//	Example:   ATGCAT (kSize = 6)
+//		  i:   012345
+//
+//		Score	= 0*4^(6-0-1) + 3*4^(6-1-1) + 2*4^(6-2-1) + 1*4^(6-3-1) + 0*4^(6-4-1) + 3*4^(6-5-1)
+//				= 0*4^5	+	3*4^4	+	2*4^3	+	1*4^2	+	0*4^1	+	3*4^0
+//				= 0 + 3*256 + 2*64 + 1*16 + 0*4 + 3*1
+//				= 0 + 768 + 128 + 16 + 0 + 3
+//				= 915
+	
+	int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
+	
 	int kmer = 0;
+	
 	for(int i=0;i<kmerSize;i++){
 		if(toupper(sequence[i+index]) == 'A')		{	kmer += (0 * power4s[kmerSize-i-1]);	}
 		else if(toupper(sequence[i+index]) == 'C')	{	kmer += (1 * power4s[kmerSize-i-1]);	}
@@ -60,19 +77,30 @@ int Kmer::getKmerNumber(string sequence, int index){
 /**************************************************************************************************/
 	
 string Kmer::getKmerBases(int kmerNumber){
-	int power4s[9] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536 };
+	
+//	Here we convert the kmer number into the kmer in terms of bases.
+//
+//	Example:	Score = 915 (for a 6-mer)
+//				Base6 = (915 / 4^0) % 4 = 915 % 4 = 3 => T	[T]
+//				Base5 = (915 / 4^1) % 4 = 228 % 4 = 0 => A	[AT]
+//				Base4 = (915 / 4^2) % 4 = 57 % 4 = 1 => C	[CAT]
+//				Base3 = (915 / 4^3) % 4 = 14 % 4 = 2 => G	[GCAT]
+//				Base2 = (915 / 4^4) % 4 = 3 % 4 = 3 => T	[TGCAT]
+//				Base1 = (915 / 4^5) % 4 = 0 % 4 = 0 => A	[ATGCAT] -> this checks out with the previous method
+	
+	int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
 	
 	string kmer = "";
 	
-	if(kmerNumber == power4s[kmerSize]){//pow(4.,7)){	
-		for(int i=0;i<kmerSize;i++){
+	if(kmerNumber == power4s[kmerSize]){//pow(4.,7)){	//	if the kmer number is the same as the maxKmer then it must
+		for(int i=0;i<kmerSize;i++){					//	have had an N in it and so we'll just call it N x kmerSize
 			kmer += 'N';
 		}
 	}
 	else{
 		for(int i=0;i<kmerSize;i++){
-			int nt = (int)(kmerNumber / (float)power4s[i]) % 4;
-			if(nt == 0)		{	kmer = 'A' + kmer;	}
+			int nt = (int)(kmerNumber / (float)power4s[i]) % 4;		//	the '%' operator returns the remainder 
+			if(nt == 0)		{	kmer = 'A' + kmer;	}				//	from int-based division ]
 			else if(nt == 1){	kmer = 'C' + kmer;	}
 			else if(nt == 2){	kmer = 'G' + kmer;	}
 			else if(nt == 3){	kmer = 'T' + kmer;	}
@@ -83,10 +111,10 @@ string Kmer::getKmerBases(int kmerNumber){
 
 /**************************************************************************************************/
 
-char Kmer::getASCII(int number)		{	return (char)(33+number);			}
-
+char Kmer::getASCII(int number)		{	return (char)(33+number);			}	// '!' is the first printable char and
+																				// has the int value of 33
 /**************************************************************************************************/
 
-int Kmer::getNumber(char character)	{	return ((int)(character-'!'));		}
+int Kmer::getNumber(char character)	{	return ((int)(character-'!'));		}	// '!' has the value of 33
 
 /**************************************************************************************************/
diff --git a/kmer.hpp b/kmer.hpp
index 21a18e3..31ca9fb 100644
--- a/kmer.hpp
+++ b/kmer.hpp
@@ -20,11 +20,10 @@ public:
 	Kmer(int);
 	string getKmerString(string);
 	int getKmerNumber(string, int);
+	string getKmerBases(int);
 	
 	
 private:
-	string getKmerBases(int);
-	
 	char getASCII(int);
 	int getNumber(char);
 	int kmerSize;
@@ -34,4 +33,5 @@ private:
 
 /**************************************************************************************************/
 
+
 #endif
diff --git a/kmerdb.cpp b/kmerdb.cpp
index ee8168a..27e6997 100644
--- a/kmerdb.cpp
+++ b/kmerdb.cpp
@@ -5,6 +5,21 @@
  *  Created by Pat Schloss on 12/16/08.
  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
  *
+ *	This class is a child class of the Database class, which stores the template sequences as a kmer table and provides
+ *	a method of searching the kmer table for the sequence with the most kmers in common with a query sequence.
+ *	kmerLocations is the primary storage variable that is a two-dimensional vector where each row represents the
+ *	different number of kmers and each column contains the index to sequences that use that kmer.
+ *
+ *	Construction of an object of this type will first look for an appropriately named database file and if it is found
+ *	then will read in the database file (readKmerDB), otherwise it will generate one and store the data in memory
+ *	(generateKmerDB)
+ *
+ *	The search method used here is roughly the same as that used in the SimRank program that is found at the
+ *	greengenes website.  The default kmer size is 7.  The speed complexity is between O(L) and O(LN).  When I use 7mers
+ *	on average a kmer is found in ~100 other sequences with a database of ~5000 sequences.  If this is the case then the
+ *	time would be on the order of O(0.1LN) -> fast.
+ *
+
  */
 
 using namespace std;
@@ -22,16 +37,16 @@ KmerDB::KmerDB(string fastaFileName, int kSize) : Database(fastaFileName), kmerS
 	string kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
 	ifstream kmerFileTest(kmerDBName.c_str());
 	
-	int power4s[9] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536 };
+	int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
 	
 	maxKmer = power4s[kmerSize];
 	kmerLocations.resize(maxKmer+1);
 	
-	if(!kmerFileTest){
+	if(!kmerFileTest){		//	if we can open the kmer db file, then read it in...
 		cout << "Generating the " << kmerDBName << " database...\t";	cout.flush();
 		generateKmerDB(kmerDBName);	
 	}
-	else{
+	else{					//	...otherwise generate it.
 		cout << "Reading in the " << kmerDBName << " database...\t";	cout.flush();
 		readKmerDB(kmerDBName, kmerFileTest);
 	}
@@ -42,68 +57,67 @@ KmerDB::KmerDB(string fastaFileName, int kSize) : Database(fastaFileName), kmerS
 /**************************************************************************************************/
 
 Sequence* KmerDB::findClosestSequence(Sequence* candidateSeq){
+
+	vector<int> matches(numSeqs, 0);						//	a record of the sequences with shared kmers
+	vector<int> timesKmerFound(kmerLocations.size()+1, 0);	//	a record of the kmers that we have already found
 	
-	vector<int> matches(numSeqs, 0);
-	vector<int> timesKmerFound(kmerLocations.size()+1, 0);
-	
-	int maxMatches = 0;
+	searchScore = 0;
 	int maxSequence = 0;
 	
-	string query = candidateSeq->getUnaligned();
-	
-	int numKmers = query.length() - kmerSize + 1;
+	string query = candidateSeq->getUnaligned();			//	we want to search using an unaligned dna sequence
+
+	int numKmers = query.length() - kmerSize + 1;	
 	Kmer kmer(kmerSize);
 	
 	for(int i=0;i<numKmers;i++){
-		
-		int kmerNumber = kmer.getKmerNumber(query, i);
-		
-		if(timesKmerFound[kmerNumber] == 0){
-			for(int j=0;j<kmerLocations[kmerNumber].size();j++){
-				matches[kmerLocations[kmerNumber][j]]++;
+		int kmerNumber = kmer.getKmerNumber(query, i);		//	go through the query sequence and get a kmer number
+		if(timesKmerFound[kmerNumber] == 0){				//	if we haven't seen it before...
+			for(int j=0;j<kmerLocations[kmerNumber].size();j++){//increase the count for each sequence that also has
+				matches[kmerLocations[kmerNumber][j]]++;	//	that kmer
 			}
 		}
-		timesKmerFound[kmerNumber] = 1;
-		
+		timesKmerFound[kmerNumber] = 1;						//	ok, we've seen the kmer now
 	}
-	for(int i=0;i<numSeqs;i++){
-		if(matches[i] > maxMatches){
-			maxMatches = matches[i];
+
+	for(int i=0;i<numSeqs;i++){								//	find the db sequence that shares the most kmers with
+		if(matches[i] > searchScore){						//	the query sequence
+			searchScore = matches[i];
 			maxSequence = i;
 		}
 	}
-	return templateSequences[maxSequence];
-	
+	searchScore = 100 * searchScore / (float)numKmers;
+	return templateSequences[maxSequence];					//	return the Sequence object corresponding to the db
+															//	sequence with the most shared kmers.
 }
 
 /**************************************************************************************************/
 
 void KmerDB::generateKmerDB(string kmerDBName){
 	
-	
 	Kmer kmer(kmerSize);
 	
-	for(int i=0;i<numSeqs;i++){
+	for(int i=0;i<numSeqs;i++){								//	for all of the template sequences...
 
-		string seq = templateSequences[i]->getUnaligned();
+		string seq = templateSequences[i]->getUnaligned();	//	...take the unaligned sequence...
 		int numKmers = seq.length() - kmerSize + 1;
 		
-		for(int j=0;j<numKmers;j++){
+		vector<int> seenBefore(maxKmer+1,0);
+		for(int j=0;j<numKmers;j++){						//	...step though the sequence and get each kmer...
 			int kmerNumber = kmer.getKmerNumber(seq, j);
-			kmerLocations[kmerNumber].push_back(i);
-		}
+			if(seenBefore[kmerNumber] == 0){
+				kmerLocations[kmerNumber].push_back(i);		//	...insert the sequence index into kmerLocations for
+			}												//	the appropriate kmer number
+			seenBefore[kmerNumber] = 1;
+		}													
 	}
 	
-	ofstream kmerFile(kmerDBName.c_str(), ios::trunc);
-	if(!kmerFile) {
-		cerr << "Error: Could not open " << kmerDBName << endl;
-		exit(1);
-	}
+	ofstream kmerFile;										//	once we have the kmerLocations folder print it out
+	openOutputFile(kmerDBName, kmerFile);					//	to a file
 	
-	for(int i=0;i<maxKmer;i++){
-		kmerFile << i << ' ' << kmerLocations[i].size();
-		for(int j=0;j<kmerLocations[i].size();j++){
-			kmerFile << ' ' << kmerLocations[i][j];
+	for(int i=0;i<maxKmer;i++){								//	step through all of the possible kmer numbers
+		kmerFile << i << ' ' << kmerLocations[i].size();	//	print the kmer number and the number of sequences with
+		for(int j=0;j<kmerLocations[i].size();j++){			//	that kmer.  then print out the indices of the sequences
+			kmerFile << ' ' << kmerLocations[i][j];			//	with that kmer.
 		}
 		kmerFile << endl;
 	}
@@ -115,18 +129,18 @@ void KmerDB::generateKmerDB(string kmerDBName){
 
 void KmerDB::readKmerDB(string kmerDBName, ifstream& kmerDBFile){
 
-	kmerDBFile.seekg(0);
+	kmerDBFile.seekg(0);									//	start at the beginning of the file
 	
 	string seqName;
 	int seqNumber;
 	
-	for(int i=0;i<numSeqs;i++){
-		int numValues;
+	for(int i=0;i<maxKmer;i++){
+		int numValues;	
 		kmerDBFile >> seqName >> numValues;
 		
-		for(int j=0;j<numValues;j++){
-			kmerDBFile >> seqNumber;
-			kmerLocations[i].push_back(seqNumber);
+		for(int j=0;j<numValues;j++){						//	for each kmer number get the...
+			kmerDBFile >> seqNumber;						//		1. number of sequences with the kmer number
+			kmerLocations[i].push_back(seqNumber);			//		2. sequence indices
 		}
 	}
 	kmerDBFile.close();
diff --git a/kmerdb.hpp b/kmerdb.hpp
index af52789..eb10061 100644
--- a/kmerdb.hpp
+++ b/kmerdb.hpp
@@ -8,6 +8,15 @@
  *  Created by Pat Schloss on 12/16/08.
  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
  *
+ *	This class is a child class of the Database class, which stores the template sequences as a kmer table and provides
+ *	a method of searching the kmer table for the sequence with the most kmers in common with a query sequence.
+ *	kmerLocations is the primary storage variable that is a two-dimensional vector where each row represents the
+ *	different number of kmers and each column contains the index to sequences that use that kmer.
+ *
+ *	Construction of an object of this type will first look for an appropriately named database file and if it is found
+ *	then will read in the database file (readKmerDB), otherwise it will generate one and store the data in memory
+ *	(generateKmerDB)
+
  */
 
 #include "mothur.h"
diff --git a/nast.cpp b/nast.cpp
new file mode 100644
index 0000000..0fae6ab
--- /dev/null
+++ b/nast.cpp
@@ -0,0 +1,362 @@
+/*
+ *  nast.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/17/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	This is my implementation of the NAST (nearest alignment space termination) algorithm as described in:
+ *
+ *	DeSantis TZ, Hugenholtz P, Keller K, Brodie EL, Larsen N, Piceno YM, Phan R, & Anderson GL.  2006.  NAST: a multiple
+ *		sequence alignment server for comparative analysis of 16S rRNA genes.  Nucleic Acids Research.  34:W394-9.
+ *
+ *	To construct an object one needs to provide a method of getting a pairwise alignment (alignment) and the template
+ *	and candidate sequence that are to be aligned to each other.
+ *
+ */
+
+using namespace std;
+
+#include "sequence.hpp"
+#include "alignment.hpp"
+#include "nast.hpp"
+
+/**************************************************************************************************/
+
+Nast::Nast(Alignment* method, Sequence* cand, Sequence* temp) : alignment(method), candidateSeq(cand), templateSeq(temp) {
+	maxInsertLength = 0;
+	pairwiseAlignSeqs();	//	This is part A in Fig. 2 of DeSantis et al.
+	regapSequences();		//	This is parts B-F in Fig. 2 of DeSantis et al.
+}
+
+/**************************************************************************************************/
+
+void Nast::pairwiseAlignSeqs(){	//	Here we call one of the pairwise alignment methods to align our unaligned candidate
+								//	and template sequences
+	alignment->align(candidateSeq->getUnaligned(), templateSeq->getUnaligned());
+	
+	string candAln = alignment->getSeqAAln();
+	string tempAln = alignment->getSeqBAln();
+
+	if(candAln == ""){
+		candidateSeq->setPairwise("");
+		templateSeq->setPairwise(templateSeq->getUnaligned());
+	}
+	else{
+		if(tempAln[0] == '-'){
+			int pairwiseAlignmentLength = tempAln.length();	//	we need to make sure that the candidate sequence alignment
+			for(int i=0;i<pairwiseAlignmentLength;i++){		//	starts where the template sequence alignment starts, if it
+				if(isalpha(tempAln[i])){					//	starts early, we nuke the beginning of the candidate
+					candAln = candAln.substr(i);			//	sequence
+					tempAln = tempAln.substr(i);
+					break;
+				}
+			}
+		}
+		
+		int pairwiseAlignmentLength = tempAln.length();
+		if(tempAln[pairwiseAlignmentLength-1] == '-'){		//	we need to make sure that the candidate sequence alignment
+			for(int i=pairwiseAlignmentLength-1; i>=0; i--){//	ends where the template sequence alignment ends, if it runs
+				if(isalpha(tempAln[i])){					//	long, we nuke the end of the candidate sequence
+					candAln = candAln.substr(0,i+1);
+					tempAln = tempAln.substr(0,i+1);
+					break;
+				}		
+			}
+		}
+	}
+	candidateSeq->setPairwise(candAln);					//	set the pairwise sequences in the Sequence objects for
+	templateSeq->setPairwise(tempAln);					//	the candidate and template sequences
+	
+}
+
+/**************************************************************************************************/
+
+void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAlign){
+
+//	here we do steps C-F of Fig. 2 from DeSantis et al.
+	
+	int longAlignmentLength = newTemplateAlign.length();	
+
+	for(int i=0; i<longAlignmentLength; i++){				//	use the long alignment as the standard
+		int rightIndex, rightRoom, leftIndex, leftRoom;
+
+		//	Part C of Fig. 2 from DeSantis et al.
+		if((isalpha(newTemplateAlign[i]) != isalpha(tempAln[i]))){	//if there is a discrepancy between the regapped
+			
+//			cout << i << '\t';cout.flush();
+			
+			//	Part D of Fig. 2 from DeSantis et al.		//	template sequence and the official template sequence
+			for(leftIndex=i-1;leftIndex>=0;leftIndex--){	//	then we've got problems...
+				if(!isalpha(candAln[leftIndex])){
+					leftRoom = 1;	//count how far it is to the nearest gap on the LEFT side of the anomaly
+					while(leftIndex-leftRoom>=0 && !isalpha(candAln[leftIndex-leftRoom]))	{	leftRoom++;		}
+					break;
+				}
+			}
+//			cout << leftIndex << '\t' << leftRoom << endl;
+			
+			for(rightIndex=i+1;rightIndex<longAlignmentLength;rightIndex++){
+				if(!isalpha(candAln[rightIndex])){
+					rightRoom = 1;	//count how far it is to the nearest gap on the RIGHT side of the anomaly
+					while(rightIndex+rightRoom<longAlignmentLength && !isalpha(candAln[rightIndex+rightRoom]))	{	rightRoom++;	}
+					break;
+				}
+			}
+			
+			int insertLength = 0;							//	figure out how long the anomaly is
+			while(!isalpha(newTemplateAlign[i + insertLength]))	{	insertLength++;	}
+			if(insertLength > maxInsertLength){	maxInsertLength = insertLength;	}
+			
+			if((leftRoom + rightRoom) >= insertLength){
+				//	Parts D & E from Fig. 2 of DeSantis et al.
+				if((i-leftIndex) <= (rightIndex-i)){		//	the left gap is closer - > move stuff left there's
+					if(leftRoom >= insertLength){			//	enough room to the left to move
+						string leftTemplateString = newTemplateAlign.substr(0,i);
+						string rightTemplateString = newTemplateAlign.substr(i+insertLength);
+						newTemplateAlign = leftTemplateString + rightTemplateString;
+						longAlignmentLength = newTemplateAlign.length();
+						
+						string leftCandidateString = candAln.substr(0,leftIndex-insertLength+1);
+						string rightCandidateString = candAln.substr(leftIndex+1);
+						candAln = leftCandidateString + rightCandidateString;	
+					}
+					else{									//	not enough room to the left, have to steal some space to
+						string leftTemplateString = newTemplateAlign.substr(0,i);	//	the right
+						string rightTemplateString = newTemplateAlign.substr(i+insertLength);
+						newTemplateAlign = leftTemplateString + rightTemplateString;
+						longAlignmentLength = newTemplateAlign.length();
+						
+						string leftCandidateString = candAln.substr(0,leftIndex-leftRoom+1);
+						string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
+						string rightCandidateString = candAln.substr(rightIndex+(insertLength-leftRoom));
+						candAln = leftCandidateString + insertString + rightCandidateString;	
+					}
+				}
+				else{										//	the right gap is closer - > move stuff right there's
+					if(rightRoom >= insertLength){			//	enough room to the right to move
+						string leftTemplateString = newTemplateAlign.substr(0,i);
+						string rightTemplateString = newTemplateAlign.substr(i+insertLength);
+						newTemplateAlign = leftTemplateString + rightTemplateString;
+						longAlignmentLength = newTemplateAlign.length();
+						
+						string leftCandidateString = candAln.substr(0,rightIndex);
+						string rightCandidateString = candAln.substr(rightIndex+insertLength);
+						candAln = leftCandidateString + rightCandidateString;	
+						
+					}
+					else{									//	not enough room to the right, have to steal some 
+															//	space to the left lets move left and then right...
+						string leftTemplateString = newTemplateAlign.substr(0,i);
+						string rightTemplateString = newTemplateAlign.substr(i+insertLength);
+						newTemplateAlign = leftTemplateString + rightTemplateString;
+						longAlignmentLength = newTemplateAlign.length();
+						
+						string leftCandidateString = candAln.substr(0,leftIndex-(insertLength-rightRoom)+1);
+						string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
+						string rightCandidateString = candAln.substr(rightIndex+rightRoom);
+						candAln = leftCandidateString + insertString + rightCandidateString;	
+						
+					}
+				}
+			}
+			else{											//	there could be a case where there isn't enough room in
+				string leftTemplateString = newTemplateAlign.substr(0,i);			//	either direction to move stuff
+				string rightTemplateString = newTemplateAlign.substr(i+leftRoom+rightRoom);
+				newTemplateAlign = leftTemplateString + rightTemplateString;
+				longAlignmentLength = newTemplateAlign.length();
+				
+				string leftCandidateString = candAln.substr(0,leftIndex-leftRoom+1);
+				string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
+				string rightCandidateString = candAln.substr(rightIndex+rightRoom);
+				candAln = leftCandidateString + insertString + rightCandidateString;	
+			}
+			
+			i -= insertLength;
+		} 
+	}
+//	cout << candAln << endl << tempAln << endl << newTemplateAlign << endl;
+}
+
+/**************************************************************************************************/
+
+void Nast::regapSequences(){	//This is essentially part B in Fig 2. of DeSantis et al.
+	
+	string candPair = candidateSeq->getPairwise();
+	string candAln = "";
+	
+	string tempPair = templateSeq->getPairwise();
+	string tempAln = templateSeq->getAligned();		//	we use the template aligned sequence as our guide
+	
+	int pairwiseLength = candPair.length();
+	int fullAlignLength = tempAln.length();
+	
+	if(candPair == ""){
+		for(int i=0;i<fullAlignLength;i++)	{	candAln += '.';		}
+		candidateSeq->setAligned(candAln);
+		return;
+	}
+	
+	int fullAlignIndex = 0;
+	int pairwiseAlignIndex = 0;
+	string newTemplateAlign = "";					//	this is going to be messy so we want a temporary template
+													//	alignment string
+	while(tempAln[fullAlignIndex] == '.' || tempAln[fullAlignIndex]  == '-'){
+		candAln += '.';								//	add the initial '-' and '.' to the candidate and template
+		newTemplateAlign += tempAln[fullAlignIndex];//	pairwise sequences
+		fullAlignIndex++;
+	}
+	
+	string lastLoop = "";
+	
+	while(pairwiseAlignIndex<pairwiseLength){
+		if(isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
+		   && isalpha(candPair[pairwiseAlignIndex])){
+			//  the template and candidate pairwise and template aligned have characters
+			//	need to add character onto the candidatSeq.aligned sequence
+			
+			candAln += candPair[pairwiseAlignIndex];
+			newTemplateAlign += tempPair[pairwiseAlignIndex];//
+			
+			pairwiseAlignIndex++;
+			fullAlignIndex++;
+		}
+		else if(isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex])
+				&& isalpha(candPair[pairwiseAlignIndex])){
+			//	the template pairwise and candidate pairwise are characters and the template aligned is a gap
+			//	need to insert gaps into the candidateSeq.aligned sequence
+			
+			candAln += '-';
+			newTemplateAlign += '-';//
+			fullAlignIndex++;
+		}
+		else if(!isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
+				&& isalpha(candPair[pairwiseAlignIndex])){
+			//  the template pairwise is a gap and the template aligned and pairwise sequences have characters
+			//	this is the alpha scenario.  add character to the candidateSeq.aligned sequence without progressing
+			//	further through the tempAln sequence.
+			
+			candAln += candPair[pairwiseAlignIndex];
+			newTemplateAlign += '-';//
+			pairwiseAlignIndex++;
+		}
+		else if(isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
+				&& !isalpha(candPair[pairwiseAlignIndex])){
+			//  the template pairwise and full alignment are characters and the candidate sequence has a gap
+			//	should not be a big deal, just add the gap position to the candidateSeq.aligned sequence;
+			
+			candAln += candPair[pairwiseAlignIndex];
+			newTemplateAlign += tempAln[fullAlignIndex];//
+			fullAlignIndex++;			
+			pairwiseAlignIndex++;
+		}
+		else if(!isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex])
+				&& isalpha(candPair[pairwiseAlignIndex])){
+			//	the template pairwise and aligned are gaps while the candidate pairwise has a character
+			//	this would be an insertion, go ahead and add the character->seems to be the opposite of the alpha scenario
+			
+			candAln += candPair[pairwiseAlignIndex];
+			newTemplateAlign += tempAln[fullAlignIndex];//
+			pairwiseAlignIndex++;
+			fullAlignIndex++;			
+		}
+		else if(isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex])
+				&& !isalpha(candPair[pairwiseAlignIndex])){
+			//	template pairwise has a character, but its full aligned sequence and candidate sequence have gaps
+			//	this would happen like we need to add a gap.  basically the opposite of the alpha situation
+			
+			newTemplateAlign += tempAln[fullAlignIndex];//
+			candAln += "-";
+			fullAlignIndex++;			
+		}
+		else if(!isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
+				&& !isalpha(candPair[pairwiseAlignIndex])){
+			//	template and candidate pairwise are gaps and the template aligned is not a gap this should not be possible
+			//	would skip the gaps and not progress through full alignment sequence
+			//	not tested yet
+			
+			cout << "We're into D" << ' ' << fullAlignIndex << ' ' << pairwiseAlignIndex <<  endl;
+			pairwiseAlignIndex++;
+		}
+		else{
+			//	everything has a gap - not possible
+			//	not tested yet
+			
+			cout << "We're into F" << ' ' << fullAlignIndex << ' ' << pairwiseAlignIndex <<  endl;
+			pairwiseAlignIndex++;
+			fullAlignIndex++;			
+		}		
+	}
+	
+	for(int i=fullAlignIndex;i<fullAlignLength;i++){
+		candAln += '.';
+		newTemplateAlign += tempAln[i];//
+	}
+	
+	int start, end;
+	for(int i=0;i<candAln.length();i++){
+		if(candAln[i] == 'Z' || !isalnum(candAln[i]))	{	candAln[i] = '.';	}	//	if we padded the alignemnt from
+		else{			start = i;			break;		}							//	blast with Z's, change them to
+	}																				//	'.' characters
+	
+	for(int i=candAln.length()-1;i>=0;i--){											//	ditto.
+		if(candAln[i] == 'Z' || !isalnum(candAln[i]))	{	candAln[i] = '.';	}
+		else{			end = i;			break;		}
+	}
+	
+	for(int i=start;i<=end;i++){					//	go through the candidate alignment sequence and make sure that
+		candAln[i] = toupper(candAln[i]);			//	everything is upper case
+	}
+
+//	cout << candAln << endl;
+//	cout << tempAln << endl;
+//	cout << newTemplateAlign << endl;
+
+	if(candAln.length() != tempAln.length()){		//	if the regapped candidate sequence is longer than the official
+		removeExtraGaps(candAln, tempAln, newTemplateAlign);//	template alignment then we need to do steps C-F in Fig.
+	}												//	2 of Desantis et al.
+	
+//	cout << candAln << endl;
+//	cout << tempAln << endl;
+//	cout << newTemplateAlign << endl;
+
+	candidateSeq->setAligned(candAln);
+}
+
+/**************************************************************************************************/
+
+float Nast::getSimilarityScore(){
+
+	string cand = candidateSeq->getAligned();
+	string temp = templateSeq->getAligned();
+	int alignmentLength = temp.length();
+	int mismatch = 0;
+	int denominator = 0;
+	
+	for(int i=0;i<alignmentLength;i++){
+		if(cand[i] == '-' && temp[i] == '-'){
+			
+		}
+		else if(cand[i] != '.' && temp[i] != '.'){
+			denominator++;
+			
+			if(cand[i] != temp[i]){
+				mismatch++;
+			}
+		}
+	}
+	float similarity = 100 * (1. - mismatch / (float)denominator);
+	if(denominator == 0){	similarity = 0.0000;	}
+
+	return similarity;
+}
+
+/**************************************************************************************************/
+
+int Nast::getMaxInsertLength(){
+	
+	return maxInsertLength;
+	
+}
+	
+/**************************************************************************************************/
diff --git a/nast.hpp b/nast.hpp
new file mode 100644
index 0000000..4cf1b85
--- /dev/null
+++ b/nast.hpp
@@ -0,0 +1,49 @@
+#ifndef NAST_HPP
+#define NAST_HPP
+
+/*
+ *  nast.hpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/17/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	This is my implementation of the NAST (nearest alignment space termination) algorithm as described in:
+ *
+ *	DeSantis TZ, Hugenholtz P, Keller K, Brodie EL, Larsen N, Piceno YM, Phan R, & Anderson GL.  2006.  NAST: a multiple
+ *		sequence alignment server for comparative analysis of 16S rRNA genes.  Nucleic Acids Research.  34:W394-9.
+ *
+ *	To construct an object one needs to provide a method of getting a pairwise alignment (alignment) and the template
+ *	and candidate sequence that are to be aligned to each other.
+ *
+ */
+
+#include "mothur.h"
+
+class Alignment;
+class Sequence;
+
+/**************************************************************************************************/
+
+class Nast {
+	
+public:
+	Nast(Alignment*, Sequence*, Sequence*);
+	float getSimilarityScore();
+	int getMaxInsertLength();
+	
+private:
+	void pairwiseAlignSeqs();
+	void regapSequences();
+	void removeExtraGaps(string&, string, string);
+	
+	Alignment* alignment;
+	Sequence* candidateSeq;
+	Sequence* templateSeq;
+	
+	int maxInsertLength;
+};
+
+/**************************************************************************************************/
+
+#endif
diff --git a/nastreport.cpp b/nastreport.cpp
new file mode 100644
index 0000000..67ec702
--- /dev/null
+++ b/nastreport.cpp
@@ -0,0 +1,92 @@
+/*
+ *  nastreport.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/19/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+using namespace std;
+
+#include "sequence.hpp"
+#include "nast.hpp"
+#include "alignment.hpp"
+#include "nastreport.hpp"
+
+/******************************************************************************************************************/
+
+NastReport::NastReport(string candidateReportFName) {
+	openOutputFile(candidateReportFName, candidateReportFile);
+	
+	candidateReportFile << "QueryName\tQueryLength\tTemplateName\tTemplateLength\t";
+	candidateReportFile << "SearchMethod\tSearchScore\t";
+	candidateReportFile << "AlignmentMethod\tQueryStart\tQueryEnd\tTemplateStart\tTemplateEnd\t";
+	candidateReportFile << "PairwiseAlignmentLength\tGapsInQuery\tGapsInTemplate\t";
+	candidateReportFile << "LongestInsert\t";
+	candidateReportFile << "SimBtwnQuery&Template" << endl;
+}
+
+/******************************************************************************************************************/
+
+void NastReport::print(){
+	
+	candidateReportFile << queryName << '\t' << queryLength << '\t' << templateName << '\t' << templateLength << '\t';
+	candidateReportFile << searchMethod << '\t' << setprecision(2) << fixed << searchScore << '\t';
+
+	candidateReportFile << alignmentMethod << '\t' << candidateStartPosition << "\t" << candidateEndPosition << '\t';
+	candidateReportFile << templateStartPosition << "\t" << templateEndPosition << '\t';
+	candidateReportFile << pairwiseAlignmentLength << '\t' << totalGapsInQuery << '\t' << totalGapsInTemplate << '\t';
+	candidateReportFile << longestInsert << '\t';;
+	candidateReportFile << setprecision(2) << similarityToTemplate;
+	
+	candidateReportFile << endl;
+	candidateReportFile.flush();
+}
+
+/******************************************************************************************************************/
+
+void NastReport::setCandidate(Sequence* candSeq){ 
+	queryName = candSeq->getName();
+	queryLength = candSeq->getUnalignLength();
+}
+
+/******************************************************************************************************************/
+
+void NastReport::setTemplate(Sequence* tempSeq){ 
+	templateName = tempSeq->getName();
+	templateLength = tempSeq->getUnalignLength();
+}
+
+/******************************************************************************************************************/
+
+void NastReport::setSearchParameters(string method, float score){
+	searchMethod = method;
+	searchScore = score;
+}
+
+/******************************************************************************************************************/
+
+void NastReport::setAlignmentParameters(string method, Alignment* align){
+	alignmentMethod = method;
+	
+	candidateStartPosition = align->getCandidateStartPos();
+	candidateEndPosition = align->getCandidateEndPos();
+	templateStartPosition = align->getTemplateStartPos();
+	templateEndPosition = align->getTemplateEndPos();
+	pairwiseAlignmentLength = align->getPairwiseLength();
+
+	totalGapsInQuery = pairwiseAlignmentLength - (candidateEndPosition - candidateStartPosition + 1);
+	totalGapsInTemplate = pairwiseAlignmentLength - (templateEndPosition - templateStartPosition + 1);
+}
+
+/******************************************************************************************************************/
+
+void NastReport::setNastParameters(Nast nast){
+
+	longestInsert = nast.getMaxInsertLength();
+	similarityToTemplate = nast.getSimilarityScore();
+	
+}
+
+/******************************************************************************************************************/
diff --git a/nastreport.hpp b/nastreport.hpp
new file mode 100644
index 0000000..424b0bb
--- /dev/null
+++ b/nastreport.hpp
@@ -0,0 +1,51 @@
+#ifndef NASTREPORT_HPP
+#define NASTREPORT_HPP
+
+
+/*
+ *  nastreport.hpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/19/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+
+using namespace std;
+
+/******************************************************************************************************************/
+
+class NastReport {
+
+public:
+	NastReport(string);
+	void setCandidate(Sequence*);
+	void setTemplate(Sequence*);
+	void setSearchParameters(string, float);
+	void setAlignmentParameters(string, Alignment*);
+	void setNastParameters(Nast);
+	void print();
+	
+private:
+	string queryName;
+	int queryLength;
+	string templateName;
+	int templateLength;
+	string searchMethod;
+	float searchScore;
+	string alignmentMethod;
+	int candidateStartPosition, candidateEndPosition;
+	int templateStartPosition, templateEndPosition;
+
+	int pairwiseAlignmentLength;
+	int longestInsert;
+	int totalGapsInQuery, totalGapsInTemplate;
+	float similarityToTemplate;
+	ofstream candidateReportFile;
+};
+
+/******************************************************************************************************************/
+
+#endif
diff --git a/needlemanoverlap.cpp b/needlemanoverlap.cpp
new file mode 100644
index 0000000..8f5d464
--- /dev/null
+++ b/needlemanoverlap.cpp
@@ -0,0 +1,92 @@
+/*
+ *  needleman.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/15/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	This class is an Alignment child class that implements the Gotoh pairwise alignment algorithm as described in:
+ *		
+ *		Gotoh O. 1982.  An improved algorithm for matching biological sequences.  J. Mol. Biol.  162:705-8.
+ *		Myers, EW & Miller, W.  1988.  Optimal alignments in linear space.  Comput Appl Biosci. 4:11-7.
+ *
+ *	This method is nice because it allows for an affine gap penalty to be assessed, which is analogous to what is used
+ *	in blast and is an alternative to Needleman-Wunsch, which only charges the same penalty for each gap position.
+ *	Because this method typically has problems at the ends when two sequences do not full overlap, we employ a separate
+ *	method to fix the ends (see Overlap class documentation)
+ *
+ */
+
+using namespace std;
+
+#include "alignmentcell.hpp"
+#include "alignment.hpp"
+#include "overlap.hpp"
+#include "needlemanoverlap.hpp"
+
+/**************************************************************************************************/
+
+NeedlemanOverlap::NeedlemanOverlap(float gO, float m, float mm, int r) ://	note that we don't have a gap extend
+gap(gO), match(m), mismatch(mm), Alignment(r) {							//	the gap openning penalty is assessed for
+																		//	every gapped position
+	for(int i=1;i<nCols;i++){
+		alignment[0][i].prevCell = 'l';					//	initialize first row by pointing all poiters to the left
+		alignment[0][i].cValue = 0;						//	and the score to zero
+	}
+	
+	for(int i=1;i<nRows;i++){
+		alignment[i][0].prevCell = 'u';					//	initialize first column by pointing all poiters upwards
+		alignment[i][0].cValue = 0;						//	and the score to zero
+	}
+}
+
+/**************************************************************************************************/
+
+NeedlemanOverlap::~NeedlemanOverlap(){	/*	do nothing	*/	}
+
+/**************************************************************************************************/
+
+void NeedlemanOverlap::align(string A, string B){
+	
+	seqA = ' ' + A;	lA = seqA.length();		//	algorithm requires a dummy space at the beginning of each string
+	seqB = ' ' + B;	lB = seqB.length();		//	algorithm requires a dummy space at the beginning of each string
+	
+	for(int i=1;i<lB;i++){					//	This code was largely translated from Perl code provided in Ex 3.1 
+		for(int j=1;j<lA;j++){				//	of the O'Reilly BLAST book.  I found that the example output had a
+											//	number of errors
+			float diagonal;
+			if(seqB[i] == seqA[j])	{	diagonal = alignment[i-1][j-1].cValue + match;		}
+			else					{	diagonal = alignment[i-1][j-1].cValue + mismatch;	}
+			
+			float up	= alignment[i-1][j].cValue + gap;
+			float left	= alignment[i][j-1].cValue + gap;
+			
+			if(diagonal >= up){
+				if(diagonal >= left){
+					alignment[i][j].cValue = diagonal;
+					alignment[i][j].prevCell = 'd';
+				}
+				else{
+					alignment[i][j].cValue = left;
+					alignment[i][j].prevCell = 'l';
+				}
+			}
+			else{
+				if(up >= left){
+					alignment[i][j].cValue = up;
+					alignment[i][j].prevCell = 'u';
+				}
+				else{
+					alignment[i][j].cValue = left;
+					alignment[i][j].prevCell = 'l';
+				}
+			}
+		}
+	}
+	Overlap over;						
+	over.setOverlap(alignment, lA, lB, 0);		//	Fix gaps at the beginning and end of the sequences
+	traceBack();								//	Traceback the alignment to populate seqAaln and seqBaln
+}
+
+/**************************************************************************************************/
+
diff --git a/needlemanoverlap.hpp b/needlemanoverlap.hpp
new file mode 100644
index 0000000..57d3284
--- /dev/null
+++ b/needlemanoverlap.hpp
@@ -0,0 +1,43 @@
+#ifndef NEEDLEMAN_H
+#define NEEDLEMAN_H
+
+/*
+ *  needleman.h
+ *  
+ *
+ *  Created by Pat Schloss on 12/15/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	This class is an Alignment child class that implements the Needleman-Wunsch pairwise alignment algorithm as
+ *	described in:
+ *		
+ *	Needleman SB & Wunsch CD.  1970.  A general method applicable to the search for similarities in the amino acid
+ *		sequence of two proteins.  J Mol Biol.  48:443-53.
+ *	Korf I, Yandell M, & Bedell J.  2003.  BLAST.  O'Reilly & Associates.  Sebastopol, CA.
+ *
+ *	This method is simple as it assesses a consistent penalty for each gap position.  Because this method typically has
+ *	problems at the ends when two sequences do not full overlap, we employ a separate method to fix the ends (see
+ *	Overlap class documentation)
+ *
+ */
+
+#include "mothur.h"
+
+/**************************************************************************************************/
+
+class NeedlemanOverlap : public Alignment {
+	
+public:
+	NeedlemanOverlap(float, float, float, int);
+	~NeedlemanOverlap();
+	void align(string, string);
+	
+private:	
+	float gap;
+	float match;
+	float mismatch;
+};
+
+/**************************************************************************************************/
+
+#endif
diff --git a/noalign.cpp b/noalign.cpp
new file mode 100644
index 0000000..14e34f6
--- /dev/null
+++ b/noalign.cpp
@@ -0,0 +1,25 @@
+/*
+ *  noalign.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 2/19/09.
+ *  Copyright 2009 __MyCompanyName__. All rights reserved.
+ *
+ */
+
+#include "alignment.hpp"
+#include "noalign.hpp"
+
+/**************************************************************************************************/
+
+NoAlign::NoAlign(){	/*	do nothing	*/	}
+
+/**************************************************************************************************/
+
+NoAlign::~NoAlign(){	/*	do nothing	*/	}
+
+/**************************************************************************************************/
+
+void NoAlign::align(string A, string B){	}
+
+/**************************************************************************************************/
diff --git a/noalign.hpp b/noalign.hpp
new file mode 100644
index 0000000..dd1e4ba
--- /dev/null
+++ b/noalign.hpp
@@ -0,0 +1,31 @@
+#ifndef NOALIGN_HPP
+#define NOALIGN_HPP
+
+/*
+ *  noalign.hpp
+ *  
+ *
+ *  Created by Pat Schloss on 2/19/09.
+ *  Copyright 2009 __MyCompanyName__. All rights reserved.
+ *
+ */
+using namespace std;
+
+#include "mothur.h"
+
+/**************************************************************************************************/
+
+class NoAlign : public Alignment {
+	
+public:
+	NoAlign();
+	~NoAlign();
+	void align(string, string);
+	
+private:	
+};
+
+/**************************************************************************************************/
+
+
+#endif
diff --git a/overlap.cpp b/overlap.cpp
new file mode 100644
index 0000000..9645e7a
--- /dev/null
+++ b/overlap.cpp
@@ -0,0 +1,83 @@
+/*
+ *  overlap.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/15/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	This class cleans up the alignment at the 3' end of the alignments.  Because the Gotoh and Needleman-Wunsch
+ *	algorithms start the traceback from the lower-right corner of the dynamic programming matrix, there may be a lot of
+ *	scattered bases in the alignment near the 3' end of the alignment.  Here we basically look for the largest score
+ *	in the last column and row to determine whether there should be exta gaps in sequence A or sequence B.  The gap
+ *	issues at the 5' end of the alignment seem to take care of themselves in the traceback.
+ *
+ */
+using namespace std;
+
+#include "alignmentcell.hpp"
+#include "overlap.hpp"
+
+
+/**************************************************************************************************/
+
+int Overlap::maxRow(vector<vector<AlignmentCell> >& alignment, const int band){
+	
+	float max = -100;
+	int end = lA - 1;
+	int index = end;
+	
+	for(int i=band;i<lB;i++){					//	find the row where the right most column has the highest alignment
+		if(alignment[i][end].cValue >= max){	//	score.
+			index = i;
+			max = alignment[i][end].cValue;
+		}
+	}
+	return index;
+}
+
+/**************************************************************************************************/
+
+int Overlap::maxColumn(vector<vector<AlignmentCell> >& alignment, const int band){
+	
+	float max = -100;
+	int end = lB - 1;
+	int index = end;
+	
+	for(int i=band;i<lA;i++){					//	find the column where the bottom most column has the highest
+		if(alignment[end][i].cValue >= max){	//	alignment score.
+			index = i;
+			max = alignment[end][i].cValue;
+		}
+	}
+	return index;
+}
+
+/**************************************************************************************************/
+
+void Overlap::setOverlap(vector<vector<AlignmentCell> >& alignment, const int nA, const int nB, const int band=0){
+	
+	lA = nA;
+	lB = nB;	
+	
+	int rowIndex = maxRow(alignment, band);		//	get the index for the row with the highest right hand side score
+	int colIndex = maxColumn(alignment, band);	//	get the index for the column with the highest bottom row score
+	
+	int row = lB-1;
+	int column = lA-1;
+	
+	if(colIndex == column && rowIndex == row){}	//	if the max values are the lower right corner, then we're good
+	else if(alignment[row][colIndex].cValue < alignment[rowIndex][column].cValue){
+		for(int i=rowIndex+1;i<lB;i++){			//	decide whether sequence A or B needs the gaps at the end either set 
+			alignment[i][column].prevCell = 'u';//	the pointer upwards or...
+		}
+		
+	}
+	else {
+		for(int i=colIndex+1;i<lA;i++){
+			alignment[row][i].prevCell = 'l';	//	...to the left
+		}
+	}
+}												//	the traceback should take care of the gaps at the 5' end
+
+/**************************************************************************************************/
+
diff --git a/overlap.hpp b/overlap.hpp
new file mode 100644
index 0000000..f87f808
--- /dev/null
+++ b/overlap.hpp
@@ -0,0 +1,37 @@
+#ifndef OVERLAP_H
+#define OVERLAP_H
+
+/*
+ *  overlap.hpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/15/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	This class cleans up the alignment at the 3' end of the alignments.  Because the Gotoh and Needleman-Wunsch
+ *	algorithms start the traceback from the lower-right corner of the dynamic programming matrix, there may be a lot of
+ *	scattered bases in the alignment near the 3' end of the alignment.  Here we basically look for the largest score
+ *	in the last column and row to determine whether there should be exta gaps in sequence A or sequence B.  The gap
+ *	issues at the 5' end of the alignment seem to take care of themselves in the traceback.
+ *
+ */
+
+#include "mothur.h"
+
+/**************************************************************************************************/
+
+class Overlap {
+	
+public:
+	Overlap(){};
+	~Overlap(){};
+	void setOverlap(vector<vector<AlignmentCell> >&, const int, const int, const int);
+private:
+	int maxRow(vector<vector<AlignmentCell> >&, const int);
+	int maxColumn(vector<vector<AlignmentCell> >&, const int);
+	int lA, lB;
+};
+
+/**************************************************************************************************/
+
+#endif
diff --git a/rarefact.cpp b/rarefact.cpp
index 5c1ce74..ba0c02e 100644
--- a/rarefact.cpp
+++ b/rarefact.cpp
@@ -94,15 +94,23 @@ try {
 			//randomize the groups
 			random_shuffle(lookup.begin(), lookup.end());
 			
+			//make merge the size of lookup[0]
+			SharedRAbundVector* merge = new SharedRAbundVector(lookup[0]->size());
+			
+			//make copy of lookup zero
+			for(int i = 0; i<lookup[0]->size(); i++) {
+				merge->set(i, lookup[0]->getAbundance(i), "merge");
+			}
+			
 			vector<SharedRAbundVector*> subset;
 			//send each group one at a time
 			for (int k = 0; k < lookup.size(); k++) { 
 				subset.clear(); //clears out old pair of sharedrabunds
 				//add in new pair of sharedrabunds
-				subset.push_back(lookup[0]); subset.push_back(lookup[k]);
+				subset.push_back(merge); subset.push_back(lookup[k]);
 				
 				rcd->updateSharedData(subset, k+1, numGroupComb);
-				mergeVectors(lookup[0], lookup[k]);
+				mergeVectors(merge, lookup[k]);
 			}
 
 			//resets output files
@@ -134,9 +142,7 @@ void Rarefact::mergeVectors(SharedRAbundVector* shared1, SharedRAbundVector* sha
 	try{
 		for (int k = 0; k < shared1->size(); k++) {
 			//merge new species into shared1
-			if ((shared1->getAbundance(k) == 0) && (shared2->getAbundance(k) != 0)) {
-				shared1->set(k, shared2->getAbundance(k), "combo");  //set to 'combo' since this vector now contains multiple groups
-			}
+			shared1->set(k, (shared1->getAbundance(k) + shared2->getAbundance(k)), "combo");  //set to 'combo' since this vector now contains multiple groups
 		}
 	}
 	catch(exception& e) {
diff --git a/sequence.cpp b/sequence.cpp
index b59363e..f136426 100644
--- a/sequence.cpp
+++ b/sequence.cpp
@@ -24,6 +24,36 @@ Sequence::Sequence(string newName, string sequence) {
 	}
 	setUnaligned(sequence);
 }
+//********************************************************************************************************************
+
+Sequence::Sequence(ifstream& fastaFile){
+	
+	string accession;				//	provided a file handle to a fasta-formatted sequence file, read in the next
+	fastaFile >> accession;			//	accession number and sequence we find...
+	setName(accession);
+
+	char letter;
+	string sequence;
+	
+	while(fastaFile){
+		letter= fastaFile.get();
+		if(letter == '>'){
+			fastaFile.putback(letter);
+			break;
+		}
+		else if(isprint(letter)){
+			letter = toupper(letter);
+			if(letter == 'U'){letter = 'T';}
+			sequence += letter;
+		}
+		
+	}
+
+	if(sequence.find_first_of('-') != string::npos){	//	if there are any gaps in the sequence, assume that it is
+		setAligned(sequence);							//	an alignment file
+	}
+	setUnaligned(sequence);								//	also set the unaligned sequence file
+}
 
 //********************************************************************************************************************
 
@@ -122,3 +152,18 @@ void Sequence::printSequence(ostream& out){
 }
 
 //********************************************************************************************************************
+
+int Sequence::getUnalignLength(){
+	return unaligned.length();
+}
+
+//********************************************************************************************************************
+
+int Sequence::getAlignLength(){
+	return aligned.length();
+}
+
+//********************************************************************************************************************
+
+
+
diff --git a/sequence.hpp b/sequence.hpp
index 03cbab7..37ca87c 100644
--- a/sequence.hpp
+++ b/sequence.hpp
@@ -8,6 +8,10 @@
  *  Created by Pat Schloss on 12/15/08.
  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
  *
+ *	A sequence object has three components: i) an accession number / name, ii) the unaligned primary sequence, iii) a
+ *	pairwise aligned sequence, and iv) a sequence that is aligned to a reference alignment.  This class has methods
+ *	to set and get these values for the other classes where they are needed.  *
+ *
  */
 using namespace std;
 
@@ -20,6 +24,7 @@ class Sequence {
 public:
 	Sequence();
 	Sequence(string, string);
+	Sequence(ifstream&);
 	
 	void setName(string);
 	void setUnaligned(string);
@@ -32,7 +37,9 @@ public:
 	string getAligned();
 	string getPairwise();
 	string getUnaligned();
-	int getLength();
+	int getLength();  //the greater of the lengths of unaligned and aligned
+	int getUnalignLength();
+	int getAlignLength();
 	void printSequence(ostream&);
 	
 private:
diff --git a/suffixdb.cpp b/suffixdb.cpp
new file mode 100644
index 0000000..b4639f9
--- /dev/null
+++ b/suffixdb.cpp
@@ -0,0 +1,56 @@
+/*
+ *  suffixdb.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/16/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	This is a child class of the Database abstract datatype.  The class is basically a database of suffix trees and an
+ *	encapsulation of the method for finding the most similar tree to an inputted sequence.  the suffixForest objecct
+ *	is a vector of SuffixTrees, with each template sequence being represented by a different SuffixTree.  The class also
+ *	provides a method to take an unaligned sequence and find the closest sequence in the suffixForest.  The search
+ *	method is inspired by the article and Perl source code provided at http://www.ddj.com/web-development/184416093.  I 
+ *	would estimate that the time complexity is O(LN) for each search, which is slower than the kmer searching, but 
+ *	faster than blast
+ *
+ */
+
+using namespace std;
+
+#include "database.hpp"
+#include "sequence.hpp"
+#include "suffixtree.hpp"
+#include "suffixdb.hpp"
+
+/**************************************************************************************************/
+
+SuffixDB::SuffixDB(string fastaFileName) : Database(fastaFileName) {
+
+	suffixForest.resize(numSeqs);
+	cout << "Generating the suffix tree database...\t";	cout.flush();
+	for(int i=0;i<numSeqs;i++){								//	The parent class' constructor generates the vector of
+		suffixForest[i].loadSequence(templateSequences[i]);	//	template Sequence objects.  Here each of these objects
+	}														//	is used to generate a suffix tree, aka the suffix forest
+	cout << "DONE." << endl << endl;	cout.flush();
+
+}
+
+/**************************************************************************************************/
+
+Sequence* SuffixDB::findClosestSequence(Sequence* candidateSeq){
+
+	int minValue = 2000;
+	int closestSequenceNo = 0;
+	string processedSeq = candidateSeq->convert2ints();		//	the candidate sequence needs to be a string of ints
+	for(int i=0;i<suffixForest.size();i++){					//	scan through the forest and see what the minimum
+		int count = suffixForest[i].countSuffixes(processedSeq, minValue);	//	return score is and keep track of the
+		if(count == minValue){								//	template sequence index that corresponds to that score
+			closestSequenceNo = i;
+		}
+	}
+	searchScore = 100 * (1. - minValue / (float)processedSeq.length());
+	return templateSequences[closestSequenceNo];			//	return the Sequence object that has the minimum score
+	
+}
+
+/**************************************************************************************************/
diff --git a/suffixdb.hpp b/suffixdb.hpp
new file mode 100644
index 0000000..3d0b01d
--- /dev/null
+++ b/suffixdb.hpp
@@ -0,0 +1,36 @@
+#ifndef SUFFIXDB_HPP
+#define SUFFIXDB_HPP
+
+/*
+ *  suffixdb.hpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/16/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	This is a child class of the Database abstract datatype.  The class is basically a database of suffix trees and an
+ *	encapsulation of the method for finding the most similar tree to an inputted sequence.  the suffixForest objecct
+ *	is a vector of SuffixTrees, with each template sequence being represented by a different SuffixTree.  The class also
+ *	provides a method to take an unaligned sequence and find the closest sequence in the suffixForest.  The search
+ *	method is inspired by the article and Perl source code provided at http://www.ddj.com/web-development/184416093.  I 
+ *	would estimate that the time complexity is O(LN) for each search, which is slower than the kmer searching, but 
+ *	faster than blast
+ *
+ */
+
+#include "mothur.h"
+#include "database.hpp"
+
+class SuffixTree;
+
+class SuffixDB : public Database {
+	
+public:
+	SuffixDB(string);
+	Sequence* findClosestSequence(Sequence*);
+
+private:
+	vector<SuffixTree> suffixForest;
+};
+
+#endif
diff --git a/suffixnodes.cpp b/suffixnodes.cpp
new file mode 100644
index 0000000..79826ff
--- /dev/null
+++ b/suffixnodes.cpp
@@ -0,0 +1,96 @@
+/*
+ *  SuffixNodes.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/15/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	There are two types of nodes in a suffix tree as I have implemented it.  First, there are the internal nodes that
+ *	have children, these are the SuffixBranch objects.  There are also the terminal nodes, which are the suffixBranches.
+ *  I divided them into two groups to save on memory.  A SuffixTree object will be a vector of SuffixNodes; therefore,
+ *	the values of parentNode, children nodes, and suffix nodes are stored as ints that correspond to indices in the 
+ *	vector
+ *
+ */
+
+#include "suffixnodes.hpp"
+
+using namespace std;
+
+//********************************************************************************************************************
+
+inline char deCodeSequence(char code){
+	
+	if(code == '0')			{	return 'a';	}	//	this method allows us to go from the int string to a char string;
+	else if(code == '1')	{	return 'c';	}	//	it's only really useful if we want to print out the tree
+	else if(code == '2')	{	return 'g';	}
+	else if(code == '3')	{	return 't';	}
+	else if(code == '4')	{	return 'n';	}
+	else					{	return '$';	}
+	
+}
+
+//********************************************************************************************************************
+
+SuffixNode::SuffixNode(int parent, int start, int end) : 
+		parentNode(parent),			//	we store the parent node as an int
+		startCharPosition(start),	//	the suffix tree class will hold the sequence that the startCharPosition and 
+		endCharPosition(end)		//	endCharPosition indices correspond to
+		{	/*	do nothing	*/			}
+
+
+void SuffixNode::setChildren(char, int)			{	/*	do nothing	*/			}	//	there's no children in a leaf
+int SuffixNode::getNumChildren()				{	return 0;					}	//	ditto
+void SuffixNode::eraseChild(char)				{	/*	do nothing	*/			}	//	ditto
+int SuffixNode::getChild(char)					{	return -1;					}	//	ditto
+void SuffixNode::setSuffixNode(int)				{	/*	do nothing	*/			}	//	there's no suffix node in a leaf
+int SuffixNode::getSuffixNode()					{	return -1;					}	//	ditto
+int SuffixNode::getParentNode()					{	return parentNode;			}
+void SuffixNode::setParentNode(int number)		{	parentNode = number;		}
+int SuffixNode::getStartCharPos()				{	return startCharPosition;	}
+void SuffixNode::setStartCharPos(int start)		{	startCharPosition = start;	}
+int SuffixNode::getEndCharPos()					{	return endCharPosition;		}	
+
+//********************************************************************************************************************
+
+SuffixLeaf::SuffixLeaf(int parent, int start, int end) : SuffixNode(parent, start, end) {	/*	do nothing	*/	}
+
+
+void SuffixLeaf::print(string sequence, int nodeNumber){
+	
+	cout << this << '\t' << parentNode << '\t' << nodeNumber << '\t' <<
+	-1 << '\t' << startCharPosition << '\t' << endCharPosition << '\t';
+	
+	cout << '\'';
+	for(int i=startCharPosition;i<=endCharPosition;i++){
+		cout << deCodeSequence(sequence[i]);
+	}
+	cout << '\'' << endl;
+}
+
+//********************************************************************************************************************
+
+SuffixBranch::SuffixBranch(int parent, int start, int end) : SuffixNode(parent, start, end), suffixNode(-1){
+		childNodes.assign(6, -1);
+}
+	
+void SuffixBranch::print(string sequence, int nodeNumber){						//	this method is different that than
+	cout << this << '\t' << parentNode << '\t' << nodeNumber << '\t' <<			//	of a leaf because it prints out a
+	suffixNode << '\t' << startCharPosition << '\t' << endCharPosition << '\t';	//	value for the suffix node	
+	
+	cout << '\'';
+	for(int i=startCharPosition;i<=endCharPosition;i++){
+		cout << deCodeSequence(sequence[i]);
+	}
+	cout << '\'' << endl;
+}
+
+//	we can access the children by subtracting '0' from the the char value from the string, the difference is an int
+//	value and the index we need to access.
+void SuffixBranch::eraseChild(char base)	{	childNodes[base - '0'] = -1;	}	//to erase set the child index to -1
+void SuffixBranch::setChildren(char base, int nodeIndex){	childNodes[base - '0'] = nodeIndex;	}
+void SuffixBranch::setSuffixNode(int nodeIndex){	suffixNode = nodeIndex;		}
+int SuffixBranch::getSuffixNode()			{	return suffixNode;				}
+int SuffixBranch::getChild(char base)		{	return childNodes[base - '0'];	}
+	
+//********************************************************************************************************************
diff --git a/suffixnodes.hpp b/suffixnodes.hpp
new file mode 100644
index 0000000..863a3ae
--- /dev/null
+++ b/suffixnodes.hpp
@@ -0,0 +1,77 @@
+#ifndef SUFFIXNODES_H
+#define SUFFIXNODES_H
+
+/*
+ *  SuffixNodes.h
+ *  
+ *
+ *  Created by Pat Schloss on 12/15/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	There are two types of nodes in a suffix tree as I have implemented it.  First, there are the internal nodes that
+ *	have children, these are the SuffixBranch objects.  There are also the terminal nodes, which are the suffixBranches.
+ *  I divided them into two groups to save on memory.  A SuffixTree object will be a vector of SuffixNodes; therefore,
+ *	the values of parentNode, children nodes, and suffix nodes are stored as ints that correspond to indices in the 
+ *	vector
+ *
+ */
+
+#include "mothur.h"
+
+using namespace std;
+
+//********************************************************************************************************************
+
+class SuffixNode {
+	
+public:
+	SuffixNode(int, int, int);
+	virtual void print(string, int)	= 0;
+	virtual void setChildren(char, int);
+	virtual int getNumChildren();
+	virtual void eraseChild(char);
+	virtual void setSuffixNode(int);
+	virtual int getSuffixNode();
+	virtual int getChild(char);
+	int getParentNode();
+	void setParentNode(int);
+	int getStartCharPos();
+	void setStartCharPos(int start);
+	int getEndCharPos();
+	
+protected:
+	int parentNode;
+	int startCharPosition;
+	int endCharPosition;
+};
+
+//********************************************************************************************************************
+
+class SuffixLeaf : public SuffixNode {	//	most of the methods are already set in the parent class
+	
+public:
+	SuffixLeaf(int, int, int);		//	we just need to define a constructor and
+	void print(string, int);		//	print method
+};
+
+//********************************************************************************************************************
+
+class SuffixBranch : public SuffixNode {
+	
+public:
+	SuffixBranch(int, int, int);
+	void print(string, int);		//	need a special method for printing the node because there are children
+	void eraseChild(char);			//	need a special method for erasing the children
+	void setChildren(char, int);	//	need a special method for setting children
+	void setSuffixNode(int);		//	need a special method for setting the suffix node
+	int getSuffixNode();			//	need a special method for returning the suffix node
+	int getChild(char);				//	need a special method for return children
+	
+private:
+	vector<int> childNodes;			//	a suffix branch is unique because it has children and a suffixNode.  The 
+	int suffixNode;					//	are stored in a vector for super-fast lookup.  If the alphabet were bigger, this
+};									//	might not be practical.  Since we only have 5 possible letters, it makes sense
+
+//********************************************************************************************************************
+
+#endif
diff --git a/suffixtree.cpp b/suffixtree.cpp
new file mode 100644
index 0000000..56ce6ed
--- /dev/null
+++ b/suffixtree.cpp
@@ -0,0 +1,247 @@
+/*
+ *  suffixtree.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 12/15/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	This is my half-assed attempt to implement a suffix tree.  This is a cobbled together algorithm using materials that
+ *	I found at http://marknelson.us/1996/08/01/suffix-trees/ and:
+ *
+ *		Ukkonen E. (1995). On-line construction of suffix trees. Algorithmica 14 (3): 249--260
+ *		Gusfield, Dan (1999). Algorithms on Strings, Trees and Sequences: Computer Science and Computational Biology. 
+ *			USA: Cambridge University Press
+ *
+ *	The Ukkonen paper is the seminal paper describing the on-line method of constructing a suffix tree.
+ *
+ *	I have chosen to store the nodes of the tree as a vector of pointers to SuffixNode objects.  The root is stored at
+ *	nodeVector[0].  Each tree also stores the sequence name and the string that corresponds to the actual sequence. 
+ *	Finally, this class provides a way of counting the number of suffixes that are needed in one tree to generate a new
+ *	sequence (countSuffixes).  This method is used to determine similarity between sequences and was inspired by the
+ *	article and Perl source code provided at http://www.ddj.com/web-development/184416093.
+ *
+ */
+
+#include "sequence.hpp"
+#include "suffixnodes.hpp"
+#include "suffixtree.hpp"
+
+using namespace std;
+
+//********************************************************************************************************************
+
+inline bool compareParents(SuffixNode* left, SuffixNode* right){//	this is necessary to print the tree and to sort the
+	return (left->getParentNode() < right->getParentNode());	//	nodes in order of their parent
+}
+
+//********************************************************************************************************************
+
+SuffixTree::SuffixTree(){}
+
+//********************************************************************************************************************
+
+SuffixTree::~SuffixTree(){
+	for(int i=0;i<nodeVector.size();i++){	delete nodeVector[i];	}	
+}
+
+//********************************************************************************************************************
+
+void SuffixTree::loadSequence(Sequence* seq){
+	nodeCounter = 0;							//	initially there are 0 nodes in the tree
+	activeStartPosition = 0;
+	activeEndPosition = -1;						
+	seqName = seq->getName();
+	sequence = seq->convert2ints();
+	sequence += '5';							//	this essentially concatenates a '$' to the end of the sequence to
+	int seqLength = sequence.length();			//	make it a cononical suffix tree
+	
+	nodeVector.push_back(new SuffixBranch(-1, 0, -1));	//	enter the root of the suffix tree
+	
+	activeNode = root = 0;
+	string hold;
+	for(int i=0;i<seqLength;i++){
+		addPrefix(i);							//	step through the sequence adding each prefix
+	}
+}
+
+//********************************************************************************************************************
+
+string SuffixTree::getSeqName()	{
+	return seqName;		
+}
+
+//********************************************************************************************************************
+
+void SuffixTree::print(){
+	vector<SuffixNode*> hold = nodeVector;
+	sort(hold.begin(), hold.end(), compareParents);
+	cout << "Address\t\tParent\tNode\tSuffix\tStartC\tEndC\tSuffix" << endl;
+	for(int i=1;i<=nodeCounter;i++){
+		hold[i]->print(sequence, i);
+	}
+}
+
+//********************************************************************************************************************
+
+int SuffixTree::countSuffixes(string compareSequence, int& minValue){	//	here we count the number of suffix parts 
+															//	we need to rewrite a user supplied sequence.  if the 
+	int numSuffixes = 0;									//	count exceeds the supplied minValue, bail out.  The
+	int seqLength = compareSequence.length();				//	time complexity should be O(L)
+	int position = 0;
+	
+	int presentNode = 0;
+	
+	while(position < seqLength){		//	while the position in the query sequence isn't at the end...
+		
+		if(numSuffixes > minValue)	{	return 1000000;		}	//	bail if the count gets too high
+		
+		int newNode = nodeVector[presentNode]->getChild(compareSequence[position]);	//	see if the current node has a
+																//	child that matches the next character in the query
+		if(newNode == -1){										
+			if(presentNode == 0){	position++;		}			//	if not, go back to the root and increase the count
+			numSuffixes++;										//	by one.
+			presentNode = 0;
+		}
+		else{													//	if there is, move to that node and see how far down
+			presentNode = newNode;								//	it we can get
+			
+			for(int i=nodeVector[newNode]->getStartCharPos(); i<=nodeVector[newNode]->getEndCharPos(); i++){
+				if(compareSequence[position] == sequence[i]){
+					position++;									//	as long as the query and branch agree, keep going
+				}
+				else{
+					numSuffixes++;								//	if there is a mismatch, increase the number of 
+					presentNode = 0;							//	suffixes and go back to the root
+					break;
+				}
+			}
+		}
+		//	if we get all the way through the node we'll go to the top of the while loop and find the child node
+		//	that corresponds to what we are interested in		
+	}
+	numSuffixes--;												//	the method puts an extra count on numSuffixes
+	
+	if(numSuffixes < minValue)	{	minValue = numSuffixes;	}	//	if the count is less than the previous minValue,
+	return numSuffixes;											//	change the value and return the number of suffixes
+	
+}
+
+//********************************************************************************************************************
+
+void SuffixTree::canonize(){	//	if you have to ask how this works, you don't really want to know and this really
+								//	isn't the place to ask.
+	if ( isExplicit() == 0 ) {	//	if the node has no children...
+		
+		int tempNodeIndex = nodeVector[activeNode]->getChild(sequence[activeStartPosition]);
+		SuffixNode* tempNode = nodeVector[tempNodeIndex];
+		
+		int span = tempNode->getEndCharPos() - tempNode->getStartCharPos();
+		
+		while ( span <= ( activeEndPosition - activeStartPosition ) ) {
+			
+            activeStartPosition = activeStartPosition + span + 1;
+			
+			activeNode = tempNodeIndex;
+			
+            if ( activeStartPosition <= activeEndPosition ) {
+				tempNodeIndex = nodeVector[tempNodeIndex]->getChild(sequence[activeStartPosition]);
+				tempNode = nodeVector[tempNodeIndex];
+				span = tempNode->getEndCharPos() - tempNode->getStartCharPos();
+            }
+			
+        }
+    }
+}
+
+//********************************************************************************************************************
+
+int SuffixTree::split(int nodeIndex, int position){	//	leaves stay leaves, etc, to split a leaf we make a new interior 
+													//	node and reconnect everything
+	SuffixNode* node = nodeVector[nodeIndex];					//	get the node that needs to be split
+	SuffixNode* parentNode = nodeVector[node->getParentNode()];	//	get it's parent node
+	
+	parentNode->eraseChild(sequence[node->getStartCharPos()]);	//	erase the present node from the registry of its parent
+	
+	nodeCounter++;
+	SuffixNode* newNode = new SuffixBranch(node->getParentNode(), node->getStartCharPos(), node->getStartCharPos() + activeEndPosition - activeStartPosition);	//	create a new node that will link the parent with the old child
+	parentNode->setChildren(sequence[newNode->getStartCharPos()], nodeCounter);//	give the parent the new child
+	nodeVector.push_back(newNode);
+	
+	node->setParentNode(nodeCounter);	//	give the original node the new node as its parent
+	newNode->setChildren(sequence[node->getStartCharPos() + activeEndPosition - activeStartPosition + 1], nodeIndex);
+	//	put the original node in the registry of the new node's children
+	newNode->setSuffixNode(activeNode);//link the new node with the old active node
+	
+	//	recalculate the startCharPosition of the outermost node
+	node->setStartCharPos(node->getStartCharPos() + activeEndPosition - activeStartPosition + 1 );
+	
+	return node->getParentNode();
+}
+
+//********************************************************************************************************************
+
+void SuffixTree::makeSuffixLink(int& previous, int present){
+	
+//	here we link the nodes that are suffixes of one another to rapidly speed through the tree
+	if ( previous > 0 ) {	nodeVector[previous]->setSuffixNode(present);	}
+	else				{	/*	do nothing								*/	}
+	
+    previous = present;
+}
+
+//********************************************************************************************************************
+
+void SuffixTree::addPrefix(int prefixPosition){
+	
+	int lastParentNode = -1;	//	we need to place a new prefix in the suffix tree
+	int parentNode = 0;
+	
+	while(1){
+		
+		parentNode = activeNode;
+		
+		if(isExplicit() == 1){	//	if the node is explicit (has kids), try to follow it down the branch if its there...
+			if(nodeVector[activeNode]->getChild(sequence[prefixPosition]) != -1){	//	break out and get next prefix...
+				break;												
+			}
+			else{				//	...otherwise continue, we'll need to make a new node later on...
+			}
+		}
+		else{					//	if it's not explicit (no kids), read through and see if all of the chars agree...
+			int tempNode = nodeVector[activeNode]->getChild(sequence[activeStartPosition]);
+			int span = activeEndPosition - activeStartPosition;
+			
+			if(sequence[nodeVector[tempNode]->getStartCharPos() + span + 1] == sequence[prefixPosition] ){
+				break;			//	if the existing suffix agrees with the new one, grab a new prefix...
+			}
+			else{
+				parentNode = split(tempNode, prefixPosition);	//	... otherwise we need to split the node
+			}
+			
+		}
+		
+		nodeCounter++;	//	we need to generate a new node here if the kid didn't exist, or we split a node
+		SuffixNode* newSuffixLeaf = new SuffixLeaf(parentNode, prefixPosition, sequence.length()-1);
+		nodeVector[parentNode]->setChildren(sequence[prefixPosition], nodeCounter);
+		nodeVector.push_back(newSuffixLeaf);
+		
+		makeSuffixLink( lastParentNode, parentNode );		//	make a suffix link for the parent node
+		
+		if(nodeVector[activeNode]->getParentNode() == -1){	//	move along the start position for the tree
+            activeStartPosition++;
+        } 
+		else {
+            activeNode = nodeVector[activeNode]->getSuffixNode();
+		}
+		canonize();											//	frankly, i'm not entirely clear on what canonize does.
+	}
+	
+	makeSuffixLink( lastParentNode, parentNode );
+	activeEndPosition++;									//	move along the end position for the tree
+	
+	canonize();												//	frankly, i'm not entirely clear on what canonize does.
+	
+}
+
+//********************************************************************************************************************
+
diff --git a/suffixtree.hpp b/suffixtree.hpp
new file mode 100644
index 0000000..387d4bd
--- /dev/null
+++ b/suffixtree.hpp
@@ -0,0 +1,69 @@
+#ifndef SUFFIXTREE_H
+#define SUFFIXTREE_H
+
+/*
+ *  suffixtree.h
+ *  
+ *
+ *  Created by Pat Schloss on 12/15/08.
+ *  Copyright 2008 Patrick D. Schloss. All rights reserved.
+ *
+ *	This is my half-assed attempt to implement a suffix tree.  This is a cobbled together algorithm using materials that
+ *	I found at http://marknelson.us/1996/08/01/suffix-trees/ and:
+ *
+ *		Ukkonen E. (1995). On-line construction of suffix trees. Algorithmica 14 (3): 249--260
+ *		Gusfield, Dan (1999). Algorithms on Strings, Trees and Sequences: Computer Science and Computational Biology. 
+ *			USA: Cambridge University Press
+ *
+ *	The Ukkonen paper is the seminal paper describing the on-line method of constructing a suffix tree.
+ *
+ *	I have chosen to store the nodes of the tree as a vector of pointers to SuffixNode objects.  The root is stored at
+ *	nodeVector[0].  Each tree also stores the sequence name and the string that corresponds to the actual sequence. 
+ *	Finally, this class provides a way of counting the number of suffixes that are needed in one tree to generate a new
+ *	sequence (countSuffixes).  This method is used to determine similarity between sequences and was inspired by the
+ *	article and Perl source code provided at http://www.ddj.com/web-development/184416093.
+ *
+ */
+
+#include "mothur.h"
+
+using namespace std;
+
+class SuffixNode;
+
+//********************************************************************************************************************
+
+class SuffixTree {
+	
+public:
+	SuffixTree();
+	~SuffixTree();
+//	SuffixTree(string, string);
+
+	void loadSequence(Sequence*);
+	string getSeqName();
+	void print();	
+	int countSuffixes(string, int&);
+
+private:	
+	void addPrefix(int);
+	void canonize();
+	int split(int, int);
+	void makeSuffixLink(int&, int);
+	bool isExplicit(){	return activeStartPosition > activeEndPosition;	}
+	
+	int activeStartPosition;
+	int activeEndPosition;
+	
+	vector<SuffixNode*> nodeVector;
+	int root;
+	int activeNode;
+	int nodeCounter;
+	string seqName;
+	string sequence;
+	
+};
+
+//********************************************************************************************************************
+
+#endif
diff --git a/validcommands.cpp b/validcommands.cpp
index 5549e63..a5f3651 100644
--- a/validcommands.cpp
+++ b/validcommands.cpp
@@ -43,6 +43,7 @@ ValidCommands::ValidCommands() {
 		commands["concensus"]			= "concensus";
 		commands["help"]				= "help"; 
 		commands["filter.seqs"]			= "filter.seqs";
+		commands["align.seqs"]			= "align.seqs";
 		commands["quit"]				= "quit"; 
 
 				
diff --git a/validparameter.cpp b/validparameter.cpp
index 8af4a81..e79a2db 100644
--- a/validparameter.cpp
+++ b/validparameter.cpp
@@ -211,7 +211,6 @@ void ValidParameters::initCommandParameters() {
 		//{"parameter1","parameter2",...,"last parameter"};
 		
 		string readdistArray[] = {"phylip","column", "name","cutoff","precision", "group"};
-
 		commandParameters["read.dist"] = addParameters(readdistArray, sizeof(readdistArray)/sizeof(string));
 
 		string readotuArray[] =  {"list","order","shared", "line", "label","group","sabund", "rabund"};
@@ -292,6 +291,9 @@ void ValidParameters::initCommandParameters() {
 		string distanceArray[] =  {"fasta","phylip","clustal","nexus", "calc", "ends", "cutoff", "processors"};
 		commandParameters["dist.seqs"] = addParameters(distanceArray, sizeof(distanceArray)/sizeof(string));
 		
+		string AlignArray[] =  {"fasta","phylip","clustal","nexus", "template", "ksize", "align", "match", "mismatch", "gapopen", "gapextend"};
+		commandParameters["align.seqs"] = addParameters(AlignArray, sizeof(AlignArray)/sizeof(string));
+		
 		string quitArray[] = {};
 		commandParameters["quit"] = addParameters(quitArray, sizeof(quitArray)/sizeof(string));
 
-- 
2.39.2