From ef3f6d42fe720cd6d91419e5e32f8c04d8765010 Mon Sep 17 00:00:00 2001
From: westcott <westcott>
Date: Tue, 17 Nov 2009 17:14:58 +0000
Subject: [PATCH] final fixes for 1.7

---
 Mothur.xcodeproj/project.pbxproj |  20 ++--
 aligncommand.cpp                 |   1 +
 alignmentdb.cpp                  |   2 +-
 alignmentdb.h                    |   2 +-
 bayesian.cpp                     |   2 +-
 chimeracheckrdp.cpp              |   2 +-
 classify.cpp                     |   2 +-
 classify.h                       |   7 +-
 classifyseqscommand.cpp          |  29 ++---
 classifyseqscommand.h            |  10 ++
 doTaxonomy.cpp                   | 177 ++++++++++++++++++-------------
 doTaxonomy.h                     |   1 +
 hclustercommand.cpp              |   4 +-
 knn.cpp                          |   6 +-
 knn.h                            |   2 +-
 mothur.cpp                       |   2 +-
 readotucommand.cpp               |   3 +-
 17 files changed, 149 insertions(+), 123 deletions(-)
diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj
index 27a8b91..977aee9 100644
--- a/Mothur.xcodeproj/project.pbxproj
+++ b/Mothur.xcodeproj/project.pbxproj
@@ -541,18 +541,18 @@
 		A751ACE91098B283003D0911 /* readcluster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readcluster.cpp; sourceTree = SOURCE_ROOT; };
 		A75B887B104C16860083C454 /* ccode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ccode.cpp; sourceTree = SOURCE_ROOT; };
 		A75B887C104C16860083C454 /* ccode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ccode.h; sourceTree = SOURCE_ROOT; };
-		A7861476109F5FA00010AAAF /* classifyseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classifyseqscommand.h; sourceTree = "<group>"; };
-		A7861477109F5FA00010AAAF /* classifyseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classifyseqscommand.cpp; sourceTree = "<group>"; };
+		A7861476109F5FA00010AAAF /* classifyseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classifyseqscommand.h; sourceTree = SOURCE_ROOT; };
+		A7861477109F5FA00010AAAF /* classifyseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classifyseqscommand.cpp; sourceTree = SOURCE_ROOT; };
 		A78780FE10A06B8B0086103D /* classify.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classify.h; sourceTree = SOURCE_ROOT; };
 		A787810210A06D5D0086103D /* classify.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classify.cpp; sourceTree = SOURCE_ROOT; };
-		A78781B810A0813D0086103D /* doTaxonomy.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = doTaxonomy.cpp; sourceTree = "<group>"; };
-		A78781B910A0813D0086103D /* doTaxonomy.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = doTaxonomy.h; sourceTree = "<group>"; };
-		A787820F10A0AAE70086103D /* bayesian.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bayesian.h; sourceTree = "<group>"; };
-		A787821010A0AAE70086103D /* bayesian.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bayesian.cpp; sourceTree = "<group>"; };
-		A78782A910A1B1CB0086103D /* alignmentdb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alignmentdb.h; sourceTree = "<group>"; };
-		A78782AA10A1B1CB0086103D /* alignmentdb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignmentdb.cpp; sourceTree = "<group>"; };
-		A787844310A1EBDD0086103D /* knn.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = knn.h; sourceTree = "<group>"; };
-		A787844410A1EBDD0086103D /* knn.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = knn.cpp; sourceTree = "<group>"; };
+		A78781B810A0813D0086103D /* doTaxonomy.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = doTaxonomy.cpp; sourceTree = SOURCE_ROOT; };
+		A78781B910A0813D0086103D /* doTaxonomy.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = doTaxonomy.h; sourceTree = SOURCE_ROOT; };
+		A787820F10A0AAE70086103D /* bayesian.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bayesian.h; sourceTree = SOURCE_ROOT; };
+		A787821010A0AAE70086103D /* bayesian.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bayesian.cpp; sourceTree = SOURCE_ROOT; };
+		A78782A910A1B1CB0086103D /* alignmentdb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alignmentdb.h; sourceTree = SOURCE_ROOT; };
+		A78782AA10A1B1CB0086103D /* alignmentdb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignmentdb.cpp; sourceTree = SOURCE_ROOT; };
+		A787844310A1EBDD0086103D /* knn.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = knn.h; sourceTree = SOURCE_ROOT; };
+		A787844410A1EBDD0086103D /* knn.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = knn.cpp; sourceTree = SOURCE_ROOT; };
 		A7B04491106CC3E60046FC83 /* chimeraslayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraslayer.h; sourceTree = SOURCE_ROOT; };
 		A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeraslayer.cpp; sourceTree = SOURCE_ROOT; };
 		A7B0450C106CEEC90046FC83 /* slayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = slayer.h; sourceTree = SOURCE_ROOT; };
diff --git a/aligncommand.cpp b/aligncommand.cpp
index 2ca450a..57eebb0 100644
--- a/aligncommand.cpp
+++ b/aligncommand.cpp
@@ -266,6 +266,7 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName){
 			report.setCandidate(candidateSeq);
 	
 			Sequence temp = templateDB->findClosestSequence(candidateSeq);
+
 			Sequence* templateSeq = &temp;
 
 			report.setTemplate(templateSeq);
diff --git a/alignmentdb.cpp b/alignmentdb.cpp
index 493521e..6b05989 100644
--- a/alignmentdb.cpp
+++ b/alignmentdb.cpp
@@ -14,7 +14,7 @@
 
 
 /**************************************************************************************************/
-AlignmentDB::AlignmentDB(string fastaFileName, string method, int kmerSize, int gapOpen, int gapExtend, int match, int misMatch){		//	This assumes that the template database is in fasta format, may 
+AlignmentDB::AlignmentDB(string fastaFileName, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){		//	This assumes that the template database is in fasta format, may 
 	try {											//	need to alter this in the future?
 		longest = 0;
 		
diff --git a/alignmentdb.h b/alignmentdb.h
index 339f46f..fe4e526 100644
--- a/alignmentdb.h
+++ b/alignmentdb.h
@@ -20,7 +20,7 @@ class AlignmentDB {
 
 public:
 
-	AlignmentDB(string, string, int, int, int, int, int);  //reads fastafile passed in and stores sequences
+	AlignmentDB(string, string, int, float, float, float, float);  //reads fastafile passed in and stores sequences
 	~AlignmentDB();
 	
 	Sequence findClosestSequence(Sequence*);
diff --git a/bayesian.cpp b/bayesian.cpp
index 3a2d3e0..01e4139 100644
--- a/bayesian.cpp
+++ b/bayesian.cpp
@@ -12,7 +12,7 @@
 
 /**************************************************************************************************/
 Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff) : 
-Classify(tfile, tempFile, method, ksize, 0, 0, 0, 0), kmerSize(ksize), confidenceThreshold(cutoff)  {
+Classify(tfile, tempFile, method, ksize, 0.0, 0.0, 0.0, 0.0), kmerSize(ksize), confidenceThreshold(cutoff)  {
 	try {
 					
 		numKmers = database->getMaxKmer() + 1;
diff --git a/chimeracheckrdp.cpp b/chimeracheckrdp.cpp
index 07fb8ab..f9b2319 100644
--- a/chimeracheckrdp.cpp
+++ b/chimeracheckrdp.cpp
@@ -82,7 +82,7 @@ void ChimeraCheckRDP::getChimeras() {
 		querySeqs = readSeqs(fastafile);
 		mothurOut("Done."); 
 		//templateSeqs = readSeqs(templateFile);
-		templateDB = new AlignmentDB(templateFile, "kmer", kmerSize, 0,0,0,0);
+		templateDB = new AlignmentDB(templateFile, "kmer", kmerSize, 0.0,0.0,0.0,0.0);
 		mothurOutEndLine();
 		
 		int numSeqs = querySeqs.size();
diff --git a/classify.cpp b/classify.cpp
index f6b7e80..9431c6c 100644
--- a/classify.cpp
+++ b/classify.cpp
@@ -15,7 +15,7 @@
 
 /**************************************************************************************************/
 
-Classify::Classify(string tfile, string tempFile, string method, int kmerSize, int gapOpen, int gapExtend, int match, int misMatch) : taxFile(tfile), templateFile(tempFile) {		
+Classify::Classify(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch) : taxFile(tfile), templateFile(tempFile) {		
 	try {											
 		readTaxonomy(taxFile);	
 		
diff --git a/classify.h b/classify.h
index 0314235..f13224a 100644
--- a/classify.h
+++ b/classify.h
@@ -11,7 +11,7 @@
  */
 
 
-/* This class is a parent to phylotyp, bayesian, knn.  */
+/* This class is a parent to bayesian, knn.  */
 
 #include "mothur.h"
 #include "database.hpp"
@@ -26,10 +26,9 @@ class Sequence;
 class Classify {
 
 public:
-	Classify(string, string, string, int, int, int, int, int);
-	Classify(){	delete phyloTree;  }
+	Classify(string, string, string, int, float, float, float, float);
 	
-	virtual ~Classify(){};
+	virtual ~Classify(){  delete phyloTree; delete database;  };
 	virtual string getTaxonomy(Sequence*) = 0;
 	//virtual map<string, int> getConfidenceScores() { return taxConfidenceScore; }
 	//virtual vector<string> parseTax(string);
diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp
index 2564293..40bf938 100644
--- a/classifyseqscommand.cpp
+++ b/classifyseqscommand.cpp
@@ -181,9 +181,8 @@ int ClassifySeqsCommand::execute(){
 			inFASTA.close();
 			
 			lines.push_back(new linePair(0, numFastaSeqs));
-			
+		
 			driver(lines[0], newTaxonomyFile, tempTaxonomyFile);
-			
 		}
 		else{
 			vector<int> positions;
@@ -227,15 +226,14 @@ int ClassifySeqsCommand::execute(){
 		}
 #else
 		ifstream inFASTA;
-		openInputFile(candidateFileName, inFASTA);
+		openInputFile(fastaFileName, inFASTA);
 		numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
 		inFASTA.close();
 		
 		lines.push_back(new linePair(0, numFastaSeqs));
 		
 		driver(lines[0], newTaxonomyFile, tempTaxonomyFile);
-#endif
-		
+#endif	
 		delete classify;
 		
 		//make taxonomy tree from new taxonomy file 
@@ -356,27 +354,14 @@ int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFNa
 			taxonomy = classify->getTaxonomy(candidateSeq);
 			
 			if (taxonomy != "bad seq") {
-				//if (method != "bayesian") {
-					outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
-					outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
-				//}else{
-					//vector<string> pTax = classify->parseTax(taxonomy);
-					//map<string, int> confidence = classify->getConfidenceScores();
-					
-					//outTax << candidateSeq->getName() << '\t';
-					//for (int j = 0; j < pTax.size(); j++) {
-						//if (confidence[pTax[j]] > cutoff) {
-						//	outTax << pTax[j] << "(" << confidence[pTax[j]] << ");";
-						//}else{ break; }
-					//}
-					//outTax << endl;
-				//}
+				outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
+				outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
 			}
 							
 			delete candidateSeq;
 			
-			if(i % 100 == 0){
-				mothurOut("Classifying sequence " + toString(i)); mothurOutEndLine();
+			if((i+1) % 100 == 0){
+				mothurOut("Classifying sequence " + toString(i+1)); mothurOutEndLine();
 			}
 		}
 		
diff --git a/classifyseqscommand.h b/classifyseqscommand.h
index a73aa0c..ed2819b 100644
--- a/classifyseqscommand.h
+++ b/classifyseqscommand.h
@@ -15,6 +15,16 @@
 #include "alignment.hpp"
 #include "classify.h"
 
+//KNN and Bayesian methods modeled from algorithms in
+//NaÄ±Â¨ve Bayesian Classiï¬er for Rapid Assignment of rRNA Sequences 
+//into the New Bacterial Taxonomyô°â  
+//Qiong Wang,1 George M. Garrity,1,2 James M. Tiedje,1,2 and James R. Cole1* 
+//Center for Microbial Ecology1 and Department of Microbiology and Molecular Genetics,2 Michigan State University, 
+//East Lansing, Michigan 48824 
+//Received 10 January 2007/Accepted 18 June 2007 
+
+
+
 class ClassifySeqsCommand : public Command {
 	
 public:
diff --git a/doTaxonomy.cpp b/doTaxonomy.cpp
index 06337b4..40869b9 100644
--- a/doTaxonomy.cpp
+++ b/doTaxonomy.cpp
@@ -12,74 +12,89 @@
 /**************************************************************************************************/
 
 PhyloTree::PhyloTree(){
-	
-	numNodes = 1;
-	numSeqs = 0;
-	tree.push_back(TaxNode("Root"));
-	tree[0].heirarchyID = "0";
+	try {
+		numNodes = 1;
+		numSeqs = 0;
+		tree.push_back(TaxNode("Root"));
+		tree[0].heirarchyID = "0";
+	}
+	catch(exception& e) {
+		errorOut(e, "PhyloTree", "PhyloTree");
+		exit(1);
+	}
 }
 
 /**************************************************************************************************/
 
 string PhyloTree::getNextTaxon(string& heirarchy){
-	
-	string currentLevel = "";
-	if(heirarchy != ""){
-		currentLevel=heirarchy.substr(0,heirarchy.find_first_of(';'));
-		heirarchy=heirarchy.substr(heirarchy.find_first_of(';')+1);
+	try {
+		string currentLevel = "";
+		if(heirarchy != ""){
+			currentLevel=heirarchy.substr(0,heirarchy.find_first_of(';'));
+			heirarchy=heirarchy.substr(heirarchy.find_first_of(';')+1);
+		}
+		return currentLevel;
+	}
+	catch(exception& e) {
+		errorOut(e, "PhyloTree", "getNextTaxon");
+		exit(1);
 	}
-	return currentLevel;
-	
 }
 
 /**************************************************************************************************/
 
 void PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
-
-	numSeqs++;
-
-	map<string, int>::iterator childPointer;
-
-	int currentNode = 0;
-	int level = 1;
-	
-	tree[0].accessions.push_back(seqName);
-	string taxon;// = getNextTaxon(seqTaxonomy);
-
-	while(seqTaxonomy != ""){
-
-		level++;
-
-//somehow the parent is getting one too many accnos
-//use print to reassign the taxa id
-		taxon = getNextTaxon(seqTaxonomy);
-
-		childPointer = tree[currentNode].children.find(taxon);
-
-		if(childPointer != tree[currentNode].children.end()){	//if the node already exists, move on
-			currentNode = childPointer->second;
-			tree[currentNode].accessions.push_back(seqName);
-			name2Taxonomy[seqName] = currentNode;
-		}
-		else{											//otherwise, create it
-			tree.push_back(TaxNode(taxon));
-			numNodes++;
-			tree[currentNode].children[taxon] = numNodes-1;
-			tree[numNodes-1].parent = currentNode;
-
-//			int numChildren = tree[currentNode].children.size();
-//			string heirarchyID = tree[currentNode].heirarchyID;
-//			tree[currentNode].accessions.push_back(seqName);
+	try {
+		numSeqs++;
+		
+		map<string, int>::iterator childPointer;
+		
+		int currentNode = 0;
+		int level = 1;
+		
+		tree[0].accessions.push_back(seqName);
+		string taxon;// = getNextTaxon(seqTaxonomy);
+		
+		while(seqTaxonomy != ""){
 			
-			currentNode = tree[currentNode].children[taxon];
-			tree[currentNode].accessions.push_back(seqName);
-			name2Taxonomy[seqName] = currentNode;
-//			tree[currentNode].level = level;
-//			tree[currentNode].childNumber = numChildren;
-//			tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
-		}
+			level++;
+			
+			//somehow the parent is getting one too many accnos
+			//use print to reassign the taxa id
+			taxon = getNextTaxon(seqTaxonomy);
+			
+			childPointer = tree[currentNode].children.find(taxon);
+			
+			if(childPointer != tree[currentNode].children.end()){	//if the node already exists, move on
+				currentNode = childPointer->second;
+				tree[currentNode].accessions.push_back(seqName);
+				name2Taxonomy[seqName] = currentNode;
+			}
+			else{											//otherwise, create it
+				tree.push_back(TaxNode(taxon));
+				numNodes++;
+				tree[currentNode].children[taxon] = numNodes-1;
+				tree[numNodes-1].parent = currentNode;
+				
+				//			int numChildren = tree[currentNode].children.size();
+				//			string heirarchyID = tree[currentNode].heirarchyID;
+				//			tree[currentNode].accessions.push_back(seqName);
+				
+				currentNode = tree[currentNode].children[taxon];
+				tree[currentNode].accessions.push_back(seqName);
+				name2Taxonomy[seqName] = currentNode;
+				//			tree[currentNode].level = level;
+				//			tree[currentNode].childNumber = numChildren;
+				//			tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
+			}
 		
-		if (seqTaxonomy == "") {   uniqueTaxonomies[currentNode] = currentNode;	}
+			if (seqTaxonomy == "") {   uniqueTaxonomies[currentNode] = currentNode;	}
+		}
+
+	}
+	catch(exception& e) {
+		errorOut(e, "PhyloTree", "addSeqToTree");
+		exit(1);
 	}
 }
 /**************************************************************************************************/
@@ -100,38 +115,50 @@ vector<int> PhyloTree::getGenusNodes()	{
 /**************************************************************************************************/
 
 void PhyloTree::assignHeirarchyIDs(int index){
-	
-	map<string,int>::iterator it;
-	int counter = 1;
-	
-	for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
-		tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
-		counter++;
-		tree[it->second].level = tree[index].level + 1;
-		assignHeirarchyIDs(it->second);
+	try {
+		map<string,int>::iterator it;
+		int counter = 1;
+		
+		for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
+			tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
+			counter++;
+			tree[it->second].level = tree[index].level + 1;
+			assignHeirarchyIDs(it->second);
+		}
+	}
+	catch(exception& e) {
+		errorOut(e, "PhyloTree", "assignHeirarchyIDs");
+		exit(1);
 	}
 }
 
 /**************************************************************************************************/
 
 void PhyloTree::print(ofstream& out){
-	
-	out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
-	print(0, out);
-
-	
+	try {
+		out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
+		print(0, out);
+	}
+	catch(exception& e) {
+		errorOut(e, "PhyloTree", "print");
+		exit(1);
+	}
 }
 
 /**************************************************************************************************/
 
 void PhyloTree::print(int i, ofstream& out){
-	
-	map<string,int>::iterator it;
-	for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
-		out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
-		print(it->second, out);
+	try {
+		map<string,int>::iterator it;
+		for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
+			out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
+			print(it->second, out);
+		}
+	}
+	catch(exception& e) {
+		errorOut(e, "PhyloTree", "print");
+		exit(1);
 	}
-
 }
 
 /**************************************************************************************************/
diff --git a/doTaxonomy.h b/doTaxonomy.h
index 6901d27..6f83f19 100644
--- a/doTaxonomy.h
+++ b/doTaxonomy.h
@@ -30,6 +30,7 @@ class PhyloTree {
 
 public:
 	PhyloTree();
+	~PhyloTree() {};
 	void addSeqToTree(string, string);
 	void assignHeirarchyIDs(int);
 	void print(ofstream&);
diff --git a/hclustercommand.cpp b/hclustercommand.cpp
index 2ad6333..3855950 100644
--- a/hclustercommand.cpp
+++ b/hclustercommand.cpp
@@ -330,10 +330,10 @@ vector<seqDist> HClusterCommand::getSeqs(ifstream& filehandle){
 			map<string,int>::iterator itB = globaldata->nameMap->find(secondName);
 			
 			if(itA == globaldata->nameMap->end()){
-				cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
+				cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);
 			}
 			if(itB == globaldata->nameMap->end()){
-				cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
+				cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);
 			}
 			
 			//using cutoff
diff --git a/knn.cpp b/knn.cpp
index 0a62e74..706d243 100644
--- a/knn.cpp
+++ b/knn.cpp
@@ -10,7 +10,7 @@
 #include "knn.h"
 
 /**************************************************************************************************/
-Knn::Knn(string tfile, string tempFile, string method, int kmerSize, int gapOpen, int gapExtend, int match, int misMatch, int n) 
+Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int n) 
 : Classify(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch), num(n)  {}
 /**************************************************************************************************/
 string Knn::getTaxonomy(Sequence* seq) {
@@ -18,8 +18,9 @@ string Knn::getTaxonomy(Sequence* seq) {
 		string tax;
 		
 		//use database to find closest seq
+
 		vector<int> closest = database->findClosestSequences(seq, num);
-		
+
 		vector<string> closestNames;
 		for (int i = 0; i < closest.size(); i++) {
 			//find that sequences taxonomy in map
@@ -39,6 +40,7 @@ string Knn::getTaxonomy(Sequence* seq) {
 			if (tax == "") { mothurOut("There are no common levels for sequence " + seq->getName() + ". " + seq->getName() + " will be disregarded."); mothurOutEndLine(); tax = "bad seq"; }
 		}
 		
+		simpleTax = tax;
 		return tax;	
 	}
 	catch(exception& e) {
diff --git a/knn.h b/knn.h
index 15a33b0..2c809c2 100644
--- a/knn.h
+++ b/knn.h
@@ -18,7 +18,7 @@
 class Knn : public Classify {
 	
 public:
-	Knn(string, string, string, int, int, int, int, int, int);
+	Knn(string, string, string, int, float, float, float, float, int);
 	~Knn() {};
 	
 	string getTaxonomy(Sequence*);
diff --git a/mothur.cpp b/mothur.cpp
index 4618927..a676c83 100644
--- a/mothur.cpp
+++ b/mothur.cpp
@@ -43,7 +43,7 @@ int main(int argc, char *argv[]){
 		//header
 		mothurOut("mothur v.1.6.0");
 		mothurOutEndLine();		
-		mothurOut("Last updated: 10/6/2009");
+		mothurOut("Last updated: 11/20/2009");
 		mothurOutEndLine();	
 		mothurOutEndLine();		
 		mothurOut("by");
diff --git a/readotucommand.cpp b/readotucommand.cpp
index 584eb85..b58dc60 100644
--- a/readotucommand.cpp
+++ b/readotucommand.cpp
@@ -119,7 +119,7 @@ void ReadOtuCommand::help(){
 	try {
 		mothurOut("The read.otu command must be run before you execute a collect.single, rarefaction.single, summary.single, \n");
 		mothurOut("collect.shared, rarefaction.shared or summary.shared command.   Mothur will generate a .list, .rabund and .sabund upon completion of the cluster command \n");
-		mothurOut("or you may use your own. The read.otu command parameter options are list, rabund, sabund, shared, group, order and label.\n");
+		mothurOut("or you may use your own. The read.otu command parameter options are list, rabund, sabund, shared, group, order, label and groups.\n");
 		mothurOut("The read.otu command can be used in two ways.  The first is to read a list, rabund or sabund and run the collect.single, rarefaction.single or summary.single.\n");
 		mothurOut("For this use the read.otu command should be in the following format: read.otu(list=yourListFile, order=yourOrderFile, label=yourLabels).\n");
 		mothurOut("The list, rabund or sabund parameter is required, but you may only use one of them.\n");
@@ -128,6 +128,7 @@ void ReadOtuCommand::help(){
 		mothurOut("In this case the read.otu command should be in the following format: read.otu(list=yourListFile, group=yourGroupFile) or read.otu(shared=yourSharedFile).  \n");
 		mothurOut("The list parameter and group paramaters or the shared paremeter is required. When using the command the second way with a list and group file read.otu command parses the .list file\n");
 		mothurOut("and separates it into groups.  It outputs a .shared file containing the OTU information for each group. The read.otu command also outputs a .rabund file for each group. \n");
+		mothurOut("You can use the groups parameter to choose only specific groups to be used in the .shared and .rabund files. \n");
 		mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
 
 	}
-- 
2.39.2