From: westcott <westcott>
Date: Fri, 28 May 2010 12:20:41 +0000 (+0000)
Subject: changed hard parameter in cluster commands
X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=47faf5463d44570ad66148384763db1c8238b563

changed hard parameter in cluster commands
---

diff --git a/clustercommand.cpp b/clustercommand.cpp
index 9ed67e0..30c45e1 100644
--- a/clustercommand.cpp
+++ b/clustercommand.cpp
@@ -62,7 +62,7 @@ ClusterCommand::ClusterCommand(string option)  {
 			temp = validParameter.validFile(parameters, "cutoff", false);
 			if (temp == "not found") { temp = "10"; }
 			convert(temp, cutoff); 
-			if (!hard) {	cutoff += (5 / (precision * 10.0));  }
+			cutoff += (5 / (precision * 10.0));  
 			
 			method = validParameter.validFile(parameters, "method", false);
 			if (method == "not found") { method = "furthest"; }
@@ -181,7 +181,12 @@ int ClusterCommand::execute(){
 			cluster->update(cutoff);
 	
 			float dist = matrix->getSmallDist();
-			float rndDist = roundDist(dist, precision);
+			float rndDist;
+			if (hard) {
+				rndDist = ceilDist(dist, precision); 
+			}else{
+				rndDist = roundDist(dist, precision); 
+			}
 
 			if(previousDist <= 0.0000 && dist != previousDist){
 				printData("unique");
diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp
index e9ab530..4b3c27b 100644
--- a/clustersplitcommand.cpp
+++ b/clustersplitcommand.cpp
@@ -27,7 +27,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
 		
 		else {
 			//valid paramters for this command
-			string Array[] =  {"phylip","column","name","cutoff","precision","method","showabund","timing","hard","processors","outputdir","inputdir"};
+			string Array[] =  {"phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","showabund","timing","hard","processors","outputdir","inputdir"};
 			vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
 			
 			OptionParser parser(option);
@@ -76,6 +76,14 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
 					//if the user has not given a path then, add inputdir. else leave path alone.
 					if (path == "") {	parameters["name"] = inputDir + it->second;		}
 				}
+				
+				it = parameters.find("taxonomy");
+				//user has given a template file
+				if(it != parameters.end()){ 
+					path = hasPath(it->second);
+					//if the user has not given a path then, add inputdir. else leave path alone.
+					if (path == "") {	parameters["taxonomy"] = inputDir + it->second;		}
+				}
 			}
 			
 			//check for required parameters
@@ -93,11 +101,15 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
 			if (namefile == "not open") { abort = true; }	
 			else if (namefile == "not found") { namefile = ""; }
 			
+			taxFile = validParameter.validFile(parameters, "taxonomy", true);
+			if (taxFile == "not open") { abort = true; }	
+			else if (taxFile == "not found") { taxFile = ""; }
+			
 			if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a cluster.split command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
 			else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
 		
 			if (columnfile != "") {
-				if (namefile == "") {  cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }
+				if (namefile == "") { m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); abort = true; }
 			}
 					
 			//check for optional parameter and set defaults
@@ -119,13 +131,19 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
 			temp = validParameter.validFile(parameters, "cutoff", false);
 			if (temp == "not found") { temp = "10"; }
 			convert(temp, cutoff); 
-			if (!hard) {	cutoff += (5 / (precision * 10.0));  }
+			cutoff += (5 / (precision * 10.0));  
 			
-			method = validParameter.validFile(parameters, "method", false);
-			if (method == "not found") { method = "furthest"; }
+			method = validParameter.validFile(parameters, "method", false);		if (method == "not found") { method = "furthest"; }
+			
+			splitmethod = validParameter.validFile(parameters, "splitmethod", false);		if (splitmethod == "not found") { method = "distance"; }
 			
 			if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
 			else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
+			
+			if ((splitmethod == "distance") || (splitmethod == "classify")) { }
+			else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance or classify."); m->mothurOutEndLine(); abort = true; }
+			
+			if ((splitmethod == "classify") && (taxFile == "")) {  m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true;  }
 
 			showabund = validParameter.validFile(parameters, "showabund", false);
 			if (showabund == "not found") { showabund = "T"; }
@@ -145,11 +163,13 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
 
 void ClusterSplitCommand::help(){
 	try {
-		m->mothurOut("The cluster command can only be executed after a successful read.dist command.\n");
-		m->mothurOut("The cluster command parameter options are method, cuttoff, hard, precision, showabund and timing. No parameters are required.\n");
-		m->mothurOut("The cluster command should be in the following format: \n");
-		m->mothurOut("cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n");
-		m->mothurOut("The acceptable cluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed.\n\n");	
+		m->mothurOut("The cluster.split command parameter options are cutoff, splitcutoff, precision, method, splitmethod, phylip, column, name, showabund, timing. Phylip or column and name are required.\n");
+		m->mothurOut("The phylip and column parameter allow you to enter your distance file. \n");
+		m->mothurOut("The name parameter allows you to enter your name file and is required if your distance file is in column format. \n");
+		m->mothurOut("The cluster.split command should be in the following format: \n");
+		m->mothurOut("cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n");
+		m->mothurOut("Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify) \n");	
+
 	}
 	catch(exception& e) {
 		m->errorOut(e, "ClusterSplitCommand", "help");
@@ -203,7 +223,7 @@ int ClusterSplitCommand::execute(){
 		time_t estart = time(NULL);
 		
 		//split matrix into non-overlapping groups
-		SplitMatrix* split = new SplitMatrix(distfile, namefile, cutoff);
+		SplitMatrix* split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod);
 		split->split();
 		
 		if (m->control_pressed) { delete split; return 0; }
@@ -212,11 +232,11 @@ int ClusterSplitCommand::execute(){
 		vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
 		delete split;
 		
+		if (m->control_pressed) { return 0; }
+		
 		m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
 		estart = time(NULL);
 		
-		if (m->control_pressed) { return 0; }
-		
 		//****************** break up files between processes and cluster each file set ******************************//
 		vector<string> listFileNames;
 		set<string> labels;
@@ -586,7 +606,12 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
 				cluster->update(cutoff);
 	
 				float dist = matrix->getSmallDist();
-				float rndDist = roundDist(dist, precision);
+				float rndDist;
+				if (hard) {
+					rndDist = ceilDist(dist, precision); 
+				}else{
+					rndDist = roundDist(dist, precision); 
+				}
 
 				if(previousDist <= 0.0000 && dist != previousDist){
 					oldList.setLabel("unique");
diff --git a/clustersplitcommand.h b/clustersplitcommand.h
index 4d1f435..631a8b6 100644
--- a/clustersplitcommand.h
+++ b/clustersplitcommand.h
@@ -32,7 +32,7 @@ private:
 	vector<int> processIDS;   //processid
 	vector<string> outputNames;
 
-	string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, distfile, format, showabund, timing;
+	string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, distfile, format, showabund, timing, splitmethod, taxFile;
 	double cutoff, splitcutoff;
 	int precision, length, processors;
 	bool print_start, abort, hard;
diff --git a/hclustercommand.cpp b/hclustercommand.cpp
index dc14e16..b4d6017 100644
--- a/hclustercommand.cpp
+++ b/hclustercommand.cpp
@@ -110,7 +110,7 @@ HClusterCommand::HClusterCommand(string option)  {
 			temp = validParameter.validFile(parameters, "cutoff", false);
 			if (temp == "not found") { temp = "10"; }
 			convert(temp, cutoff); 
-			if (!hard) {  cutoff += (5 / (precision * 10.0));  }
+			cutoff += (5 / (precision * 10.0)); 
 			
 			method = validParameter.validFile(parameters, "method", false);
 			if (method == "not found") { method = "furthest"; }
@@ -163,7 +163,7 @@ void HClusterCommand::help(){
 		m->mothurOut("The name parameter allows you to enter your name file and is required if your distance file is in column format. \n");
 		m->mothurOut("The hcluster command should be in the following format: \n");
 		m->mothurOut("hcluster(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n");
-		m->mothurOut("The acceptable hcluster methods are furthest and nearest, but we hope to add average in the future.\n\n");	
+		m->mothurOut("The acceptable hcluster methods are furthest, nearest and average.\n\n");	
 	}
 	catch(exception& e) {
 		m->errorOut(e, "HClusterCommand", "help");
@@ -279,8 +279,14 @@ int HClusterCommand::execute(){
 						return 0;  
 					}
 
-					
-					float rndDist = roundDist(seqs[i].dist, precision);
+			
+					float rndDist;
+					if (hard) {
+						rndDist = ceilDist(seqs[i].dist, precision); 
+					}else{
+						rndDist = roundDist(seqs[i].dist, precision); 
+					}
+
 					
 					if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
 						printData("unique");
diff --git a/makefile b/makefile
index fdcef27..f50b554 100644
--- a/makefile
+++ b/makefile
@@ -1691,7 +1691,7 @@ install : mothur
 	$(CC) $(CC_OPTIONS) splitabundcommand.cpp -c $(INCLUDE) -o ./splitabundcommand.o
 	
 # Item # 206 -- splitmatrix --
-./splitmatrix.o : splitmatrix.o
+./splitmatrix.o : splitmatrix.cpp
 	$(CC) $(CC_OPTIONS) splitmatrix.cpp -c $(INCLUDE) -o ./splitmatrix.o
 	
 # Item # 207 -- splitmatrix --
diff --git a/mgclustercommand.cpp b/mgclustercommand.cpp
index 6eb9c7d..023f214 100644
--- a/mgclustercommand.cpp
+++ b/mgclustercommand.cpp
@@ -20,7 +20,7 @@ MGClusterCommand::MGClusterCommand(string option) {
 		
 		else {
 			//valid paramters for this command
-			string Array[] =  {"blast", "method", "name", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
+			string Array[] =  {"blast", "method", "name", "hard", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
 			vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
 			
 			OptionParser parser(option);
@@ -104,6 +104,9 @@ MGClusterCommand::MGClusterCommand(string option) {
 			
 			temp = validParameter.validFile(parameters, "hcluster", false);			if (temp == "not found") { temp = "false"; }
 			hclusterWanted = isTrue(temp); 
+			
+			temp = validParameter.validFile(parameters, "hard", false);			if (temp == "not found") { temp = "F"; }
+			hard = isTrue(temp);
 		}
 
 	}
@@ -214,7 +217,13 @@ int MGClusterCommand::execute(){
 				}
 				
 				float dist = distMatrix->getSmallDist();
-				float rndDist = roundDist(dist, precision);
+				float rndDist;
+				if (hard) {
+					rndDist = ceilDist(dist, precision); 
+				}else{
+					rndDist = roundDist(dist, precision); 
+				}
+
 				
 				if(previousDist <= 0.0000 && dist != previousDist){
 					oldList.setLabel("unique");
@@ -329,7 +338,12 @@ int MGClusterCommand::execute(){
 							return 0; 
 						}
 	
-						float rndDist = roundDist(seqs[i].dist, precision);
+						float rndDist;
+						if (hard) {
+							rndDist = ceilDist(seqs[i].dist, precision); 
+						}else{
+							rndDist = roundDist(seqs[i].dist, precision); 
+						}
 												
 						if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
 							oldList.setLabel("unique");
diff --git a/mgclustercommand.h b/mgclustercommand.h
index a1dac22..4ef0bd9 100644
--- a/mgclustercommand.h
+++ b/mgclustercommand.h
@@ -43,7 +43,7 @@ private:
 	double cutoff;
 	float penalty;
 	int precision, length, precisionLength;
-	bool abort, minWanted, hclusterWanted, merge;
+	bool abort, minWanted, hclusterWanted, merge, hard;
 	
 	void printData(ListVector*);
 	ListVector* mergeOPFs(map<string, int>, float);
diff --git a/mothur.h b/mothur.h
index ca19aea..5d12977 100644
--- a/mothur.h
+++ b/mothur.h
@@ -254,6 +254,13 @@ inline float roundDist(float dist, int precision){
 	
 	return int(dist * precision + 0.5)/float(precision);
 	
+}
+/***********************************************************************/
+
+inline float ceilDist(float dist, int precision){
+	
+	return int(ceil(dist * precision))/float(precision);
+	
 }
 
 /***********************************************************************/
diff --git a/phylotree.h b/phylotree.h
index a961721..50bbb1d 100644
--- a/phylotree.h
+++ b/phylotree.h
@@ -45,10 +45,12 @@ public:
 	TaxNode get(string seqName);
 	string getName(int i);			
 	int getIndex(string seqName);	
-			
 	string getFullTaxonomy(string);	 //pass a sequence name return taxonomy
-	int getMaxLevel()				{	return maxLevel;						}
-	int getNumSeqs()  {  return numSeqs;  }
+	
+	int getMaxLevel()		{	return maxLevel;	}
+	int getNumSeqs()		{	return numSeqs;		}
+	int getNumNodes()		{	return tree.size();	}
+	
 	bool ErrorCheck(vector<string>);
 	
 private:
diff --git a/readdistcommand.cpp b/readdistcommand.cpp
index 414f330..4ea630f 100644
--- a/readdistcommand.cpp
+++ b/readdistcommand.cpp
@@ -22,7 +22,7 @@ ReadDistCommand::ReadDistCommand(string option) {
 		
 		else {
 			//valid paramters for this command
-			string Array[] =  {"phylip", "column", "name", "cutoff","hard", "precision", "group","outputdir","inputdir","sim"};
+			string Array[] =  {"phylip", "column", "name", "cutoff", "precision", "group","outputdir","inputdir","sim"};
 			vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
 			
 			OptionParser parser(option);
@@ -126,12 +126,9 @@ ReadDistCommand::ReadDistCommand(string option) {
 			sim = isTrue(temp); 
 			globaldata->sim = sim;
 			
-			temp = validParameter.validFile(parameters, "hard", false);			if (temp == "not found") { temp = "F"; }
-			hard = isTrue(temp);
-			
 			temp = validParameter.validFile(parameters, "cutoff", false);			if (temp == "not found") { temp = "10"; }
 			convert(temp, cutoff); 
-			if (!hard) {  cutoff += (5 / (precision * 10.0));  }
+			cutoff += (5 / (precision * 10.0)); 
 			
 			if (abort == false) {
 				distFileName = globaldata->inputFileName;
diff --git a/readdistcommand.h b/readdistcommand.h
index 6241f97..937ca3f 100644
--- a/readdistcommand.h
+++ b/readdistcommand.h
@@ -42,7 +42,7 @@ private:
 	string phylipfile, columnfile, namefile, groupfile, outputDir;
 	NameAssignment* nameMap;
 
-	bool abort, sim, hard;
+	bool abort, sim;
 
 };
 
diff --git a/splitmatrix.cpp b/splitmatrix.cpp
index c52c287..0b59fa2 100644
--- a/splitmatrix.cpp
+++ b/splitmatrix.cpp
@@ -8,19 +8,44 @@
  */
 
 #include "splitmatrix.h"
+#include "phylotree.h"
 
 /***********************************************************************/
 
-SplitMatrix::SplitMatrix(string distfile, string name, float c){
+SplitMatrix::SplitMatrix(string distfile, string name, string tax, float c, string t){
 	m = MothurOut::getInstance();
 	distFile = distfile;
 	cutoff = c;
 	namefile = name;
+	method = t;
+	taxFile = tax;
 }
 
 /***********************************************************************/
 
 int SplitMatrix::split(){
+	try {
+        
+		if (method == "distance") {  
+			splitDistance();
+		}else if (method == "classify") {
+			splitClassify();
+		}else {
+			m->mothurOut("Unknown splitting method, aborting split."); m->mothurOutEndLine();
+			map<string, string> temp;
+			temp[distFile] = namefile;
+			dists.push_back(temp);
+		}
+		
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SplitMatrix", "split");
+		exit(1);
+	}
+}
+/***********************************************************************/
+int SplitMatrix::splitDistance(){
 	try {
         
 		vector<set<string> > groups;
@@ -36,6 +61,8 @@ int SplitMatrix::split(){
 
 			dFile >> seqA >> seqB >> dist;
 			
+			if (m->control_pressed) {  outFile.close(); dFile.close();  for(int i=0;i<numGroups;i++){	if(groups[i].size() > 0){  remove((distFile + "." + toString(i) + ".temp").c_str()); }  } return 0; }
+					
 			if(dist < cutoff){
 				//cout << "in cutoff: " << dist << endl;
 				int groupIDA = -1;
@@ -172,7 +199,7 @@ int SplitMatrix::split(){
 				smallNameFile.close();
 			}
 		}
-			
+		
 		//names of singletons
 		if (nameMap.size() != 0) {
 			singleton = namefile + ".extra.temp";
@@ -193,12 +220,160 @@ int SplitMatrix::split(){
 				dists.push_back(temp);
 			}
 		}
+		
+		if (m->control_pressed)	 {  
+			for (int i = 0; i < dists.size(); i++) { 
+				remove((dists[i].begin()->first).c_str());
+				remove((dists[i].begin()->second).c_str());
+			}
+			dists.clear();
+		}
+		
+		return 0;
+			
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SplitMatrix", "splitDistance");
+		exit(1);
+	}
+}
+
+/***********************************************************************/
+int SplitMatrix::splitClassify(){
+	try {
+		map<string, int> seqGroup;
+		map<string, int>::iterator it;
+		map<string, int>::iterator it2;
+		
+		int numGroups = 0;
+		
+		//build tree from users taxonomy file
+		PhyloTree* phylo = new PhyloTree();
+		
+		ifstream in;
+		openInputFile(taxFile, in);
+			
+		//read in users taxonomy file and add sequences to tree
+		string seqname, tax;
+		while(!in.eof()){
+			in >> seqname >> tax; gobble(in);
+				
+			phylo->addSeqToTree(seqname, tax);
+		}
+		in.close();
+		
+		phylo->assignHeirarchyIDs(0);
+
+		//make sure the cutoff is not greater than maxlevel
+		if (cutoff > phylo->getMaxLevel()) { m->mothurOut("splitcutoff is greater than the longest taxonomy, using " + toString(phylo->getMaxLevel())); m->mothurOutEndLine(); cutoff = phylo->getMaxLevel(); }
+		
+		//for each node in tree
+		for (int i = 0; i < phylo->getNumNodes(); i++) {
+		
+			//is this node within the cutoff
+			TaxNode taxon = phylo->get(i);
+			
+			if (taxon.level == cutoff) {//if yes, then create group containing this nodes sequences
+				if (taxon.children.size() > 1) { //if this taxon just has one seq its a singleton
+					for (it = taxon.children.begin(); it != taxon.children.end(); it++) {
+						seqGroup[it->first] = numGroups;
+					}
+					numGroups++;
+				}
+			}
+		}
+
+		ifstream dFile;
+		openInputFile(distFile, dFile);
+		ofstream outFile;
+		
+		for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
+			remove((distFile + "." + toString(i) + ".temp").c_str());
+		}
+		
+		//for each distance
+		while(dFile){
+			string seqA, seqB;
+			float dist;
+			
+			if (m->control_pressed) { dFile.close(); for (int i = 0; i < numGroups; i++) { remove((distFile + "." + toString(i) + ".temp").c_str());	} }
+			
+			dFile >> seqA >> seqB >> dist;  gobble(dFile);
+			
+			//if both sequences are in the same group then they are within the cutoff
+			it = seqGroup.find(seqA);
+			it2 = seqGroup.find(seqB);
+			
+			if ((it != seqGroup.end()) && (it2 != seqGroup.end())) { //they are both not singletons 
+				if (it->second == it2->second) { //they are from the same group so add the distance
+					openOutputFileAppend((distFile + "." + toString(it->second) + ".temp"), outFile);
+					outFile << seqA << '\t' << seqB << '\t' << dist << endl;
+					outFile.close();
+				}
+			}
+		}
+		dFile.close();
+	
+		
+		for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
+			remove((namefile + "." + toString(i) + ".temp").c_str());
+		}
+		
+		ifstream bigNameFile;
+		openInputFile(namefile, bigNameFile);
+		
+		singleton = namefile + ".extra.temp";
+		ofstream remainingNames;
+		openOutputFile(singleton, remainingNames);
+		
+		bool wroteExtra = false;
+						
+		string name, nameList;
+		while(!bigNameFile.eof()){
+			bigNameFile >> name >> nameList;  gobble(bigNameFile);
+			
+			//did this sequence get assigned a group
+			it = seqGroup.find(name);
+			
+			if (it != seqGroup.end()) {  
+				openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile);
+				outFile << name << '\t' << nameList << endl;
+				outFile.close();
+			}else{
+				wroteExtra = true;
+				remainingNames << name << '\t' << nameList << endl;
+			}
+		}
+		bigNameFile.close();
+		remainingNames.close();
+		
+		if (!wroteExtra) { 
+			remove(singleton.c_str());
+			singleton = "none";
+		}
+			
+		for(int i=0;i<numGroups;i++){
+			string tempNameFile = namefile + "." + toString(i) + ".temp";
+			string tempDistFile = distFile + "." + toString(i) + ".temp";
 				
+			map<string, string> temp;
+			temp[tempDistFile] = tempNameFile;
+			dists.push_back(temp);
+		}
+		
+		if (m->control_pressed)	 {  
+			for (int i = 0; i < dists.size(); i++) { 
+				remove((dists[i].begin()->first).c_str());
+				remove((dists[i].begin()->second).c_str());
+			}
+			dists.clear();
+		}
+		
 		return 0;
 			
 	}
 	catch(exception& e) {
-		m->errorOut(e, "SplitMatrix", "split");
+		m->errorOut(e, "SplitMatrix", "splitClassify");
 		exit(1);
 	}
 }
diff --git a/splitmatrix.h b/splitmatrix.h
index 5974ff1..e38f8fc 100644
--- a/splitmatrix.h
+++ b/splitmatrix.h
@@ -19,18 +19,21 @@ class SplitMatrix  {
 	
 	public:
 
-		SplitMatrix(string, string, float); //column formatted distance file, namesfile, cutoff
+		SplitMatrix(string, string, string, float, string); //column formatted distance file, namesfile, cutoff, method
 		~SplitMatrix();
 		int split();
 		vector< map<string, string> > getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
 		string getSingletonNames() { return singleton; } //returns namesfile containing singletons
 	
 	private:
-		string distFile, namefile, singleton;
+		MothurOut* m;
+
+		string distFile, namefile, singleton, method, taxFile;
 		vector< map< string, string> > dists;
 		float cutoff;
-		
-		MothurOut* m;
+				
+		int splitDistance();
+		int splitClassify();
 };
 
 /******************************************************/