From: westcott <westcott>
Date: Fri, 5 Nov 2010 20:02:24 +0000 (+0000)
Subject: fixed metastats, added resize to cluster.classic, added code to kill children if... 
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=89f19f9c6ab89c2f6c7c6921a328ae87bce6f8e3;p=mothur.git

fixed metastats, added resize to cluster.classic, added code to kill children if parent is unable to spawn and dies.
---

diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj
index 1c282d9..855dad8 100644
--- a/Mothur.xcodeproj/project.pbxproj
+++ b/Mothur.xcodeproj/project.pbxproj
@@ -1124,7 +1124,6 @@
 			};
 			buildConfigurationList = 1DEB919308733D9F0010E9CD /* Build configuration list for PBXProject "Mothur" */;
 			compatibilityVersion = "Xcode 3.0";
-			developmentRegion = English;
 			hasScannedForEncodings = 1;
 			knownRegions = (
 				English,
diff --git a/clusterclassic.cpp b/clusterclassic.cpp
index 41c1647..0e48690 100644
--- a/clusterclassic.cpp
+++ b/clusterclassic.cpp
@@ -226,7 +226,7 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) {
 //sets smallCol and smallRow, returns distance
 double ClusterClassic::getSmallCell() {
 	try {
-		
+			
 		smallDist = aboveCutoff;
 		smallRow = 1;
 		smallCol = 0;
@@ -279,6 +279,10 @@ void ClusterClassic::clusterBins(){
 
 		rabund->set(smallRow, rabund->get(smallRow)+rabund->get(smallCol));	
 		rabund->set(smallCol, 0);	
+		for (int i = smallCol+1; i < rabund->size(); i++) {
+			rabund->set((i-1), rabund->get(i));
+		}
+		rabund->resize((rabund->size()-1));
 		rabund->setLabel(toString(smallDist));
 
 	//	cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl;
@@ -296,6 +300,10 @@ void ClusterClassic::clusterNames(){
 		
 		list->set(smallRow, list->get(smallRow)+','+list->get(smallCol));
 		list->set(smallCol, "");	
+		for (int i = smallCol+1; i < list->size(); i++) {
+			list->set((i-1), list->get(i));
+		}
+		list->resize((list->size()-1));
 		list->setLabel(toString(smallDist));
 	
 	//	cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl;
@@ -308,36 +316,21 @@ void ClusterClassic::clusterNames(){
 /***********************************************************************/
 void ClusterClassic::update(double& cutOFF){
 	try {
-//cout << "before update " << endl;
 //print();		
 		getSmallCell();
 		
 		int r, c;
 		r = smallRow; c = smallCol;
-		//because we only store lt, we need to make sure we grab the right location
-		//if (smallRow < smallCol) { c = smallRow; r = smallCol; } //smallRow is really our column value
-		//else { r = smallRow; c = smallCol; } //smallRow is the row value
-		
-		//reset rows smallest distance
-		//rowSmallDists[r].dist = aboveCutoff; rowSmallDists[r].row = 0; rowSmallDists[r].col = 0;
-		//rowSmallDists[c].dist = aboveCutoff; rowSmallDists[c].row = 0; rowSmallDists[c].col = 0;
-		
-		//if your rows smallest distance is from smallRow or smallCol, reset
-		//for(int i=0;i<nseqs;i++){
-			//if ((rowSmallDists[i].row == r) || (rowSmallDists[i].row == c) || (rowSmallDists[i].col == r) || (rowSmallDists[i].col == c)) {
-			//	rowSmallDists[i].dist = aboveCutoff; rowSmallDists[i].row = 0; rowSmallDists[i].col = 0;
-			//}
-		//}
-		
+				
 		for(int i=0;i<nseqs;i++){
 			if(i != r && i != c){
 				double distRow, distCol, newDist;
 				if (i > r) { distRow = dMatrix[i][r]; }
 				else { distRow =  dMatrix[r][i]; }
-				
+
 				if (i > c) { distCol = dMatrix[i][c]; dMatrix[i][c] = aboveCutoff; } //like removeCell
 				else { distCol =  dMatrix[c][i]; dMatrix[c][i] = aboveCutoff; }
-					
+				
 				if(method == "furthest"){
 					newDist = max(distRow, distCol);
 				}
@@ -352,39 +345,33 @@ void ClusterClassic::update(double& cutOFF){
 				else if (method == "nearest"){
 					newDist = min(distRow, distCol);
 				}
-				
+				//cout << "newDist = " << newDist << endl;	
 				if (i > r) { dMatrix[i][r] = newDist; }
 				else { dMatrix[r][i] = newDist; }
 				
-				//if (newDist < rowSmallDists[i].dist) {  rowSmallDists[i].dist = newDist; rowSmallDists[i].col = r; rowSmallDists[i].row = i;  }
 			}
-			//cout << "rowsmall = " << i << '\t' << rowSmallDists[i].dist << endl;	
 		}
 			
 		clusterBins();
 		clusterNames();
-	
-		//find new small for 2 rows we just merged
-		//colDist temp(0,0,100.0);
-		//rowSmallDists[r] = temp;
-
-		//for (int i = 0; i < dMatrix[r].size(); i++) {
-		//	if (dMatrix[r][i] < rowSmallDists[r].dist) { rowSmallDists[r].dist = dMatrix[r][i]; rowSmallDists[r].col = r; rowSmallDists[r].row = i; }
-		//}
-		//for (int i = dMatrix[r].size()+1; i < dMatrix.size(); i++) {
-		//	if (dMatrix[i][dMatrix[r].size()] < rowSmallDists[r].dist) { rowSmallDists[r].dist = dMatrix[i][dMatrix[r].size()]; rowSmallDists[r].col = r; rowSmallDists[r].row = i; }
-		//}
 		
-		//rowSmallDists[c] = temp;
-		//for (int i = 0; i < dMatrix[c].size(); i++) {
-		//	if (dMatrix[c][i] < rowSmallDists[c].dist) { rowSmallDists[c].dist = dMatrix[c][i]; rowSmallDists[c].col = c; rowSmallDists[c].row = i; }
-		//}
-		//for (int i = dMatrix[c].size()+1; i < dMatrix.size(); i++) {
-		//	if (dMatrix[i][dMatrix[c].size()] < rowSmallDists[c].dist) { rowSmallDists[c].dist = dMatrix[i][dMatrix[c].size()]; rowSmallDists[c].col = c; rowSmallDists[c].row = i; }
-		//}
+		//resize each row
+		for(int i=0;i<nseqs;i++){
+			for(int j=c+1;j<dMatrix[i].size();j++){
+				dMatrix[i][j-1]=dMatrix[i][j];
+			}
+		}			
+		
+		//resize each col
+		for(int i=c+1;i<nseqs;i++){
+			for(int j=0;j < dMatrix[i-1].size();j++){
+				dMatrix[i-1][j]=dMatrix[i][j];
+			}
+		}	
 		
-		//cout << "after update " << endl;
-		//print();
+		nseqs--;
+		dMatrix.pop_back();
+
 	}
 	catch(exception& e) {
 		m->errorOut(e, "ClusterClassic", "update");
diff --git a/clusterdoturcommand.cpp b/clusterdoturcommand.cpp
index 4987d4e..56fbd5c 100644
--- a/clusterdoturcommand.cpp
+++ b/clusterdoturcommand.cpp
@@ -227,7 +227,7 @@ int ClusterDoturCommand::execute(){
 		
 		int estart = time(NULL);
 	
-		while ((cluster->getSmallDist() < cutoff) && (cluster->getNSeqs() > 0)){
+		while ((cluster->getSmallDist() < cutoff) && (cluster->getNSeqs() > 1)){
 			if (m->control_pressed) { delete cluster; delete list; delete rabund; sabundFile.close();rabundFile.close();listFile.close();  for (int i = 0; i < outputNames.size(); i++) {	remove(outputNames[i].c_str()); 	} outputTypes.clear();  return 0;  }
 		
 			cluster->update(cutoff);
diff --git a/commandfactory.cpp b/commandfactory.cpp
index 927f374..3ac3781 100644
--- a/commandfactory.cpp
+++ b/commandfactory.cpp
@@ -96,6 +96,7 @@
 #include "deuniqueseqscommand.h"
 #include "pairwiseseqscommand.h"
 #include "clusterdoturcommand.h"
+#include "subsamplecommand.h"
 
 
 /*******************************************************/
@@ -197,6 +198,7 @@ CommandFactory::CommandFactory(){
 	commands["fastq.info"]			= "fastq.info";
 	commands["deunique.seqs"]		= "deunique.seqs";
 	commands["cluster.classic"]		= "cluster.classic";
+	commands["sub.sample"]			= "sub.sample";
 	commands["pairwise.seqs"]		= "MPIEnabled";
 	commands["pipeline.pds"]		= "MPIEnabled";
 	commands["classify.seqs"]		= "MPIEnabled"; 
@@ -344,6 +346,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
 		else if(commandName == "deunique.seqs")			{	command = new DeUniqueSeqsCommand(optionString);			}
 		else if(commandName == "pairwise.seqs")			{	command = new PairwiseSeqsCommand(optionString);			}
 		else if(commandName == "cluster.classic")		{	command = new ClusterDoturCommand(optionString);			}
+		else if(commandName == "sub.sample")			{	command = new SubSampleCommand(optionString);				}
 		else											{	command = new NoCommand(optionString);						}
 
 		return command;
@@ -458,6 +461,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
 		else if(commandName == "deunique.seqs")			{	pipecommand = new DeUniqueSeqsCommand(optionString);			}
 		else if(commandName == "pairwise.seqs")			{	pipecommand = new PairwiseSeqsCommand(optionString);			}
 		else if(commandName == "cluster.classic")		{	pipecommand = new ClusterDoturCommand(optionString);			}
+		else if(commandName == "sub.sample")			{	pipecommand = new SubSampleCommand(optionString);				}
 		else											{	pipecommand = new NoCommand(optionString);						}
 
 		return pipecommand;
@@ -560,6 +564,7 @@ Command* CommandFactory::getCommand(string commandName){
 		else if(commandName == "deunique.seqs")			{	shellcommand = new DeUniqueSeqsCommand();			}
 		else if(commandName == "pairwise.seqs")			{	shellcommand = new PairwiseSeqsCommand();			}
 		else if(commandName == "cluster.classic")		{	shellcommand = new ClusterDoturCommand();			}
+		else if(commandName == "sub.sample")			{	shellcommand = new SubSampleCommand();				}
 		else											{	shellcommand = new NoCommand();						}
 
 		return shellcommand;
diff --git a/datavector.hpp b/datavector.hpp
index 0b09e0d..a5f96eb 100644
--- a/datavector.hpp
+++ b/datavector.hpp
@@ -33,6 +33,7 @@ public:
 	virtual void resize(int) = 0;
 	virtual int size()	= 0;
 	virtual void print(ostream&) = 0;
+	virtual void clear() = 0;
 	
 	void setLabel(string l)		{	label = l;			}
 	string getLabel()			{	return label;		}
diff --git a/getseqscommand.cpp b/getseqscommand.cpp
index 4b93d1f..9fbfb93 100644
--- a/getseqscommand.cpp
+++ b/getseqscommand.cpp
@@ -14,7 +14,7 @@
 //**********************************************************************************************************************
 vector<string> GetSeqsCommand::getValidParameters(){	
 	try {
-		string Array[] =  {"fasta","name", "group", "alignreport", "accnos", "dups", "list","taxonomy","outputdir","inputdir"};
+		string Array[] =  {"fasta","name", "group", "qfile","alignreport", "accnos", "dups", "list","taxonomy","outputdir","inputdir"};
 		vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
 		return myArray;
 	}
@@ -35,6 +35,7 @@ GetSeqsCommand::GetSeqsCommand(){
 		outputTypes["group"] = tempOutNames;
 		outputTypes["alignreport"] = tempOutNames;
 		outputTypes["list"] = tempOutNames;
+		outputTypes["qfile"] = tempOutNames;
 	}
 	catch(exception& e) {
 		m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
@@ -74,7 +75,7 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
 		
 		else {
 			//valid paramters for this command
-			string Array[] =  {"fasta","name", "group", "alignreport", "accnos", "dups", "list","taxonomy","outputdir","inputdir"};
+			string Array[] =  {"fasta","name", "group", "alignreport", "qfile", "accnos", "dups", "list","taxonomy","outputdir","inputdir"};
 			vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
 			
 			OptionParser parser(option);
@@ -96,6 +97,7 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
 			outputTypes["group"] = tempOutNames;
 			outputTypes["alignreport"] = tempOutNames;
 			outputTypes["list"] = tempOutNames;
+			outputTypes["qfile"] = tempOutNames;
 			
 			//if the user changes the output directory command factory will send this info to us in the output parameter 
 			outputDir = validParameter.validFile(parameters, "outputdir", false);		if (outputDir == "not found"){	outputDir = "";		}
@@ -160,6 +162,14 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
 					//if the user has not given a path then, add inputdir. else leave path alone.
 					if (path == "") {	parameters["taxonomy"] = inputDir + it->second;		}
 				}
+				
+				it = parameters.find("qfile");
+				//user has given a template file
+				if(it != parameters.end()){ 
+					path = m->hasPath(it->second);
+					//if the user has not given a path then, add inputdir. else leave path alone.
+					if (path == "") {	parameters["qfile"] = inputDir + it->second;		}
+				}
 			}
 
 			
@@ -192,11 +202,15 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
 			if (taxfile == "not open") { abort = true; }
 			else if (taxfile == "not found") {  taxfile = "";  }
 			
+			qualfile = validParameter.validFile(parameters, "qfile", true);
+			if (qualfile == "not open") { abort = true; }
+			else if (qualfile == "not found") {  qualfile = "";  }
+			
 			string usedDups = "true";
 			string temp = validParameter.validFile(parameters, "dups", false);	if (temp == "not found") { temp = "false"; usedDups = ""; }
 			dups = m->isTrue(temp);
 			
-			if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
+			if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
 		
 			if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }			
 
@@ -212,9 +226,9 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
 
 void GetSeqsCommand::help(){
 	try {
-		m->mothurOut("The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy or alignreport file.\n");
+		m->mothurOut("The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n");
 		m->mothurOut("It outputs a file containing only the sequences in the .accnos file.\n");
-		m->mothurOut("The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, alignreport and dups.  You must provide accnos and at least one of the other parameters.\n");
+		m->mothurOut("The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos and at least one of the other parameters.\n");
 		m->mothurOut("The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n");
 		m->mothurOut("The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n");
 		m->mothurOut("Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n");
@@ -245,6 +259,7 @@ int GetSeqsCommand::execute(){
 		if (alignfile != "")		{		readAlign();	}
 		if (listfile != "")			{		readList();		}
 		if (taxfile != "")			{		readTax();		}
+		if (qualfile != "")			{		readQual();		}
 		
 		if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {	remove(outputNames[i].c_str());  } return 0; }
 		
@@ -313,6 +328,71 @@ int GetSeqsCommand::readFasta(){
 	}
 }
 //**********************************************************************************************************************
+int GetSeqsCommand::readQual(){
+	try {
+		string thisOutputDir = outputDir;
+		if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
+		string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "pick" +  m->getExtension(qualfile);
+		ofstream out;
+		m->openOutputFile(outputFileName, out);
+		
+		
+		ifstream in;
+		m->openInputFile(qualfile, in);
+		string name;
+		
+		bool wroteSomething = false;
+		
+		
+		while(!in.eof()){	
+			string saveName = "";
+			string name = "";
+			string scores = "";
+			
+			in >> name; 
+				
+			if (name.length() != 0) { 
+				saveName = name.substr(1);
+				while (!in.eof())	{	
+					char c = in.get(); 
+					if (c == 10 || c == 13){	break;	}
+					else { name += c; }	
+				} 
+				m->gobble(in);
+			}
+			
+			while(in){
+				char letter= in.get();
+				if(letter == '>'){	in.putback(letter);	break;	}
+				else{ scores += letter; }
+			}
+			
+			m->gobble(in);
+			
+			if (names.count(saveName) != 0) {
+				wroteSomething = true;
+						
+				out << name << endl << scores;
+			}
+			
+			m->gobble(in);
+		}
+		in.close();
+		out.close();
+		
+		
+		if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
+		outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
+		
+		return 0;
+		
+	}
+	catch(exception& e) {
+		m->errorOut(e, "GetSeqsCommand", "readQual");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
 int GetSeqsCommand::readList(){
 	try {
 		string thisOutputDir = outputDir;
diff --git a/getseqscommand.h b/getseqscommand.h
index ea93b5a..b249072 100644
--- a/getseqscommand.h
+++ b/getseqscommand.h
@@ -29,7 +29,7 @@ class GetSeqsCommand : public Command {
 	private:
 		set<string> names;
 		vector<string> outputNames;
-		string accnosfile, fastafile, namefile, groupfile, alignfile, listfile, taxfile, outputDir;
+		string accnosfile, fastafile, namefile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
 		bool abort, dups;
 		map<string, vector<string> > outputTypes;
 		
@@ -40,6 +40,7 @@ class GetSeqsCommand : public Command {
 		int readAccnos();
 		int readList();
 		int readTax();
+		int readQual();
 		
 };
 
diff --git a/metastats.h b/metastats.h
index 2f3cc62..63fb0ee 100644
--- a/metastats.h
+++ b/metastats.h
@@ -31,7 +31,7 @@ void calc_twosample_ts(double *Pmatrix,int *g,int *nc,int *nr,double
 		       *Ts,double *Tinitial,double *counter1);
 void meanvar(double *pmatrix,int *g,int *nr,int *nc,double *storage);
 void start(double *Imatrix,int *g,int *nr,int *nc,double *testing,
-			double storage[][9]);
+			double**);
 
 int metastat_main (char*, int, int, double, int, double**, int);
 
diff --git a/metastats2.c b/metastats2.c
index 136c042..b8db243 100644
--- a/metastats2.c
+++ b/metastats2.c
@@ -22,18 +22,23 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
   }
  
   // Initialize the matrices
-  size = row*col;
-//printf("size = %d\n.", size);
-  double matrix[row][col];
-  double pmatrix[size],pmatrix2[size],permuted[size];  
-  double storage[row][9];
-//printf("here\n.", size);  
-  for (i=0;i<row;i++){
-  	for (j =0;j<9;j++){
-      storage[i][j]=0; 		
-	}	
-  }
-   
+	size = row*col;
+	
+	double ** matrix, * pmatrix, * permuted, ** storage;
+	matrix = malloc(row*sizeof(double *));
+	storage =  malloc(row*sizeof(double *));
+	for (i = 0; i<row; i++){
+		matrix[i] = malloc(col*sizeof(double));
+	}
+	for (i = 0; i<row;i++){
+		storage[i] = malloc(9*sizeof(double));
+	}
+	
+	pmatrix = (double *) malloc(size*sizeof(double *));
+	permuted = (double *) malloc(size*sizeof(double *));
+	
+		
+	
   for(i=0; i<row; i++){
     for(j=0; j<col;j++){
       matrix[i][j]=data[i][j];
@@ -125,9 +130,12 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
   
   permutes = &B;
   ptt_size = B*row;
-  
+
   //changing ptt_size to row
-  double permuted_ttests[row], pvalues[row], tinitial[row];
+  double * permuted_ttests, * pvalues, * tinitial;
+  permuted_ttests = (double *) malloc(size*sizeof(double *));
+  pvalues = (double *) malloc(size*sizeof(double *));
+  tinitial = (double *) malloc(size*sizeof(double *));
   
   for(i=0;i<row;i++){
     permuted_ttests[i]=0;}
@@ -137,13 +145,17 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
     tinitial[i]=0; }
   
   // Find the initial values for the matrix.
+ 
   start(pmatrix,gvalue,nr,nc,tinitial,storage);
   
   // Start the calculations.
  
   if ( (col==2) || ((g-1)<8) || ((col-g+1) < 8) ){  
 
-  double fish[row], fish2[row];
+  double * fish, *fish2;
+  fish = (double *) malloc(size*sizeof(double *));
+  fish2 = (double *) malloc(size*sizeof(double *));
+	  
   for(i=0;i<row;i++){
   	fish[i]=0;
   	fish2[i]=0;}
@@ -173,7 +185,8 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
 	//   f21   f22
 	
 	int *nr, *nc, *ldtabl, *work;
-	int nrow=2, ncol=2, ldtable=2, workspace=100000;
+	int nrow=2, ncol=2, ldtable=2;
+	int workspace=(row*sizeof(double *)+size*sizeof(double *));
 	double *expect, *prc, *emin,*prt,*pre;
 	double e=0, prc1=0, emin1=0, prt1=0, pre1=0;
 
@@ -187,7 +200,7 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
 	emin=&emin1;
 	prt=&prt1;
 	pre=&pre1;
-	
+
 	fexact(nr,nc,data, ldtabl,expect,prc,emin,prt,pre,work);
 	
 	if (*pre>.999999999){
@@ -198,11 +211,14 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
     }
   }
   else{
-  	 
+printf("here before testp\n");	  	 
   testp(permuted_ttests, permutes, permuted,pmatrix, nc, nr, gvalue,tinitial,pvalues);
   	 
   	       // Checks to make sure the matrix isn't sparse.
-  double sparse[row], sparse2[row];
+  double * sparse, * sparse2;
+  sparse = (double *) malloc(size*sizeof(double *));
+  sparse2 = (double *) malloc(size*sizeof(double *));
+	  
   for(i=0;i<row;i++){
   	sparse[i]=0;
   	sparse2[i]=0;}
@@ -256,7 +272,7 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
 	emin=&emin1;
 	prt=&prt1;
 	pre=&pre1;
-	
+printf("here before fexact2\n");		
 	fexact(nr,nc,data, ldtabl,expect,prc,emin,prt,pre,work);
 	
 	if (*pre>.999999999){
@@ -271,8 +287,12 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
   }
   
   // Calculates the mean of counts (not normalized)
-  double temp[row][2];
-  
+  double ** temp;
+  temp = malloc(row*sizeof(double *));
+  for (i = 0; i<row; i++){
+	 temp[i] = malloc(col*sizeof(double));
+  }
+printf("here\n");	  
   for(j=0;j<row;j++){
   	for(i=0;i<2;i++){
   		temp[j][i]=0;
@@ -512,42 +532,45 @@ void meanvar(double *pmatrix,int *g,int *nr,int *nc,double *store){
 }
 
 void start(double *Imatrix,int *g,int *nr,int *nc,double *initial,
-		double storage[][9]){
+		double** storage){
   int i, a = *nr;
   a*=4;
-  
+	
   double store[a], tool[a], C1[*nr][3], C2[*nr][3];
   double nrows,ncols,gvalue, xbardiff=0, denom=0;
-  
+ 
   nrows = (double) *nr;
   ncols = (double) *nc;
   gvalue= (double) *g;
-  
+   
   meanvar(Imatrix,g,nr,nc,store);
-  
+   
   for(i=0;i<=a-1;i++){
     tool[i]=store[i];
   }
-  for (i=0; i<*nr;i++){
+
+  for (i=0; i<*nr;i++){ 
     C1[i][0]=tool[i]; //mean group 1
     storage[i][0]=C1[i][0];
     C1[i][1]=tool[i+*nr+*nr]; // var group 1
     storage[i][1]=C1[i][1];
     C1[i][2]=C1[i][1]/(gvalue-1);
     storage[i][2]=sqrt(C1[i][2]);
-    
+    printf("here2\n"); 
     C2[i][0]=tool[i+*nr]; // mean group 2
-    storage[i][4]=C2[i][0];    
+    storage[i][4]=C2[i][0]; 
     C2[i][1]=tool[i+*nr+*nr+*nr]; // var group 2 
-    storage[i][5]=C2[i][1];        
-    C2[i][2]=C2[i][1]/(ncols-gvalue+1);
+    storage[i][5]=C2[i][1];  
+	C2[i][2]=C2[i][1]/(ncols-gvalue+1);
     storage[i][6]=sqrt(C2[i][2]);   
+	 
   }
+ 
   for (i=0; i<*nr; i++){
     xbardiff = C1[i][0]-C2[i][0];
     denom = sqrt(C1[i][2]+C2[i][2]);
     initial[i]=fabs(xbardiff/denom);
-  }											
+  }	 
 }
 
 
diff --git a/mothur b/mothur
index 6d82146..cd2cdd5 100755
Binary files a/mothur and b/mothur differ
diff --git a/ordervector.cpp b/ordervector.cpp
index 27c1e93..a6dff32 100644
--- a/ordervector.cpp
+++ b/ordervector.cpp
@@ -73,7 +73,14 @@ int OrderVector::getMaxRank(){
 	if(needToUpdate == 1){	updateStats();	}
 	return maxRank;
 }
+/***********************************************************************/
 
+void OrderVector::clear(){
+	numBins = 0;
+	maxRank = 0;
+	numSeqs = 0;
+	data.clear();
+}
 /***********************************************************************/
 
 
diff --git a/ordervector.hpp b/ordervector.hpp
index d652bbf..0139a64 100644
--- a/ordervector.hpp
+++ b/ordervector.hpp
@@ -35,6 +35,7 @@ public:
 	void push_back(int);
 	void resize(int);
 	int size();
+	void clear();
 	void print(string, ostream&);
 	vector<int>::iterator begin();
 	vector<int>::iterator end();
diff --git a/qualityscores.cpp b/qualityscores.cpp
index fa78b69..f76d982 100644
--- a/qualityscores.cpp
+++ b/qualityscores.cpp
@@ -45,7 +45,7 @@ QualityScores::QualityScores(ifstream& qFile, int l){
 		else {
 			seqName = seqName.substr(1); 
 		}
-
+		
 		//m->getline(qFile, line);
 		//istringstream qualStream(line);
 	
@@ -57,6 +57,40 @@ QualityScores::QualityScores(ifstream& qFile, int l){
 		
 		//seqLength = qScores.size();	
 		
+		/*while(!in.eof()){	
+			string saveName = "";
+			string name = "";
+			string scores = "";
+			
+			in >> name; 
+			//cout << name << endl;		
+			if (name.length() != 0) { 
+				saveName = name.substr(1);
+				while (!in.eof())	{	
+					char c = in.get(); 
+					if (c == 10 || c == 13){	break;	}
+					else { name += c; }	
+				} 
+				m->gobble(in);
+			}
+			
+			while(in){
+				char letter= in.get();
+				if(letter == '>'){	in.putback(letter);	break;	}
+				else{ scores += letter; }
+			}
+			
+		 //istringstream iss (scores,istringstream::in);
+		 
+		 //int count = 0; int tempScore;
+		 //while (iss) { iss >> tempScore; count++; }
+		 //cout << saveName << '\t' << count << endl;	
+			
+			m->gobble(in);
+		}*/		
+		
+		
+		
 		for(int i=0;i<seqLength;i++){
 			qFile >> score;
 			qScores.push_back(score);
@@ -70,6 +104,7 @@ QualityScores::QualityScores(ifstream& qFile, int l){
 	}							
 
 }
+/**************************************************************************************************/
 
 /**************************************************************************************************/
 
diff --git a/rabundvector.cpp b/rabundvector.cpp
index 43dbfac..6cbaa0d 100644
--- a/rabundvector.cpp
+++ b/rabundvector.cpp
@@ -114,7 +114,15 @@ int RAbundVector::get(int index){
 	return data[index];
 	
 }
+/***********************************************************************/
 
+void RAbundVector::clear(){
+	numBins = 0;
+	maxRank = 0;
+	numSeqs = 0;
+	data.clear();
+	
+}
 /***********************************************************************/
 
 void RAbundVector::push_back(int binSize){
diff --git a/rabundvector.hpp b/rabundvector.hpp
index fab02a9..9516564 100644
--- a/rabundvector.hpp
+++ b/rabundvector.hpp
@@ -41,6 +41,7 @@ public:
 	int sum();
 	int sum(int);
 	int numNZ();
+	void clear();
 	vector<int>::reverse_iterator rbegin();
 	vector<int>::reverse_iterator rend();
 	
diff --git a/removeseqscommand.cpp b/removeseqscommand.cpp
index f1804ed..384a360 100644
--- a/removeseqscommand.cpp
+++ b/removeseqscommand.cpp
@@ -14,7 +14,7 @@
 //**********************************************************************************************************************
 vector<string> RemoveSeqsCommand::getValidParameters(){	
 	try {
-		string Array[] =  {"fasta","name", "group", "alignreport", "accnos", "list","taxonomy","outputdir","inputdir", "dups" };
+		string Array[] =  {"fasta","name", "group", "alignreport", "accnos", "qfile","list","taxonomy","outputdir","inputdir", "dups" };
 		vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
 		return myArray;
 	}
@@ -35,6 +35,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(){
 		outputTypes["group"] = tempOutNames;
 		outputTypes["alignreport"] = tempOutNames;
 		outputTypes["list"] = tempOutNames;
+		outputTypes["qfile"] = tempOutNames;
 	}
 	catch(exception& e) {
 		m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
@@ -74,7 +75,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
 		
 		else {
 			//valid paramters for this command
-			string Array[] =  {"fasta","name", "group", "alignreport", "accnos", "list","taxonomy","outputdir","inputdir", "dups" };
+			string Array[] =  {"fasta","name", "group", "alignreport", "accnos", "qfile", "list","taxonomy","outputdir","inputdir", "dups" };
 			vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
 			
 			OptionParser parser(option);
@@ -96,6 +97,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
 			outputTypes["group"] = tempOutNames;
 			outputTypes["alignreport"] = tempOutNames;
 			outputTypes["list"] = tempOutNames;
+			outputTypes["qfile"] = tempOutNames;
 			
 			//if the user changes the output directory command factory will send this info to us in the output parameter 
 			outputDir = validParameter.validFile(parameters, "outputdir", false);		if (outputDir == "not found"){	outputDir = "";		}
@@ -160,6 +162,14 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
 					//if the user has not given a path then, add inputdir. else leave path alone.
 					if (path == "") {	parameters["taxonomy"] = inputDir + it->second;		}
 				}
+				
+				it = parameters.find("qfile");
+				//user has given a template file
+				if(it != parameters.end()){ 
+					path = m->hasPath(it->second);
+					//if the user has not given a path then, add inputdir. else leave path alone.
+					if (path == "") {	parameters["qfile"] = inputDir + it->second;		}
+				}
 			}
 
 			
@@ -191,6 +201,10 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
 			taxfile = validParameter.validFile(parameters, "taxonomy", true);
 			if (taxfile == "not open") { abort = true; }
 			else if (taxfile == "not found") {  taxfile = "";  }
+			
+			qualfile = validParameter.validFile(parameters, "qfile", true);
+			if (qualfile == "not open") { abort = true; }
+			else if (qualfile == "not found") {  qualfile = "";  }			
 
 			
 			string usedDups = "true";
@@ -201,7 +215,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
 			}
 			dups = m->isTrue(temp);
 			
-			if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, alignreport or list."); m->mothurOutEndLine(); abort = true; }
+			if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; }
 			
 			if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }			
 		}
@@ -216,9 +230,9 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
 
 void RemoveSeqsCommand::help(){
 	try {
-		m->mothurOut("The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, list, taxonomy or alignreport file.\n");
+		m->mothurOut("The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n");
 		m->mothurOut("It outputs a file containing the sequences NOT in the .accnos file.\n");
-		m->mothurOut("The remove.seqs command parameters are accnos, fasta, name, group, list, taxonomy, alignreport and dups.  You must provide accnos and at least one of the file parameters.\n");
+		m->mothurOut("The remove.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos and at least one of the file parameters.\n");
 		m->mothurOut("The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n");
 		m->mothurOut("The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n");
 		m->mothurOut("Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n");
@@ -249,6 +263,7 @@ int RemoveSeqsCommand::execute(){
 		if (alignfile != "")		{		readAlign();	}
 		if (listfile != "")			{		readList();		}
 		if (taxfile != "")			{		readTax();		}
+		if (qualfile != "")			{		readQual();		}
 		
 		if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {	remove(outputNames[i].c_str()); } return 0; }
 		
@@ -304,7 +319,7 @@ int RemoveSeqsCommand::readFasta(){
 		out.close();
 		
 		if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
-		outputTypes["fasta"].push_back(outputFileName); 
+		outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
 		
 		return 0;
 		
@@ -315,6 +330,71 @@ int RemoveSeqsCommand::readFasta(){
 	}
 }
 //**********************************************************************************************************************
+int RemoveSeqsCommand::readQual(){
+	try {
+		string thisOutputDir = outputDir;
+		if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
+		string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "pick" +  m->getExtension(qualfile);
+		ofstream out;
+		m->openOutputFile(outputFileName, out);
+		
+		
+		ifstream in;
+		m->openInputFile(qualfile, in);
+		string name;
+		
+		bool wroteSomething = false;
+		
+		
+		while(!in.eof()){	
+			string saveName = "";
+			string name = "";
+			string scores = "";
+			
+			in >> name; 
+			
+			if (name.length() != 0) { 
+				saveName = name.substr(1);
+				while (!in.eof())	{	
+					char c = in.get(); 
+					if (c == 10 || c == 13){	break;	}
+					else { name += c; }	
+				} 
+				m->gobble(in);
+			}
+			
+			while(in){
+				char letter= in.get();
+				if(letter == '>'){	in.putback(letter);	break;	}
+				else{ scores += letter; }
+			}
+			
+			m->gobble(in);
+			
+			if (names.count(saveName) == 0) {
+				wroteSomething = true;
+				
+				out << name << endl << scores;
+			}
+			
+			m->gobble(in);
+		}
+		in.close();
+		out.close();
+		
+		
+		if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
+		outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
+		
+		return 0;
+		
+	}
+	catch(exception& e) {
+		m->errorOut(e, "RemoveSeqsCommand", "readQual");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
 int RemoveSeqsCommand::readList(){
 	try {
 		string thisOutputDir = outputDir;
@@ -375,7 +455,7 @@ int RemoveSeqsCommand::readList(){
 		out.close();
 		
 		if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
-		outputTypes["list"].push_back(outputFileName); 
+		outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
 				
 		return 0;
 
@@ -461,7 +541,7 @@ int RemoveSeqsCommand::readName(){
 		out.close();
 
 		if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
-		outputTypes["name"].push_back(outputFileName);
+		outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
 				
 		return 0;
 	}
@@ -505,7 +585,7 @@ int RemoveSeqsCommand::readGroup(){
 		out.close();
 		
 		if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
-		outputTypes["group"].push_back(outputFileName); 
+		outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
 		
 		return 0;
 	}
@@ -547,7 +627,7 @@ int RemoveSeqsCommand::readTax(){
 		out.close();
 		
 		if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
-		outputTypes["taxonomy"].push_back(outputFileName);
+		outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
 		
 		return 0;
 	}
@@ -613,7 +693,7 @@ int RemoveSeqsCommand::readAlign(){
 		out.close();
 		
 		if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
-		outputTypes["alignreport"].push_back(outputFileName);
+		outputTypes["alignreport"].push_back(outputFileName); outputNames.push_back(outputFileName);
 		
 		return 0;
 		
diff --git a/removeseqscommand.h b/removeseqscommand.h
index 8321212..de1e3d9 100644
--- a/removeseqscommand.h
+++ b/removeseqscommand.h
@@ -28,7 +28,7 @@ class RemoveSeqsCommand : public Command {
 		
 	private:
 		set<string> names;
-		string accnosfile, fastafile, namefile, groupfile, alignfile, listfile, taxfile, outputDir;
+		string accnosfile, fastafile, namefile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
 		bool abort, dups;
 		vector<string> outputNames;
 		map<string, vector<string> > outputTypes;
@@ -40,6 +40,7 @@ class RemoveSeqsCommand : public Command {
 		void readAccnos();
 		int readList();
 		int readTax();
+		int readQual();
 		
 };
 
diff --git a/sabundvector.cpp b/sabundvector.cpp
index 6e4401b..1bceec2 100644
--- a/sabundvector.cpp
+++ b/sabundvector.cpp
@@ -153,7 +153,13 @@ void SAbundVector::print(string prefix, ostream& output){
 	}
 	output << endl;
 }
-
+/***********************************************************************/
+void SAbundVector::clear(){
+	numBins = 0;
+	maxRank = 0;
+	numSeqs = 0;
+	data.clear();	
+}
 /***********************************************************************/
 void SAbundVector::print(ostream& output){
 	try {
diff --git a/sabundvector.hpp b/sabundvector.hpp
index 769a35d..561294c 100644
--- a/sabundvector.hpp
+++ b/sabundvector.hpp
@@ -40,6 +40,7 @@ public:
 	int sum();
 	void resize(int);
 	int size();
+	void clear();
 
 	void print(ostream&);
 	void print(string, ostream&);
diff --git a/sharedordervector.cpp b/sharedordervector.cpp
index 3944212..15ee7fc 100644
--- a/sharedordervector.cpp
+++ b/sharedordervector.cpp
@@ -171,7 +171,14 @@ void SharedOrderVector::print(ostream& output){
 	}
 }
 
+/***********************************************************************/
 
+void SharedOrderVector::clear(){
+	numBins = 0;
+	maxRank = 0;
+	numSeqs = 0;
+	data.clear();
+}
 /***********************************************************************/
 
 void SharedOrderVector::resize(int){
diff --git a/sharedordervector.h b/sharedordervector.h
index 3568450..6aa8ba1 100644
--- a/sharedordervector.h
+++ b/sharedordervector.h
@@ -24,6 +24,7 @@ struct individual {
 		bool operator()(const individual& i1, const individual& i2) {
 		return (i1.abundance > i2.abundance);
 		}
+	individual() { group = ""; bin = 0; abundance = 0; }
 };
 
 struct individualFloat {
@@ -33,6 +34,7 @@ struct individualFloat {
 		bool operator()(const individual& i1, const individual& i2) {
 		return (i1.abundance > i2.abundance);
 		}
+	individualFloat() { group = ""; bin = 0; abundance = 0.0; }
 };
 
 
@@ -66,6 +68,7 @@ public:
 	vector<individual>::iterator end();
 	void push_back(int, int, string);  //OTU, abundance, group  MUST CALL UPDATE STATS AFTER PUSHBACK!!!
 	void updateStats();
+	void clear();
 
 	int getNumBins();
 	int getNumSeqs();
diff --git a/sharedrabundfloatvector.cpp b/sharedrabundfloatvector.cpp
index 84b1cf5..52ffbb8 100644
--- a/sharedrabundfloatvector.cpp
+++ b/sharedrabundfloatvector.cpp
@@ -131,11 +131,20 @@ void SharedRAbundFloatVector::set(int binNumber, float newBinSize, string groupn
 		numSeqs += (newBinSize - oldBinSize);
 	}
 	catch(exception& e) {
-		m->errorOut(e, "SharedRAbundVector", "set");
+		m->errorOut(e, "SharedRAbundFloatVector", "set");
 		exit(1);
 	}
 }
+/***********************************************************************/
 
+void SharedRAbundFloatVector::clear(){
+	numBins = 0;
+	maxRank = 0;
+	numSeqs = 0;
+	data.clear();
+	for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
+	lookup.clear();
+}
 /***********************************************************************/
 float SharedRAbundFloatVector::getAbundance(int index){
 	return data[index].abundance;	
diff --git a/sharedrabundfloatvector.h b/sharedrabundfloatvector.h
index f826d9c..a3407f4 100644
--- a/sharedrabundfloatvector.h
+++ b/sharedrabundfloatvector.h
@@ -51,6 +51,7 @@ public:
 	void push_back(float, string);  //abundance, groupname
 	void pop_back();
 	void resize(int);
+	void clear();
 	int size();
 	
 	void print(ostream&);
diff --git a/sharedrabundvector.cpp b/sharedrabundvector.cpp
index 9328cbd..9276b7b 100644
--- a/sharedrabundvector.cpp
+++ b/sharedrabundvector.cpp
@@ -205,6 +205,16 @@ vector <individual> SharedRAbundVector::getData(){
 }
 /***********************************************************************/
 
+void SharedRAbundVector::clear(){
+	numBins = 0;
+	maxRank = 0;
+	numSeqs = 0;
+	data.clear();
+	for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
+	lookup.clear();
+}
+/***********************************************************************/
+
 void SharedRAbundVector::push_back(int binSize, string groupName){
 	try {
 		individual newGuy;
@@ -486,15 +496,15 @@ SharedOrderVector SharedRAbundVector::getSharedOrderVector() {
 OrderVector SharedRAbundVector::getOrderVector(map<string,int>* nameMap = NULL) {
 	try {
 		OrderVector ov;
-	
-		for(int i=0;i<data.size();i++){
+		for(int i=0;i<numBins;i++){
 			for(int j=0;j<data[i].abundance;j++){
 				ov.push_back(i);
 			}
 		}
 		random_shuffle(ov.begin(), ov.end());
-
+		
 		ov.setLabel(label);	
+
 		return ov;
 	}
 	catch(exception& e) {
diff --git a/sharedrabundvector.h b/sharedrabundvector.h
index 8f4227e..6584d1d 100644
--- a/sharedrabundvector.h
+++ b/sharedrabundvector.h
@@ -55,6 +55,7 @@ public:
 	void pop_back();
 	void resize(int);
 	int size();
+	void clear();
 	vector<individual>::reverse_iterator rbegin();
 	vector<individual>::reverse_iterator rend();
 	
diff --git a/sharedsabundvector.cpp b/sharedsabundvector.cpp
index 4aea589..1df608b 100644
--- a/sharedsabundvector.cpp
+++ b/sharedsabundvector.cpp
@@ -234,6 +234,15 @@ SharedOrderVector SharedSAbundVector::getSharedOrderVector() {
 		exit(1);
 	}
 }
+/***********************************************************************/
+
+void SharedSAbundVector::clear(){
+	numBins = 0;
+	maxRank = 0;
+	numSeqs = 0;
+	data.clear();
+}
+
 /***********************************************************************/
 OrderVector SharedSAbundVector::getOrderVector(map<string,int>* hold = NULL){
 	try {
diff --git a/sharedsabundvector.h b/sharedsabundvector.h
index 29b3745..cd78a2e 100644
--- a/sharedsabundvector.h
+++ b/sharedsabundvector.h
@@ -45,6 +45,7 @@ public:
 	void pop_back();
 	void resize(int);
 	int size();
+	void clear();
 
 	void print(ostream&);
 		
diff --git a/subsamplecommand.cpp b/subsamplecommand.cpp
index 04f6c7c..036341f 100644
--- a/subsamplecommand.cpp
+++ b/subsamplecommand.cpp
@@ -12,7 +12,7 @@
 //**********************************************************************************************************************
 vector<string> SubSampleCommand::getValidParameters(){	
 	try {
-		string Array[] =  {"groups","label","outputdir","inputdir"};
+		string Array[] =  {"fasta", "group", "list","shared","rabund", "name","sabund","size","groups","label","outputdir","inputdir"};
 		vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
 		return myArray;
 	}
@@ -31,6 +31,9 @@ SubSampleCommand::SubSampleCommand(){
 		outputTypes["list"] = tempOutNames;
 		outputTypes["rabund"] = tempOutNames;
 		outputTypes["sabund"] = tempOutNames;
+		outputTypes["fasta"] = tempOutNames;
+		outputTypes["name"] = tempOutNames;
+		outputTypes["group"] = tempOutNames;
 	}
 	catch(exception& e) {
 		m->errorOut(e, "SubSampleCommand", "GetRelAbundCommand");
@@ -40,7 +43,8 @@ SubSampleCommand::SubSampleCommand(){
 //**********************************************************************************************************************
 vector<string> SubSampleCommand::getRequiredParameters(){	
 	try {
-		vector<string> myArray;
+		string Array[] =  {"fasta","list","shared","rabund", "sabund","or"};
+		vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
 		return myArray;
 	}
 	catch(exception& e) {
@@ -51,8 +55,7 @@ vector<string> SubSampleCommand::getRequiredParameters(){
 //**********************************************************************************************************************
 vector<string> SubSampleCommand::getRequiredFiles(){	
 	try {
-		string Array[] =  {"shared","list","rabund","sabund","or"};
-		vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+		vector<string> myArray;
 		return myArray;
 	}
 	catch(exception& e) {
@@ -73,8 +76,8 @@ SubSampleCommand::SubSampleCommand(string option) {
 		
 		else {
 			//valid paramters for this command
-			string AlignArray[] =  {"groups","label","outputdir","inputdir"};
-			vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+			string Array[] =  {"fasta", "group", "list","shared","rabund", "sabund","name","size","groups","label","outputdir","inputdir"};
+			vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
 			
 			OptionParser parser(option);
 			map<string,string> parameters = parser.getParameters();
@@ -82,7 +85,8 @@ SubSampleCommand::SubSampleCommand(string option) {
 			ValidParameters validParameter;
 			
 			//check to make sure all parameters are valid for command
-			for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+			map<string,string>::iterator it;
+			for (it = parameters.begin(); it != parameters.end(); it++) { 
 				if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
 			}
 			
@@ -92,16 +96,105 @@ SubSampleCommand::SubSampleCommand(string option) {
 			outputTypes["list"] = tempOutNames;
 			outputTypes["rabund"] = tempOutNames;
 			outputTypes["sabund"] = tempOutNames;
+			outputTypes["fasta"] = tempOutNames;
+			outputTypes["name"] = tempOutNames;
+			outputTypes["group"] = tempOutNames;
 					
 			//if the user changes the output directory command factory will send this info to us in the output parameter 
-			outputDir = validParameter.validFile(parameters, "outputdir", false);		if (outputDir == "not found"){	
-				outputDir = "";	
-				outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it	
+			outputDir = validParameter.validFile(parameters, "outputdir", false);		if (outputDir == "not found"){	outputDir = "";	}
+			
+			//if the user changes the input directory command factory will send this info to us in the output parameter 
+			string inputDir = validParameter.validFile(parameters, "inputdir", false);		
+			if (inputDir == "not found"){	inputDir = "";		}
+			else {
+				string path;
+				it = parameters.find("list");
+				//user has given a template file
+				if(it != parameters.end()){ 
+					path = m->hasPath(it->second);
+					//if the user has not given a path then, add inputdir. else leave path alone.
+					if (path == "") {	parameters["list"] = inputDir + it->second;		}
+				}
+				
+				it = parameters.find("fasta");
+				//user has given a template file
+				if(it != parameters.end()){ 
+					path = m->hasPath(it->second);
+					//if the user has not given a path then, add inputdir. else leave path alone.
+					if (path == "") {	parameters["fasta"] = inputDir + it->second;		}
+				}
+				
+				it = parameters.find("shared");
+				//user has given a template file
+				if(it != parameters.end()){ 
+					path = m->hasPath(it->second);
+					//if the user has not given a path then, add inputdir. else leave path alone.
+					if (path == "") {	parameters["shared"] = inputDir + it->second;		}
+				}
+				
+				it = parameters.find("group");
+				//user has given a template file
+				if(it != parameters.end()){ 
+					path = m->hasPath(it->second);
+					//if the user has not given a path then, add inputdir. else leave path alone.
+					if (path == "") {	parameters["group"] = inputDir + it->second;		}
+				}
+				
+				it = parameters.find("sabund");
+				//user has given a template file
+				if(it != parameters.end()){ 
+					path = m->hasPath(it->second);
+					//if the user has not given a path then, add inputdir. else leave path alone.
+					if (path == "") {	parameters["sabund"] = inputDir + it->second;		}
+				}
+				
+				it = parameters.find("rabund");
+				//user has given a template file
+				if(it != parameters.end()){ 
+					path = m->hasPath(it->second);
+					//if the user has not given a path then, add inputdir. else leave path alone.
+					if (path == "") {	parameters["rabund"] = inputDir + it->second;		}
+				}
+				
+				it = parameters.find("name");
+				//user has given a template file
+				if(it != parameters.end()){ 
+					path = m->hasPath(it->second);
+					//if the user has not given a path then, add inputdir. else leave path alone.
+					if (path == "") {	parameters["name"] = inputDir + it->second;		}
+				}
 			}
 			
-			//make sure the user has already run the read.otu command
-			if ((globaldata->getSharedFile() == "") && (globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { m->mothurOut("You must read a list, sabund, rabund or shared file before you can use the sub.sample command."); m->mothurOutEndLine(); abort = true; }
-
+			//check for required parameters
+			listfile = validParameter.validFile(parameters, "list", true);
+			if (listfile == "not open") { listfile = ""; abort = true; }
+			else if (listfile == "not found") { listfile = ""; }	
+			
+			sabundfile = validParameter.validFile(parameters, "sabund", true);
+			if (sabundfile == "not open") { sabundfile = ""; abort = true; }	
+			else if (sabundfile == "not found") { sabundfile = ""; }
+			
+			rabundfile = validParameter.validFile(parameters, "rabund", true);
+			if (rabundfile == "not open") { rabundfile = ""; abort = true; }	
+			else if (rabundfile == "not found") { rabundfile = ""; }
+			
+			fastafile = validParameter.validFile(parameters, "fasta", true);
+			if (fastafile == "not open") { fastafile = ""; abort = true; }	
+			else if (fastafile == "not found") { fastafile = ""; }
+			
+			sharedfile = validParameter.validFile(parameters, "shared", true);
+			if (sharedfile == "not open") { sharedfile = ""; abort = true; }	
+			else if (sharedfile == "not found") { sharedfile = ""; }
+			
+			namefile = validParameter.validFile(parameters, "name", true);
+			if (namefile == "not open") { namefile = ""; abort = true; }	
+			else if (namefile == "not found") { namefile = ""; }
+			
+			groupfile = validParameter.validFile(parameters, "group", true);
+			if (groupfile == "not open") { groupfile = ""; abort = true; }	
+			else if (groupfile == "not found") { groupfile = ""; }
+			
+			
 			//check for optional parameter and set defaults
 			// ...at some point should added some additional type checking...
 			label = validParameter.validFile(parameters, "label", false);			
@@ -111,12 +204,6 @@ SubSampleCommand::SubSampleCommand(string option) {
 				else { allLines = 1;  }
 			}
 			
-			//if the user has not specified any labels use the ones from read.otu
-			if (label == "") {  
-				allLines = globaldata->allLines; 
-				labels = globaldata->labels; 
-			}
-			
 			groups = validParameter.validFile(parameters, "groups", false);			
 			if (groups == "not found") { groups = ""; pickedGroups = false; }
 			else { 
@@ -125,6 +212,20 @@ SubSampleCommand::SubSampleCommand(string option) {
 				globaldata->Groups = Groups;
 			}
 			
+			string temp = validParameter.validFile(parameters, "size", false);		if (temp == "not found"){	temp = "0";		}
+			convert(temp, size);  
+			
+			if ((namefile != "") && (fastafile == "")) { m->mothurOut("You may only use a namefile with a fastafile."); m->mothurOutEndLine(); abort = true; }
+			
+			if ((fastafile == "") && (listfile == "") && (sabundfile == "") && (rabundfile == "") && (sharedfile == "")) {
+				m->mothurOut("You must provide a fasta, list, sabund, rabund or shared file as an input file."); m->mothurOutEndLine(); abort = true; }
+			
+			if (pickedGroups && ((groupfile == "") && (sharedfile == ""))) { 
+				m->mothurOut("You cannot pick groups without a valid group file or shared file."); m->mothurOutEndLine(); abort = true; }
+			
+			if ((groupfile != "") && ((fastafile == "") && (listfile == ""))) { 
+				m->mothurOut("Group file only valid with listfile or fastafile."); m->mothurOutEndLine(); abort = true; }
+			
 		}
 
 	}
@@ -138,15 +239,16 @@ SubSampleCommand::SubSampleCommand(string option) {
 
 void SubSampleCommand::help(){
 	try {
-		m->mothurOut("The get.relabund command can only be executed after a successful read.otu command of a list and group or shared file.\n");
-		m->mothurOut("The get.relabund command parameters are groups, scale and label.  No parameters are required.\n");
+		m->mothurOut("The sub.sample command is designed to be used as a way to normalize your data, or create a smaller set from your original set.\n");
+		m->mothurOut("The sub.sample command parameters are fasta, name, list, group, rabund, sabund, shared, groups, size and label.  You must provide a fasta, list, sabund, rabund or shared file as an input file.\n");
 		m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
 		m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
-		m->mothurOut("The scale parameter allows you to select what scale you would like to use. Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n");
-		m->mothurOut("The get.relabund command should be in the following format: get.relabund(groups=yourGroups, label=yourLabels).\n");
-		m->mothurOut("Example get.relabund(groups=A-B-C, scale=averagegroup).\n");
+		m->mothurOut("The size parameter allows you indicate the size of your subsample.\n");
+		m->mothurOut("The size parameter is not set: with shared file size=number of seqs in smallest sample, with all other files, 10% of number of seqs.\n");
+		m->mothurOut("The sub.sample command should be in the following format: sub.sample(list=yourListFile, group=yourGroupFile, groups=yourGroups, label=yourLabels).\n");
+		m->mothurOut("Example sub.sample(list=abrecovery.fn.list, group=abrecovery.groups, groups=B-C, size=20).\n");
 		m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
-		m->mothurOut("The get.relabund command outputs a .relabund file.\n");
+		m->mothurOut("The sub.sample command outputs a .subsample file.\n");
 		m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
 
 	}
@@ -158,8 +260,7 @@ void SubSampleCommand::help(){
 
 //**********************************************************************************************************************
 
-SubSampleCommand::~SubSampleCommand(){
-}
+SubSampleCommand::~SubSampleCommand(){}
 
 //**********************************************************************************************************************
 
@@ -168,47 +269,15 @@ int SubSampleCommand::execute(){
 	
 		if (abort == true) { return 0; }
 		
-		string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "subsample" +  m->getExtension(globaldata->inputFileName);
-		ofstream out;
-		m->openOutputFile(outputFileName, out);
-		out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
-		
-		string format = globaldata->getFormat();
-		
-		read = new ReadOTUFile(globaldata->inputFileName);	
-		read->read(&*globaldata); 
-		input = globaldata->ginput;
-		
-		if (format == "sharedfile") {
-			lookup = input->getSharedRAbundVectors();
-			outputTypes["shared"].push_back(outputFileName);
-			getSubSampleShared(lookup, out);
-		}else if (format == "list") { 
-			list = globaldata->glist;
-			outputTypes["list"].push_back(outputFileName);
-			//getSubSamplesList();
-		}else if (format == "rabund") { 
-			rabund = globaldata->rabund;
-			outputTypes["rabund"].push_back(outputFileName);
-			//getSubSamplesRabund();
-		
-		}else if (format == "sabund") { 
-			sabund = globaldata->sabund;
-			outputTypes["sabund"].push_back(outputFileName);
-			//getSubSamplesSabund();
-		}
-		
-		out.close();
-					
-		//reset groups parameter
-		delete input; globaldata->ginput = NULL;
-		delete read;
-		
-		if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;}
-		
+		if (sharedfile != "")	{   getSubSampleShared();	}
+		//if (listfile != "")		{   getSubSampleList();		}
+		//if (rabund != "")		{   getSubSampleRabund();	}
+		//if (sabundfile != "")	{   getSubSampleSabund();	}
+		//if (fastafile != "")	{   getSubSampleFasta();	}
+				
 		m->mothurOutEndLine();
 		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-		m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); 
+		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
 		m->mothurOutEndLine();
 		
 		return 0;
@@ -219,26 +288,35 @@ int SubSampleCommand::execute(){
 	}
 }
 //**********************************************************************************************************************
-int SubSampleCommand::getSubSampleShared(vector<SharedRAbundVector*>& thislookup, ofstream& filename) {
+int SubSampleCommand::getSubSampleShared() {
 	try {
+		
+		string thisOutputDir = outputDir;
+		if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
+		string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "subsample" + m->getExtension(sharedfile);
+		
+		ofstream out;
+		m->openOutputFile(outputFileName, out);
+		outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+		
+		InputData* input = new InputData(sharedfile, "sharedfile");
+		vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
+		string lastLabel = lookup[0]->getLabel();
 	
 		//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
 		set<string> processedLabels;
 		set<string> userLabels = labels;
 
-		string lastLabel = lookup[0]->getLabel();
 	
 		//as long as you are not at the end of the file or done wih the lines you want
 		while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
-			if (m->control_pressed) {  return 0;  }
+			if (m->control_pressed) {  out.close(); return 0;  }
 	
 			if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){			
 
 				m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
 				
-				//process lookup
-				
-								
+				processShared(lookup, out);
 																
 				processedLabels.insert(lookup[0]->getLabel());
 				userLabels.erase(lookup[0]->getLabel());
@@ -252,7 +330,8 @@ int SubSampleCommand::getSubSampleShared(vector<SharedRAbundVector*>& thislookup
 				lookup = input->getSharedRAbundVectors(lastLabel);
 				m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
 				
-				//process lookup				
+				processShared(lookup, out);
+				
 				processedLabels.insert(lookup[0]->getLabel());
 				userLabels.erase(lookup[0]->getLabel());
 				
@@ -269,7 +348,7 @@ int SubSampleCommand::getSubSampleShared(vector<SharedRAbundVector*>& thislookup
 		}
 		
 		
-		if (m->control_pressed) {  return 0;  }
+		if (m->control_pressed) {  out.close(); return 0;  }
 
 		//output error messages about any remaining user labels
 		set<string>::iterator it;
@@ -291,15 +370,12 @@ int SubSampleCommand::getSubSampleShared(vector<SharedRAbundVector*>& thislookup
 			
 			m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
 			
-			//process lookup
-			
-			
+			processShared(lookup, out);
 			
 			for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
 		}
 	
-		//reset groups parameter
-		globaldata->Groups.clear();  
+		out.close();  
 				
 		return 0;
  
@@ -309,8 +385,74 @@ int SubSampleCommand::getSubSampleShared(vector<SharedRAbundVector*>& thislookup
 		exit(1);
 	}
 }
-
-
+//**********************************************************************************************************************
+int SubSampleCommand::processShared(vector<SharedRAbundVector*>& thislookup, ofstream& out) {
+	try {
+		
+		if (pickedGroups) { eliminateZeroOTUS(thislookup); }
+		
+		if (size == 0) { //user has not set size, set size = smallest samples size
+			size = thislookup[0]->getNumSeqs();
+			for (int i = 1; i < thislookup.size(); i++) {
+				int thisSize = thislookup[i]->getNumSeqs();
+				
+				if (thisSize < size) {	size = thisSize;	}
+			}
+		}
+		
+		int numBins = thislookup[0]->getNumBins();
+		for (int i = 0; i < thislookup.size(); i++) {		
+			int thisSize = thislookup[i]->getNumSeqs();
+				
+			if (thisSize != size) {
+				
+				string thisgroup = thislookup[i]->getGroup();
+	
+				OrderVector* order = new OrderVector();
+				for(int p=0;p<numBins;p++){
+					for(int j=0;j<thislookup[i]->getAbundance(p);j++){
+						order->push_back(p);
+					}
+				}
+				random_shuffle(order->begin(), order->end());
+				
+				SharedRAbundVector* temp = new SharedRAbundVector(numBins);
+				temp->setLabel(thislookup[i]->getLabel());
+				temp->setGroup(thislookup[i]->getGroup());
+				
+				delete thislookup[i];
+				thislookup[i] = temp;
+				
+				
+				for (int j = 0; j < size; j++) {
+					//get random number to sample from order between 0 and thisSize-1.
+					int myrand = (int)((float)(rand()) / (RAND_MAX / (thisSize-1) + 1));
+					
+					int bin = order->get(myrand);
+					
+					int abund = thislookup[i]->getAbundance(bin);
+					thislookup[i]->set(bin, (abund+1), thisgroup);
+				}	
+				delete order;
+			}
+		}
+		
+		//subsampling may have created some otus with no sequences in them
+		eliminateZeroOTUS(thislookup);
+		
+		for (int i = 0; i < thislookup.size(); i++) {
+			out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
+			thislookup[i]->print(out);
+		}
+		
+		return 0;
+		
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SubSampleCommand", "eliminateZeroOTUS");
+		exit(1);
+	}
+}
 //**********************************************************************************************************************
 int SubSampleCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
 	try {
@@ -326,7 +468,7 @@ int SubSampleCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup)
 		//for each bin
 		for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
 			if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
-		
+			
 			//look at each sharedRabund and make sure they are not all zero
 			bool allZero = true;
 			for (int j = 0; j < thislookup.size(); j++) {
@@ -340,13 +482,14 @@ int SubSampleCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup)
 				}
 			}
 		}
-
+		
 		for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
-
+		thislookup.clear();
+		
 		thislookup = newLookup;
 		
 		return 0;
- 
+		
 	}
 	catch(exception& e) {
 		m->errorOut(e, "SubSampleCommand", "eliminateZeroOTUS");
@@ -357,3 +500,4 @@ int SubSampleCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup)
 //**********************************************************************************************************************
 
 
+
diff --git a/subsamplecommand.h b/subsamplecommand.h
index 98085ee..f068ac3 100644
--- a/subsamplecommand.h
+++ b/subsamplecommand.h
@@ -11,11 +11,12 @@
  */
  
 #include "command.hpp"
-#include "inputdata.h"
-#include "readotu.h"
+#include "globaldata.hpp"
 #include "sharedrabundvector.h"
+#include "listvector.hpp"
+#include "rabundvector.hpp"
+#include "inputdata.h"
 
-class GlobalData;
 
 class SubSampleCommand : public Command {
 
@@ -32,21 +33,19 @@ public:
 	
 private:
 	GlobalData* globaldata;
-	ReadOTUFile* read;
-	InputData* input;
-	vector<SharedRAbundVector*> lookup;
-	ListVector* list;
-	RAbundVector* rabund;
-	SAbundVector* sabund;
 	
-	bool abort, allLines, pickedGroups;
+	bool abort, pickedGroups, allLines;
+	string listfile, groupfile, sharedfile, rabundfile, sabundfile, fastafile, namefile;
 	set<string> labels; //holds labels to be used
 	string groups, label, outputDir;
 	vector<string> Groups, outputNames;
 	map<string, vector<string> > outputTypes;
+	int size;
 	
 	int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
-	int getSubSampleShared(vector<SharedRAbundVector*>&, ofstream&);
+	int getSubSampleShared();
+	int processShared(vector<SharedRAbundVector*>&, ofstream&);
+	
 };
 
 #endif