From 17a6a53298a907c005fa93fb82af9e533adcda09 Mon Sep 17 00:00:00 2001
From: westcott <westcott>
Date: Thu, 11 Mar 2010 13:20:55 +0000
Subject: [PATCH] testing mpi

---
 bellerophon.cpp        |   3 +
 chimeraseqscommand.cpp |   3 +-
 commandfactory.cpp     |  15 +-
 commandfactory.hpp     |   1 +
 engine.cpp             | 136 +++++++++++++++++-
 filters.h              |   3 +-
 filterseqscommand.cpp  | 315 +++++++++++++++++++++++++++++++++++------
 filterseqscommand.h    |   5 +
 getseqscommand.cpp     |   7 +-
 mothur.cpp             |  15 +-
 mothur.h               |   3 +
 mothurout.h            |   1 -
 removeseqscommand.cpp  |   7 +-
 sequence.cpp           | 170 +++++++++++++++++-----
 sequence.hpp           |   3 +
 sparsematrix.hpp       |   1 +
 16 files changed, 590 insertions(+), 98 deletions(-)

diff --git a/bellerophon.cpp b/bellerophon.cpp
index 2c545fe..54dfb9b 100644
--- a/bellerophon.cpp
+++ b/bellerophon.cpp
@@ -197,6 +197,7 @@ int Bellerophon::getChimeras() {
 				
 				if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
 				
+				left.clear(); right.clear();
 				vector<SeqMap> distMapRight;
 				vector<SeqMap> distMapLeft;
 				
@@ -251,6 +252,8 @@ int Bellerophon::getChimeras() {
 		//sort Preferences highest to lowest
 		sort(pref.begin(), pref.end(), comparePref);
 		
+		for (int i = 0; i < seqs.size(); i++) { delete seqs[i];  }  seqs.clear();
+		
 		return 0;
 		
 	}
diff --git a/chimeraseqscommand.cpp b/chimeraseqscommand.cpp
index 69ce031..65e082b 100644
--- a/chimeraseqscommand.cpp
+++ b/chimeraseqscommand.cpp
@@ -340,7 +340,8 @@ int ChimeraSeqsCommand::execute(){
 			m->mothurOut(outputFileName); m->mothurOutEndLine();	
 			if (hasAccnos) {  m->mothurOut(accnosFileName); m->mothurOutEndLine();  }
 			m->mothurOutEndLine();
-
+			
+			delete chimera;
 			return 0;
 		}
 		
diff --git a/commandfactory.cpp b/commandfactory.cpp
index dd97b2f..b4f3654 100644
--- a/commandfactory.cpp
+++ b/commandfactory.cpp
@@ -117,7 +117,7 @@ CommandFactory::CommandFactory(){
 	commands["bootstrap.shared"]	= "bootstrap.shared";
 	//commands["consensus"]			= "consensus";
 	commands["help"]				= "help"; 
-	commands["filter.seqs"]			= "filter.seqs";
+	commands["filter.seqs"]			= "MPIEnabled";
 	commands["align.seqs"]			= "align.seqs";
 	commands["summary.seqs"]		= "summary.seqs";
 	commands["screen.seqs"]			= "screen.seqs";
@@ -131,7 +131,7 @@ CommandFactory::CommandFactory(){
 	commands["align.check"]			= "align.check";
 	commands["get.sharedseqs"]		= "get.sharedseqs";
 	commands["get.otulist"]			= "get.otulist";
-	commands["quit"]				= "quit"; 
+	commands["quit"]				= "MPIEnabled"; 
 	commands["hcluster"]			= "hcluster"; 
 	commands["classify.seqs"]		= "classify.seqs"; 
 	commands["phylotype"]			= "phylotype";
@@ -145,6 +145,17 @@ CommandFactory::CommandFactory(){
 }
 /***********************************************************/
 
+/***********************************************************/
+bool CommandFactory::MPIEnabled(string commandName) {
+	bool mpi = false;
+	it = commands.find(commandName);
+	if (it != commands.end()) { 
+		if (it->second == "MPIEnabled") { return true; }
+	}
+	return mpi;
+}
+/***********************************************************/
+
 /***********************************************************/
 CommandFactory::~CommandFactory(){
 	_uniqueInstance = 0;
diff --git a/commandfactory.hpp b/commandfactory.hpp
index e1f62da..6f96e90 100644
--- a/commandfactory.hpp
+++ b/commandfactory.hpp
@@ -25,6 +25,7 @@ public:
 	void setOutputDirectory(string o)	{	outputDir = o;		}
 	void setInputDirectory(string i)	{	inputDir = i;		}
 	string getOutputDir()				{	return outputDir;	}
+	bool MPIEnabled(string);
 
 private:
 	Command* command;
diff --git a/engine.cpp b/engine.cpp
index 3bdc484..2d5fd75 100644
--- a/engine.cpp
+++ b/engine.cpp
@@ -54,8 +54,18 @@ bool InteractEngine::getInput(){
 			mout->mothurOutEndLine();
 			
 			input = getCommand();	
+			#ifdef USE_MPI
+					int pid;
+					MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+					
+					if (pid == 0) {
+			#endif
 			mout->mothurOutEndLine();	
 			
+			#ifdef USE_MPI
+				}
+			#endif
+			
 			if (mout->control_pressed) { input = "quit()"; }
 			
 			//allow user to omit the () on the quit command
@@ -68,11 +78,22 @@ bool InteractEngine::getInput(){
 			
 			if (commandName != "") {
 				mout->executing = true;
+				
+				#ifdef USE_MPI
+					int pid;
+					MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+					
+					if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
+				#endif
 				//executes valid command
 				Command* command = cFactory->getCommand(commandName, options);
 				quitCommandCalled = command->execute();
 				mout->control_pressed = 0;
 				mout->executing = false;
+				
+				#ifdef USE_MPI
+					}
+				#endif
 			}else {
 				mout->mothurOut("Your input contains errors. Please try again."); 
 				mout->mothurOutEndLine();
@@ -99,21 +120,83 @@ string Engine::getCommand()  {
 					cout << nextCommand << endl;
 				}	
 				
+				#ifdef USE_MPI
+					int pid;
+					MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+					
+					if (pid == 0) { //only one process should output to screen
+				#endif
+
 				mout->mothurOutJustToLog("mothur > " + toString(nextCommand));
+				
+				#ifdef USE_MPI
+					}
+				#endif
+				
 				return nextCommand;
 			#else
 				string nextCommand = "";
+				#ifdef USE_MPI
+					int pid;
+					MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+					
+					if (pid == 0) { //only one process should output to screen
+				#endif
+
 				mout->mothurOut("mothur > ");
+				
+				#ifdef USE_MPI
+					}
+				#endif
+				
 				getline(cin, nextCommand);
+				
+				#ifdef USE_MPI
+					int pid;
+					MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+					
+					if (pid == 0) { //only one process should output to screen
+				#endif
+				
 				mout->mothurOutJustToLog("mothur > " + toString(nextCommand));
+				
+				#ifdef USE_MPI
+					}
+				#endif
+
 				return nextCommand;
 			#endif
 		#else
 			string nextCommand = "";
-			mout->mothurOut("mothur > ");
-			getline(cin, nextCommand);
-			mout->mothurOutJustToLog("mothur > " + toString(nextCommand));
-			return nextCommand;
+				#ifdef USE_MPI
+					int pid;
+					MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+					
+					if (pid == 0) { //only one process should output to screen
+				#endif
+
+				mout->mothurOut("mothur > ");
+				
+				#ifdef USE_MPI
+					}
+				#endif
+				
+				getline(cin, nextCommand);
+				
+				#ifdef USE_MPI
+					int pid;
+					MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+					
+					if (pid == 0) { //only one process should output to screen
+				#endif
+				
+				mout->mothurOutJustToLog(toString(nextCommand));
+				
+				#ifdef USE_MPI
+					}
+				#endif
+
+				return nextCommand;
 		#endif
 		
 		mout->mothurOutEndLine();
@@ -169,10 +252,21 @@ bool BatchEngine::getInput(){
 			
 			if (input[0] != '#') {
 				
+				#ifdef USE_MPI
+					int pid;
+					MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+					
+					if (pid == 0) { //only one process should output to screen
+				#endif
+
 				mout->mothurOutEndLine();
 				mout->mothurOut("mothur > " + input);
 				mout->mothurOutEndLine();
 				
+				#ifdef USE_MPI
+					}
+				#endif
+				
 				if (mout->control_pressed) { input = "quit()"; }
 				
 				//allow user to omit the () on the quit command
@@ -184,11 +278,21 @@ bool BatchEngine::getInput(){
 										
 				if (commandName != "") {
 					mout->executing = true;
+					#ifdef USE_MPI
+						int pid;
+						MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+					
+						if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
+					#endif
 					//executes valid command
 					Command* command = cFactory->getCommand(commandName, options);
 					quitCommandCalled = command->execute();
 					mout->control_pressed = 0;
 					mout->executing = false;
+				
+					#ifdef USE_MPI
+						}
+					#endif
 				}else {		
 					mout->mothurOut("Invalid."); 
 					mout->mothurOutEndLine();
@@ -250,11 +354,21 @@ bool ScriptEngine::getInput(){
 			
 			if (input == "") { input = "quit()"; }
 			
-			
+			#ifdef USE_MPI
+					int pid;
+					MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+					
+					if (pid == 0) {
+			#endif
+
 			mout->mothurOutEndLine();
 			mout->mothurOut("mothur > " + input);
 			mout->mothurOutEndLine();
-
+			
+			#ifdef USE_MPI
+					}
+			#endif
+			
 			if (mout->control_pressed) { input = "quit()"; }
 				
 			//allow user to omit the () on the quit command
@@ -266,11 +380,21 @@ bool ScriptEngine::getInput(){
 										
 			if (commandName != "") {
 				mout->executing = true;
+				#ifdef USE_MPI
+					int pid;
+					MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+					
+					if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
+				#endif
 				//executes valid command
 				Command* command = cFactory->getCommand(commandName, options);
 				quitCommandCalled = command->execute();
 				mout->control_pressed = 0;
 				mout->executing = false;
+				
+				#ifdef USE_MPI
+					}
+				#endif
 			}else {		
 				mout->mothurOut("Invalid."); 
 				mout->mothurOutEndLine();
diff --git a/filters.h b/filters.h
index 4c39607..94f8d33 100644
--- a/filters.h
+++ b/filters.h
@@ -28,6 +28,8 @@ public:
 	void setSoft(float s)		{		soft = s;		}
 	void setTrump(float t)		{		trump = t;		}
 	void setNumSeqs(int num)	{	numSeqs = num;		}
+	vector<int> a, t, g, c, gap;
+	
 	
 	void initialize() {
 		a.assign(alignmentLength, 0);
@@ -89,7 +91,6 @@ public:
 		
 protected:
 	string filter;
-	vector<int> a, t, g, c, gap;
 	int alignmentLength, numSeqs;
 	float soft;
 	char trump;
diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp
index 0e33ab2..326be01 100644
--- a/filterseqscommand.cpp
+++ b/filterseqscommand.cpp
@@ -131,6 +131,13 @@ FilterSeqsCommand::FilterSeqsCommand(string option)  {
 
 void FilterSeqsCommand::help(){
 	try {
+		#ifdef USE_MPI
+				int pid;
+				MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+					
+				if (pid == 0) {
+		#endif
+		
 		m->mothurOut("The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file.\n");
 		m->mothurOut("The filter.seqs command parameters are fasta, trump, soft, hard and vertical. \n");
 		m->mothurOut("The fasta parameter is required. You may enter several fasta files to build the filter from and filter, by separating their names with -'s.\n");
@@ -144,6 +151,10 @@ void FilterSeqsCommand::help(){
 		m->mothurOut("Example filter.seqs(fasta=abrecovery.fasta, trump=..., soft=..., hard=..., vertical=T).\n");
 		m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
 		
+		#ifdef USE_MPI
+			}
+		#endif
+		
 	}
 	catch(exception& e) {
 		m->errorOut(e, "FilterSeqsCommand", "help");
@@ -221,7 +232,13 @@ int FilterSeqsCommand::execute() {
 		
 		if (m->control_pressed) {  for(int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }  return 0; }
 
-		
+		#ifdef USE_MPI
+				int pid;
+				MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+					
+				if (pid == 0) {
+		#endif
+
 		m->mothurOutEndLine();
 		m->mothurOut("Length of filtered alignment: " + toString(filteredLength)); m->mothurOutEndLine();
 		m->mothurOut("Number of columns removed: " + toString((alignmentLength-filteredLength))); m->mothurOutEndLine();
@@ -233,6 +250,10 @@ int FilterSeqsCommand::execute() {
 		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
 		for(int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();	 }
 		m->mothurOutEndLine();
+		
+		#ifdef USE_MPI
+			}
+		#endif
 
 		return 0;
 		
@@ -267,72 +288,156 @@ string FilterSeqsCommand::createFilter() {
 			for (int s = 0; s < fastafileNames.size(); s++) {
 			
 				for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+			
+#ifdef USE_MPI	
+				int pid, rc, ierr; 
+				char* buf;
+				int Atag = 1; int Ttag = 2; int Ctag = 3; int Gtag = 4; int Gaptag = 5;
 				
-				#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-					if(processors == 1){
-						ifstream inFASTA;
-						openInputFile(fastafileNames[s], inFASTA);
-						int numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
-						inFASTA.close();
+				MPI_Status status; 
+				MPI_File in; 
+				rc = MPI_Comm_size(MPI_COMM_WORLD, &processors);
+				rc = MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
 						
-						numSeqs += numFastaSeqs;
-				
-						lines.push_back(new linePair(0, numFastaSeqs));
-				
-						driverCreateFilter(F, fastafileNames[s], lines[0]);
-					}else{
-						vector<int> positions;
-				
-						ifstream inFASTA;
-						openInputFile(fastafileNames[s], inFASTA);
-				
-						string input;
-						while(!inFASTA.eof()){
-							input = getline(inFASTA);
-							if (input.length() != 0) {
-								if(input[0] == '>'){	long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);	}
+							
+				char* tempFileName = &(fastafileNames[s][0]);
+				MPI_File_open(MPI_COMM_WORLD, tempFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &in);  //comm, filename, mode, info, filepointer
+								
+				if (pid == 0) { //you are the root process
+						setLines(fastafileNames[s]);
+						
+						for (int j = 0; j < lines.size(); j++) { //each process
+							if (j != 0) { //don't send to yourself
+								MPI_Send(&lines[j]->start, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); //start position in file
+								MPI_Send(&lines[j]->numSeqs, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); //how many sequences we are sending
+								MPI_Send(&bufferSizes[j], 1, MPI_INT, j, 2001, MPI_COMM_WORLD); //how bytes for the read
 							}
 						}
-						inFASTA.close();
-				
-						int numFastaSeqs = positions.size();
+						cout << "done sending" << endl;
+						cout << "parent = " << pid << " lines = " << lines[pid]->start << '\t' << lines[pid]->numSeqs << " size = " <<  lines.size() << endl;	
+						
+						buf = new char(bufferSizes[0]);
+			cout << pid << '\t' << bufferSizes[0] << " line 1 start pos = " << lines[1]->start   << " buffer size 0 " << bufferSizes[0] << " buffer size 1 " << bufferSizes[1] << endl;			
+						MPI_File_read_at(in, 0, buf, bufferSizes[0], MPI_CHAR, &status);
+						
+		cout << pid << " done reading " << endl;
+						string tempBuf = buf;
+			cout << pid << '\t' << (tempBuf.substr(0, 10)) << endl;									
+						//do your part
+						MPICreateFilter(F, tempBuf);
+						
+						vector<int> temp; temp.resize(numSeqs);
+						
+						//get the frequencies from the child processes
+						for(int i = 0; i < ((processors-1)*5); i++) { 
+				cout << "i = " << i << endl;
+							int ierr = MPI_Recv(&temp, numSeqs, MPI_INT, MPI_ANY_SOURCE, 2001, MPI_COMM_WORLD, &status); 
+							
+							int receiveTag = temp[temp.size()-1];  //child process added a int to the end to indicate what letter count this is for
+							
+							int sender = status.MPI_SOURCE; 
+							
+							if (receiveTag == Atag) { //you are recieveing the A frequencies
+								for (int k = 0; k < alignmentLength; k++) {		F.a[k] += temp[k];	}
+							}else if (receiveTag == Ttag) { //you are recieveing the T frequencies
+								for (int k = 0; k < alignmentLength; k++) {		F.t[k] += temp[k];	}
+							}else if (receiveTag == Ctag) { //you are recieveing the C frequencies
+								for (int k = 0; k < alignmentLength; k++) {		F.c[k] += temp[k];	}
+							}else if (receiveTag == Gtag) { //you are recieveing the G frequencies
+								for (int k = 0; k < alignmentLength; k++) {		F.g[k] += temp[k];	}
+							}else if (receiveTag == Gaptag) { //you are recieveing the gap frequencies
+								for (int k = 0; k < alignmentLength; k++) {		F.gap[k] += temp[k];	}
+							}
+							
+							m->mothurOut("receive tag = " + toString(receiveTag) + " " + toString(sender) + " is complete."); m->mothurOutEndLine();
+						} 
+
 						
-						numSeqs += numFastaSeqs;
+				}else { //i am the child process
+					int startPos, numLines, bufferSize;
+				cout << "child = " << pid << endl;
+					ierr = MPI_Recv(&startPos, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+					ierr = MPI_Recv(&numLines, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+					ierr = MPI_Recv(&bufferSize, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+				cout << "child = " << pid << " done recv messages startpos = " << startPos << " numLines = " << numLines << " buffersize = " << bufferSize << endl;	
 				
-						int numSeqsPerProcessor = numFastaSeqs / processors;
+					
+					//send freqs
+					char* buf2 = new char(bufferSize);
+					MPI_File_read_at( in, startPos, buf2, bufferSize, MPI_CHAR, &status);
+				cout << pid << " done reading " << endl;
+					
+					string tempBuf = buf2;
+			cout << pid << '\t' << (tempBuf.substr(0, 10)) << endl;
+					MPICreateFilter(F, tempBuf);
 				
-						for (int i = 0; i < processors; i++) {
-							long int startPos = positions[ i * numSeqsPerProcessor ];
-							if(i == processors - 1){
-								numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
-							}
-							lines.push_back(new linePair(startPos, numSeqsPerProcessor));
-						}
+					//send my fequency counts
+					F.a.push_back(Atag);
+					int ierr = MPI_Send( &F.a[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
+					F.t.push_back(Ttag);
+					ierr = MPI_Send( &F.t[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
+					F.c.push_back(Ctag);
+					ierr = MPI_Send( &F.c[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
+					F.g.push_back(Gtag);
+					ierr = MPI_Send( &F.g[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
+					F.gap.push_back(Gaptag);
+					ierr = MPI_Send( &F.gap[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
+					
+					cout << "child " << pid << " done sending counts" << endl;
+				}
 				
-						createProcessesCreateFilter(F, fastafileNames[s]); 
-					}
-				#else
+				MPI_Barrier(MPI_COMM_WORLD);
+				
+#else
+		#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+				if(processors == 1){
 					ifstream inFASTA;
 					openInputFile(fastafileNames[s], inFASTA);
 					int numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
 					inFASTA.close();
-						
+					
 					numSeqs += numFastaSeqs;
-				
+					
 					lines.push_back(new linePair(0, numFastaSeqs));
+					
+					driverCreateFilter(F, fastafileNames[s], lines[0]);
+				}else{
+					
+					setLines(fastafileNames[s]);					
+					createProcessesCreateFilter(F, fastafileNames[s]); 
+				}
+		#else
+				ifstream inFASTA;
+				openInputFile(fastafileNames[s], inFASTA);
+				int numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+				inFASTA.close();
 				
-					driverCreateFilter(F, lines[0], fastafileNames[s]);
-				#endif
-			
+				numSeqs += numFastaSeqs;
+				
+				lines.push_back(new linePair(0, numFastaSeqs));
+				
+				driverCreateFilter(F, lines[0], fastafileNames[s]);
+		#endif
+#endif
 			
 			}
 		}
-		
+
+#ifdef USE_MPI
+
+//merge all frequency data and create filter string
+					//int pid;
+					//MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+					
+					//if (pid == 0) { //only one process should output to screen
+#endif
+
+	cout << "made it here" << endl;	
 		F.setNumSeqs(numSeqs);
 				
 		if(isTrue(vertical) == 1)	{	F.doVertical();	}
 		if(soft != 0)				{	F.doSoft();		}
-		
+//cout << "Filter String = " << F.getFilter() << endl;			
 		filterString = F.getFilter();
 
 		return filterString;
@@ -361,8 +466,14 @@ int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair*
 					if(isTrue(vertical) || soft != 0){	F.getFreqs(seq);	}
 					cout.flush();
 			}
+			
+			//report progress
+			if((i+1) % 100 == 0){	m->mothurOut(toString(i+1)); m->mothurOutEndLine();		}
 		}
-				
+		
+		//report progress
+		if((line->numSeqs) % 100 != 0){	m->mothurOut(toString(line->numSeqs)); m->mothurOutEndLine();		}
+		
 		in.close();
 		
 		return 0;
@@ -372,6 +483,38 @@ int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair*
 		exit(1);
 	}
 }
+/**************************************************************************************/
+int FilterSeqsCommand::MPICreateFilter(Filters& F, string temp) {	
+	try {
+		
+		vector<string> seqStrings;
+		parseBuffer(temp, seqStrings);
+		
+		for(int i=0;i<seqStrings.size();i++){
+				
+			if (m->control_pressed) { return 1; }
+			
+			Sequence seq("", seqStrings[0]);
+			
+			if(trump != '*'){	F.doTrump(seq);	}
+			if(isTrue(vertical) || soft != 0){	F.getFreqs(seq);	}
+			cout.flush();
+						
+			//report progress
+			if((i+1) % 100 == 0){	m->mothurOut(toString(i+1)); m->mothurOutEndLine();		}
+		}
+		
+		//report progress
+		if((seqStrings.size()) % 100 != 0){	m->mothurOut(toString(seqStrings.size())); m->mothurOutEndLine();		}
+		
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "FilterSeqsCommand", "MPICreateFilter");
+		exit(1);
+	}
+}
+
 /**************************************************************************************************/
 
 int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename) {
@@ -408,4 +551,84 @@ int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename)
 		exit(1);
 	}
 }
+/**************************************************************************************************/
+
+int FilterSeqsCommand::setLines(string filename) {
+	try {
+		vector<int> positions;
+		map<int, int> buf;
+		bufferSizes.clear();
+		
+		int pid;
+		MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+	
+		ifstream inFASTA;
+		openInputFile(filename, inFASTA);
+			
+		string input;
+		int numbuf = 0;
+		while(!inFASTA.eof()){	
+			input = getline(inFASTA);
+
+			if (input.length() != 0) {
+				numbuf += input.length();
+				if(input[0] == '>'){	long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);	buf[(pos - input.length() - 1)] = numbuf; }
+			}
+		}
+
+		inFASTA.close();
+		
+		int numFastaSeqs = positions.size();
+		
+		numSeqs += numFastaSeqs;
+		
+		int numSeqsPerProcessor = numFastaSeqs / processors;
+		
+		for (int i = 0; i < processors; i++) {
+
+			long int startPos = positions[ i * numSeqsPerProcessor ];
+			if(i == processors - 1){
+				numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
+				bufferSizes.push_back(numbuf-buf[startPos]);
+			}else{  
+				int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
+				bufferSizes.push_back(buf[myEnd]-buf[startPos]);
+			}
+			lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+		}
+		
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "FilterSeqsCommand", "setLines");
+		exit(1);
+	}
+}
+/**************************************************************************************************/
+int FilterSeqsCommand::parseBuffer(string file, vector<string>& seqs) {
+	try {
+		
+		istringstream iss (file,istringstream::in);
+		string name, seqstring;
+		
+		while (iss) {
+			
+			if (m->control_pressed) { return 0; }
+		cout << "here" << endl;			
+			Sequence seq(iss); 
+	cout << "here1" << endl;			
+			gobble(iss);
+	cout << seq.getName() << endl;		
+			if (seq.getName() != "") {
+				seqs.push_back(seq.getAligned());	
+			}
+		}
+		
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "FilterSeqsCommand", "parseBuffer");
+		exit(1);
+	}
+}
 /**************************************************************************************/
diff --git a/filterseqscommand.h b/filterseqscommand.h
index 232b0ac..3cc007c 100644
--- a/filterseqscommand.h
+++ b/filterseqscommand.h
@@ -35,6 +35,7 @@ private:
 	string vertical, filter, fasta, hard, outputDir, filterFileName;
 	vector<string> fastafileNames;	
 	int alignmentLength, processors;
+	vector<int> bufferSizes;
 
 	char trump;
 	bool abort;
@@ -44,6 +45,10 @@ private:
 	string createFilter();
 	int createProcessesCreateFilter(Filters&, string);
 	int driverCreateFilter(Filters&, string, linePair*);
+	int MPICreateFilter(Filters&, string);	
+	int setLines(string);
+	int parseBuffer(string, vector<string>&);
+	
 };
 
 #endif
diff --git a/getseqscommand.cpp b/getseqscommand.cpp
index 7535e65..f40c89b 100644
--- a/getseqscommand.cpp
+++ b/getseqscommand.cpp
@@ -277,10 +277,13 @@ int GetSeqsCommand::readList(){
 				}
 			
 				//get last name
-				if (names.count(binnames) == 1) {  newNames += binnames;  }
+				if (names.count(binnames) == 1) {  newNames += binnames + ",";  }
 
 				//if there are names in this bin add to new list
-				if (newNames != "") {  newList.push_back(newNames);	}
+				if (newNames != "") { 
+					newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
+					newList.push_back(newNames);	
+				}
 			}
 				
 			//print new listvector
diff --git a/mothur.cpp b/mothur.cpp
index a3eef76..6631489 100644
--- a/mothur.cpp
+++ b/mothur.cpp
@@ -12,6 +12,7 @@
 #include "globaldata.hpp"
 #include "mothurout.h"
 
+
 /**************************************************************************************************/
 
 GlobalData* GlobalData::_uniqueInstance = 0;
@@ -99,7 +100,11 @@ int main(int argc, char *argv[]){
 		m->mothurOutEndLine();			
 		m->mothurOut("Type 'quit()' to exit program");
 		m->mothurOutEndLine();	
-
+		
+		#ifdef USE_MPI
+			m->mothurOutJustToLog("Using MPI\n");
+			MPI_Init(&argc, &argv); 
+		#endif
 				
 		//srand(54321);
 		srand( (unsigned)time( NULL ) );
@@ -130,7 +135,7 @@ int main(int argc, char *argv[]){
 			mothur = new InteractEngine(argv[0]);	
 		}
 		
-		while(bail == 0)		{	bail = mothur->getInput();			}
+		while(bail == 0)	{	bail = mothur->getInput();	}
 		
 		string outputDir = mothur->getOutputDir();
 		string newlogFileName = outputDir + logFileName;
@@ -139,7 +144,11 @@ int main(int argc, char *argv[]){
 		rename(logFileName.c_str(), newlogFileName.c_str()); //logfile with timestamp
 		
 		delete mothur;
-
+		
+		#ifdef USE_MPI
+			MPI_Finalize();
+		#endif
+		
 		return 0;
 	}
 	catch(exception& e) {
diff --git a/mothur.h b/mothur.h
index 16a9844..7904425 100644
--- a/mothur.h
+++ b/mothur.h
@@ -47,6 +47,9 @@
 #include <ctime>
 #include <limits>
 
+#ifdef USE_MPI
+	#include "mpi.h"
+#endif
 /***********************************************************************/
 
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
diff --git a/mothurout.h b/mothurout.h
index d9c8e5d..7b4732b 100644
--- a/mothurout.h
+++ b/mothurout.h
@@ -26,7 +26,6 @@ class MothurOut {
 		void errorOut(exception&, string, string);
 		int control_pressed;
 		bool executing;
-		
 
 	private:
 		static MothurOut* _uniqueInstance;
diff --git a/removeseqscommand.cpp b/removeseqscommand.cpp
index e8565d0..07e946b 100644
--- a/removeseqscommand.cpp
+++ b/removeseqscommand.cpp
@@ -279,10 +279,13 @@ int RemoveSeqsCommand::readList(){
 				}
 			
 				//get last name
-				if (names.count(binnames) == 0) {  newNames += binnames;  }
+				if (names.count(binnames) == 0) {  newNames += binnames + ",";  }
 
 				//if there are names in this bin add to new list
-				if (newNames != "") {  newList.push_back(newNames);	}
+				if (newNames != "") {  
+					newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
+					newList.push_back(newNames);	
+				}
 			}
 				
 			//print new listvector
diff --git a/sequence.cpp b/sequence.cpp
index 94caf65..b73bfab 100644
--- a/sequence.cpp
+++ b/sequence.cpp
@@ -19,46 +19,101 @@ Sequence::Sequence(){
 /***********************************************************************/
 
 Sequence::Sequence(string newName, string sequence) {
-
-	initialize();	
-	name = newName;
-	
-	//setUnaligned removes any gap characters for us
-	setUnaligned(sequence);
-	setAligned(sequence);
-	
+	try {
+		m = MothurOut::getInstance();
+		initialize();	
+		name = newName;
+		
+		//setUnaligned removes any gap characters for us
+		setUnaligned(sequence);
+		setAligned(sequence);
+	}
+	catch(exception& e) {
+		m->errorOut(e, "Sequence", "Sequence");
+		exit(1);
+	}			
 }
 //********************************************************************************************************************
 //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
-Sequence::Sequence(ifstream& fastaFile){
-
-	initialize();
-	fastaFile >> name;
-	name = name.substr(1);
-	string sequence;
-	
-	//read comments
-	while ((name[0] == '#') && fastaFile) { 
-	    while (!fastaFile.eof())	{	char c = fastaFile.get(); if (c == 10 || c == 13){	break;	}	} // get rest of line if there's any crap there
-		sequence = getCommentString(fastaFile);
+Sequence::Sequence(istringstream& fastaString){
+	try {
+		m = MothurOut::getInstance();
+	int pid;
+	MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+	cout << pid << " after mothur instance " << &name << endl;
+		initialize();
+	cout << "after mothur initialize" << endl;
+		fastaString >> name;
+	cout << "after name "  << endl;
+		name = name.substr(1);
+		string sequence;
 		
-		if (fastaFile) {  
-			fastaFile >> name;  
-			name = name.substr(1);	
-		}else { 
-			name = "";
-			break;
+		//read comments
+		while ((name[0] == '#') && fastaString) { 
+			while (fastaString)	{	char c = fastaString.get(); if (c == 10 || c == 13){	break;	}	} // get rest of line if there's any crap there
+			sequence = getCommentString(fastaString);
+			
+			if (fastaString) {  
+				fastaString >> name;  
+				name = name.substr(1);	
+			}else { 
+				name = "";
+				break;
+			}
 		}
+	cout << "after mothur comment" << endl;	
+		//read real sequence
+		while (fastaString)	{	char c = fastaString.get(); if (c == 10 || c == 13){	break;	}	} // get rest of line if there's any crap there
+	cout << "after mothur name" << endl;	
+		sequence = getSequenceString(fastaString);		
+	cout << "after mothur sequence" << endl;	
+		setAligned(sequence);	
+		//setUnaligned removes any gap characters for us						
+		setUnaligned(sequence);		
 	}
-	
-	//read real sequence
-	while (!fastaFile.eof())	{	char c = fastaFile.get(); if (c == 10 || c == 13){	break;	}	} // get rest of line if there's any crap there
+	catch(exception& e) {
+		m->errorOut(e, "Sequence", "Sequence");
+		exit(1);
+	}								
+}
+
+//********************************************************************************************************************
+//this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
+Sequence::Sequence(ifstream& fastaFile){
+	try {
+		m = MothurOut::getInstance();
+		initialize();
+		fastaFile >> name;
+		name = name.substr(1);
+		string sequence;
+		
+		//read comments
+		while ((name[0] == '#') && fastaFile) { 
+			while (!fastaFile.eof())	{	char c = fastaFile.get(); if (c == 10 || c == 13){	break;	}	} // get rest of line if there's any crap there
+			sequence = getCommentString(fastaFile);
+			
+			if (fastaFile) {  
+				fastaFile >> name;  
+				name = name.substr(1);	
+			}else { 
+				name = "";
+				break;
+			}
+		}
+		
+		//read real sequence
+		while (!fastaFile.eof())	{	char c = fastaFile.get(); if (c == 10 || c == 13){	break;	}	} // get rest of line if there's any crap there
+		
+		sequence = getSequenceString(fastaFile);		
 		
-	sequence = getSequenceString(fastaFile);		
-   	
-	setAligned(sequence);	
-	//setUnaligned removes any gap characters for us						
-	setUnaligned(sequence);								
+		setAligned(sequence);	
+		//setUnaligned removes any gap characters for us						
+		setUnaligned(sequence);	
+	}
+	catch(exception& e) {
+		m->errorOut(e, "Sequence", "Sequence");
+		exit(1);
+	}							
 }
 //********************************************************************************************************************
 string Sequence::getSequenceString(ifstream& fastaFile) {
@@ -108,7 +163,54 @@ string Sequence::getCommentString(ifstream& fastaFile) {
 		exit(1);
 	}
 }
-
+//********************************************************************************************************************
+string Sequence::getSequenceString(istringstream& fastaFile) {
+	try {
+		char letter;
+		string sequence = "";	
+		
+		while(fastaFile){
+			letter= fastaFile.get();
+			if(letter == '>'){
+				fastaFile.putback(letter);
+				break;
+			}
+			else if(isprint(letter)){
+				letter = toupper(letter);
+				if(letter == 'U'){letter = 'T';}
+				sequence += letter;
+			}
+		}
+		
+		return sequence;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "Sequence", "getSequenceString");
+		exit(1);
+	}
+}
+//********************************************************************************************************************
+//comment can contain '>' so we need to account for that
+string Sequence::getCommentString(istringstream& fastaFile) {
+	try {
+		char letter;
+		string sequence = "";
+		
+		while(fastaFile){
+			letter=fastaFile.get();
+			if((letter == '\r') || (letter == '\n')){  
+				gobble(fastaFile);  //in case its a \r\n situation
+				break;
+			}
+		}
+		
+		return sequence;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "Sequence", "getCommentString");
+		exit(1);
+	}
+}
 //********************************************************************************************************************
 
 void Sequence::initialize(){
diff --git a/sequence.hpp b/sequence.hpp
index f48bf13..5f84d44 100644
--- a/sequence.hpp
+++ b/sequence.hpp
@@ -24,6 +24,7 @@ public:
 	Sequence();
 	Sequence(string, string);
 	Sequence(ifstream&);
+	Sequence(istringstream&);
 	
 	void setName(string);
 	void setUnaligned(string);
@@ -51,6 +52,8 @@ private:
 	void initialize();
 	string getSequenceString(ifstream&);
 	string getCommentString(ifstream&);
+	string getSequenceString(istringstream&);
+	string getCommentString(istringstream&);
 	string name;
 	string unaligned;
 	string aligned;
diff --git a/sparsematrix.hpp b/sparsematrix.hpp
index e42fdbb..c8a11d7 100644
--- a/sparsematrix.hpp
+++ b/sparsematrix.hpp
@@ -27,6 +27,7 @@ class SparseMatrix {
 	
 public:
 	SparseMatrix();
+	~SparseMatrix(){  while(!mins.empty() && mins.back() == NULL){  mins.pop_back();	}  }
 	int getNNodes();
 	void print();					//Print the contents of the matrix
 	void print(ListVector*);		//Print the contents of the matrix
-- 
2.39.5