From ee8403d4eb5760187d62b42a9cf4272de8fc0ec4 Mon Sep 17 00:00:00 2001
From: Sarah Westcott <mothur.westcott@gmail.com>
Date: Tue, 12 Jun 2012 11:27:51 -0400
Subject: [PATCH] changed reading of name file to use buffered reads. note the
 splitWhiteSpace function is sensitive to the gobble function. do not use
 the two together while reading or the read can get off track. modified
 trim.seqs group counts to include the redundant seqs if a names file is
 provided.  changed group maps read of a group file to be buffered.  modified
 appendFiles functions to be buffered.

---
 aligncommand.cpp          |  40 ++-------
 aligncommand.h            |   1 -
 classifyseqscommand.cpp   |  30 +------
 classifyseqscommand.h     |   1 -
 consensusseqscommand.cpp  |  41 ++++-----
 createdatabasecommand.cpp |  28 +-----
 createdatabasecommand.h   |   1 -
 groupmap.cpp              | 176 +++++++++++++++++++++++++-------------
 mothurout.cpp             | 162 +++++++++++++++++++++++++----------
 mothurout.h               |   1 +
 screenseqscommand.h       |   1 -
 subsamplecommand.cpp      |  35 ++------
 trimseqscommand.cpp       |   7 +-
 trimseqscommand.h         |   6 +-
 14 files changed, 280 insertions(+), 250 deletions(-)

diff --git a/aligncommand.cpp b/aligncommand.cpp
index 8215de3..75466f9 100644
--- a/aligncommand.cpp
+++ b/aligncommand.cpp
@@ -875,7 +875,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
 			if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
 			in.close(); m->mothurRemove(tempFile);
 			
-			appendAlignFiles((alignFileName + toString(processIDS[i]) + ".temp"), alignFileName);
+			m->appendFiles((alignFileName + toString(processIDS[i]) + ".temp"), alignFileName);
 			m->mothurRemove((alignFileName + toString(processIDS[i]) + ".temp"));
 			
 			appendReportFiles((reportFileName + toString(processIDS[i]) + ".temp"), reportFileName);
@@ -892,7 +892,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
 			rename(nonBlankAccnosFiles[0].c_str(), accnosFName.c_str());
 			
 			for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
-				appendAlignFiles(nonBlankAccnosFiles[h], accnosFName);
+				m->appendFiles(nonBlankAccnosFiles[h], accnosFName);
 				m->mothurRemove(nonBlankAccnosFiles[h]);
 			}
 		}else { //recreate the accnosfile if needed
@@ -957,7 +957,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
 		else { m->mothurRemove(accnosFName); } //remove so other files can be renamed to it
 		
 		for (int i = 1; i < processors; i++) {
-			appendAlignFiles((alignFileName + toString(i) + ".temp"), alignFileName);
+			m->appendFiles((alignFileName + toString(i) + ".temp"), alignFileName);
 			m->mothurRemove((alignFileName + toString(i) + ".temp"));
 			
 			appendReportFiles((reportFileName + toString(i) + ".temp"), reportFileName);
@@ -973,7 +973,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
 			rename(nonBlankAccnosFiles[0].c_str(), accnosFName.c_str());
 			
 			for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
-				appendAlignFiles(nonBlankAccnosFiles[h], accnosFName);
+				m->appendFiles(nonBlankAccnosFiles[h], accnosFName);
 				m->mothurRemove(nonBlankAccnosFiles[h]);
 			}
 		}else { //recreate the accnosfile if needed
@@ -990,29 +990,6 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
 		exit(1);
 	}
 }
-/**************************************************************************************************/
-
-void AlignCommand::appendAlignFiles(string temp, string filename) {
-	try{
-		
-		ofstream output;
-		ifstream input;
-		m->openOutputFileAppend(filename, output);
-		m->openInputFile(temp, input);
-		
-		while(char c = input.get()){
-			if(input.eof())		{	break;			}
-			else				{	output << c;	}
-		}
-		
-		input.close();
-		output.close();
-	}
-	catch(exception& e) {
-		m->errorOut(e, "AlignCommand", "appendAlignFiles");
-		exit(1);
-	}
-}
 //**********************************************************************************************************************
 
 void AlignCommand::appendReportFiles(string temp, string filename) {
@@ -1025,10 +1002,11 @@ void AlignCommand::appendReportFiles(string temp, string filename) {
 
 		while (!input.eof())	{	char c = input.get(); if (c == 10 || c == 13){	break;	}	} // get header line
 				
-		while(char c = input.get()){
-			if(input.eof())		{	break;			}
-			else				{	output << c;	}
-		}
+        char buffer[4096];        
+        while (!input.eof()) {
+            input.read(buffer, 4096);
+            output.write(buffer, input.gcount());
+        }
 		
 		input.close();
 		output.close();
diff --git a/aligncommand.h b/aligncommand.h
index 7eeaa1e..d4b7e78 100644
--- a/aligncommand.h
+++ b/aligncommand.h
@@ -55,7 +55,6 @@ private:
 	
 	int driver(linePair*, string, string, string, string);
 	int createProcesses(string, string, string, string);
-	void appendAlignFiles(string, string); 
 	void appendReportFiles(string, string);
 	
 	#ifdef USE_MPI
diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp
index b6dc24f..158069e 100644
--- a/classifyseqscommand.cpp
+++ b/classifyseqscommand.cpp
@@ -919,8 +919,8 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
 		else { m->mothurRemove(accnos); } //remove so other files can be renamed to it
         
 		for(int i=0;i<processIDS.size();i++){
-			appendTaxFiles((taxFileName + toString(processIDS[i]) + ".temp"), taxFileName);
-			appendTaxFiles((tempTaxFile + toString(processIDS[i]) + ".temp"), tempTaxFile);
+			m->appendFiles((taxFileName + toString(processIDS[i]) + ".temp"), taxFileName);
+			m->appendFiles((tempTaxFile + toString(processIDS[i]) + ".temp"), tempTaxFile);
             if (!(m->isBlank(accnos + toString(processIDS[i]) + ".temp"))) {
 				nonBlankAccnosFiles.push_back(accnos + toString(processIDS[i]) + ".temp");
 			}else { m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));  }
@@ -934,7 +934,7 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
 			rename(nonBlankAccnosFiles[0].c_str(), accnos.c_str());
 			
 			for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
-				appendTaxFiles(nonBlankAccnosFiles[h], accnos);
+				m->appendFiles(nonBlankAccnosFiles[h], accnos);
 				m->mothurRemove(nonBlankAccnosFiles[h]);
 			}
 		}else { //recreate the accnosfile if needed
@@ -951,30 +951,6 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
 		exit(1);
 	}
 }
-/**************************************************************************************************/
-
-void ClassifySeqsCommand::appendTaxFiles(string temp, string filename) {
-	try{
-		
-		ofstream output;
-		ifstream input;
-		m->openOutputFileAppend(filename, output);
-		m->openInputFile(temp, input);
-		
-		while(char c = input.get()){
-			if(input.eof())		{	break;			}
-			else				{	output << c;	}
-		}
-		
-		input.close();
-		output.close();
-	}
-	catch(exception& e) {
-		m->errorOut(e, "ClassifySeqsCommand", "appendTaxFiles");
-		exit(1);
-	}
-}
-
 //**********************************************************************************************************************
 
 int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempTFName, string accnos, string filename){
diff --git a/classifyseqscommand.h b/classifyseqscommand.h
index acee70c..1316a25 100644
--- a/classifyseqscommand.h
+++ b/classifyseqscommand.h
@@ -75,7 +75,6 @@ private:
 	bool abort, probs, save, flip;
 	
 	int driver(linePair*, string, string, string, string);
-	void appendTaxFiles(string, string);
 	int createProcesses(string, string, string, string); 
 	string addUnclassifieds(string, int);
 	
diff --git a/consensusseqscommand.cpp b/consensusseqscommand.cpp
index 223e5db..55ec802 100644
--- a/consensusseqscommand.cpp
+++ b/consensusseqscommand.cpp
@@ -653,38 +653,29 @@ int ConsensusSeqsCommand::readFasta(){
 
 int ConsensusSeqsCommand::readNames(){
 	 try{
-		 
-		 ifstream in;
-		 m->openInputFile(namefile, in);
-		 
-		 string thisname, repnames;
-		 map<string, string>::iterator it;
-		 
-		 bool error = false;
-		 
-		 while(!in.eof()){
-			 
-			 if (m->control_pressed) { break; }
-			 
-			 in >> thisname;		m->gobble(in);		//read from first column
-			 in >> repnames;			//read from second column
-			 
-			 it = nameMap.find(thisname);
+         map<string, string> temp;
+         map<string, string>::iterator it;
+         bool error = false;
+         
+         m->readNames(namefile, temp); //use central buffered read
+         
+         for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end(); itTemp++) {
+             string thisname, repnames;
+             thisname = itTemp->first;
+             repnames = itTemp->second;
+             
+             it = nameMap.find(thisname);
 			 if (it != nameMap.end()) { //then this sequence was in the fastafile
-				 
+				 nameFileMap[thisname] = repnames;	//for later when outputting the new namesFile if the list file is unique
+                 
 				 vector<string> splitRepNames;
 				 m->splitAtComma(repnames, splitRepNames);
 				 
-				 nameFileMap[thisname] = repnames;	//for later when outputting the new namesFile if the list file is unique
 				 for (int i = 0; i < splitRepNames.size(); i++) { nameMap[splitRepNames[i]] = thisname; }
 				 
 			 }else{	m->mothurOut("[ERROR]: " + thisname + " is not in the fasta file, please correct."); m->mothurOutEndLine(); error = true; }
-			 
-			 m->gobble(in);
-		 }
-		 
-		 in.close();
-		 
+         }
+         
 		 if (error) { m->control_pressed = true; }
  
 		 return 0;
diff --git a/createdatabasecommand.cpp b/createdatabasecommand.cpp
index 1da67e6..57d5264 100644
--- a/createdatabasecommand.cpp
+++ b/createdatabasecommand.cpp
@@ -209,7 +209,7 @@ int CreateDatabaseCommand::execute(){
         
         //names redundants to uniques. backwards to how we normally do it, but each bin is the list file will be a key entry in the map.
         map<string, string> repNames;
-        int numUniqueNamesFile = readNames(repNames);
+        int numUniqueNamesFile = m->readNames(repnamesfile, repNames);
         
         //are there the same number of otus in the fasta and name files
         if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file.  These should match.\n"); m->control_pressed = true; }
@@ -394,32 +394,6 @@ vector<int> CreateDatabaseCommand::readFasta(vector<Sequence>& seqs){
 		exit(1);
 	}
 }
-/**********************************************************************************************************************/
-int CreateDatabaseCommand::readNames(map<string, string>& nameMap) { 
-	try {
-		
-		//open input file
-		ifstream in;
-		m->openInputFile(repnamesfile, in);
-		
-		while (!in.eof()) {
-			if (m->control_pressed) { break; }
-			
-			string firstCol, secondCol;
-			in >> firstCol >> secondCol; m->gobble(in);
-			
-			nameMap[secondCol] = firstCol;
-		}
-		in.close();
-		
-		return nameMap.size();
-		
-	}
-	catch(exception& e) {
-		m->errorOut(e, "CreateDatabaseCommand", "readNames");
-		exit(1);
-	}
-}
 //**********************************************************************************************************************
 ListVector* CreateDatabaseCommand::getList(){
 	try {
diff --git a/createdatabasecommand.h b/createdatabasecommand.h
index 643ff6e..37e3013 100644
--- a/createdatabasecommand.h
+++ b/createdatabasecommand.h
@@ -39,7 +39,6 @@ private:
 		
 	vector<int> readFasta(vector<Sequence>&);
     vector<int> readTax(vector<string>&);
-    int readNames(map<string, string>&); 
 	ListVector* getList();
 	
 };
diff --git a/groupmap.cpp b/groupmap.cpp
index 92a43e9..612b236 100644
--- a/groupmap.cpp
+++ b/groupmap.cpp
@@ -23,84 +23,138 @@
 
 /************************************************************/
 int GroupMap::readMap() {
-		string seqName, seqGroup;
+    try {
+        string seqName, seqGroup;
 		int error = 0;
-
-		while(fileHandle){
-			fileHandle >> seqName;	m->gobble(fileHandle);		//read from first column
-			fileHandle >> seqGroup;			//read from second column
-			
-			if (m->control_pressed) {  fileHandle.close();  return 1; }
-	
-			setNamesOfGroups(seqGroup);
-			
-			it = groupmap.find(seqName);
-			
-			if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
-			else {
-				groupmap[seqName] = seqGroup;	//store data in map
-				seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
-			}
-			m->gobble(fileHandle);
-		}
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+    
+        while (!fileHandle.eof()) {
+            if (m->control_pressed) { fileHandle.close();  return 1; }
+        
+            fileHandle.read(buffer, 4096);
+            vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
+        
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  seqName = pieces[i]; columnOne=false; }
+                else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+            
+                if (pairDone) { 
+                    setNamesOfGroups(seqGroup);
+                    
+                    it = groupmap.find(seqName);
+                    
+                    if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
+                    else {
+                        groupmap[seqName] = seqGroup;	//store data in map
+                        seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+                    }
+                    pairDone = false; 
+                } 
+            }
+        }
 		fileHandle.close();
+        
 		m->setAllGroups(namesOfGroups);
 		return error;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "GroupMap", "readMap");
+		exit(1);
+	}
 }
 /************************************************************/
 int GroupMap::readDesignMap() {
-		string seqName, seqGroup;
+    try {
+        string seqName, seqGroup;
 		int error = 0;
-
-		while(fileHandle){
-			fileHandle >> seqName;	m->gobble(fileHandle);		//read from first column
-			fileHandle >> seqGroup;			//read from second column
-			
-			if (m->control_pressed) {  fileHandle.close();  return 1; }
-	
-			setNamesOfGroups(seqGroup);
-			
-			it = groupmap.find(seqName);
-			
-			if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 group named " + seqName + ", group names must be unique. Please correct."); m->mothurOutEndLine();  }
-			else {
-				groupmap[seqName] = seqGroup;	//store data in map
-				seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
-			}
-			m->gobble(fileHandle);
-		}
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+        
+        while (!fileHandle.eof()) {
+            if (m->control_pressed) { fileHandle.close();  return 1; }
+            
+            fileHandle.read(buffer, 4096);
+            vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  seqName = pieces[i]; columnOne=false; }
+                else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    setNamesOfGroups(seqGroup);
+                    
+                    it = groupmap.find(seqName);
+                    
+                    if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
+                    else {
+                        groupmap[seqName] = seqGroup;	//store data in map
+                        seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+                    }
+                    pairDone = false; 
+                } 
+            }
+        }
 		fileHandle.close();
+        
 		m->setAllGroups(namesOfGroups);
 		return error;
+    }
+	catch(exception& e) {
+		m->errorOut(e, "GroupMap", "readDesignMap");
+		exit(1);
+	}
 }
 /************************************************************/
 int GroupMap::readDesignMap(string filename) {
-    groupFileName = filename;
-	m->openInputFile(filename, fileHandle);
-	index = 0;
-    string seqName, seqGroup;
-    int error = 0;
-    
-    while(fileHandle){
-        fileHandle >> seqName;	m->gobble(fileHandle);		//read from first column
-        fileHandle >> seqGroup;			//read from second column
-        
-        if (m->control_pressed) {  fileHandle.close();  return 1; }
-        
-        setNamesOfGroups(seqGroup);
-        
-        it = groupmap.find(seqName);
+    try {
+        groupFileName = filename;
+        m->openInputFile(filename, fileHandle);
+        index = 0;
+        string seqName, seqGroup;
+		int error = 0;
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
         
-        if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 group named " + seqName + ", group names must be unique. Please correct."); m->mothurOutEndLine();  }
-        else {
-            groupmap[seqName] = seqGroup;	//store data in map
-            seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+        while (!fileHandle.eof()) {
+            if (m->control_pressed) { fileHandle.close();  return 1; }
+            
+            fileHandle.read(buffer, 4096);
+            vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  seqName = pieces[i]; columnOne=false; }
+                else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    setNamesOfGroups(seqGroup);
+                    
+                    it = groupmap.find(seqName);
+                    
+                    if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
+                    else {
+                        groupmap[seqName] = seqGroup;	//store data in map
+                        seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+                    }
+                    pairDone = false; 
+                } 
+            }
         }
-        m->gobble(fileHandle);
+		fileHandle.close();
+        
+		m->setAllGroups(namesOfGroups);
+		return error;
     }
-    fileHandle.close();
-    m->setAllGroups(namesOfGroups);
-    return error;
+	catch(exception& e) {
+		m->errorOut(e, "GroupMap", "readDesignMap");
+		exit(1);
+	}
 }
 /************************************************************/
 int GroupMap::getNumGroups() { return namesOfGroups.size();	}
diff --git a/mothurout.cpp b/mothurout.cpp
index 6ecb86f..f98bea8 100644
--- a/mothurout.cpp
+++ b/mothurout.cpp
@@ -1092,11 +1092,14 @@ int MothurOut::appendFiles(string temp, string filename) {
 		
 		int numLines = 0;
 		if (ableToOpen == 0) { //you opened it
-			while(!input.eof()){
-                char c = input.get();
-				if(input.eof())		{	break;			}
-				else				{	output << c;	if (c == '\n') {numLines++;} }
-			}
+            
+            char buffer[4096];        
+            while (!input.eof()) {
+                input.read(buffer, 4096);
+                output.write(buffer, input.gcount());
+                //count number of lines
+                for (int i = 0; i < input.gcount(); i++) {  if (buffer[i] == '\n') {numLines++;} }
+            }
 			input.close();
 		}
 		
@@ -1454,6 +1457,30 @@ float MothurOut::ceilDist(float dist, int precision){
 		exit(1);
 	}
 }
+/***********************************************************************/
+
+vector<string> MothurOut::splitWhiteSpace(string& rest, char buffer[], int size){
+	try {
+        vector<string> pieces;
+        
+        for (int i = 0; i < size; i++) {
+            if (!isspace(buffer[i]))  { rest += buffer[i];  }
+            else {
+                pieces.push_back(rest);  rest = "";
+                while (i < size) {  //gobble white space
+                    if (isspace(buffer[i])) { i++; }
+                    else { rest = buffer[i];  break; } //cout << "next piece buffer = " << nextPiece << endl;
+                } 
+            }
+        }
+        
+        return pieces;
+	}
+	catch(exception& e) {
+		errorOut(e, "MothurOut", "parsePieces");
+		exit(1);
+	}
+}
 /**********************************************************************************************************************/
 int MothurOut::readNames(string namefile, map<string, string>& nameMap) { 
 	try {
@@ -1461,14 +1488,25 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
 		//open input file
 		ifstream in;
 		openInputFile(namefile, in);
-		
+
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+        string firstCol, secondCol;
+        
 		while (!in.eof()) {
 			if (control_pressed) { break; }
 			
-			string firstCol, secondCol;
-			in >> firstCol >> secondCol; gobble(in);
-			
-			nameMap[firstCol] = secondCol;
+            in.read(buffer, 4096);
+            vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+             
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; }
+            }
 		}
 		in.close();
 		
@@ -1488,21 +1526,33 @@ int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap)
 		ifstream in;
 		openInputFile(namefile, in);
 		
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+        string firstCol, secondCol;
+        
 		while (!in.eof()) {
 			if (control_pressed) { break; }
 			
-			string firstCol, secondCol;
-			in >> firstCol >> secondCol; gobble(in);
-			
-			vector<string> temp;
-			splitAtComma(secondCol, temp);
-			
-			nameMap[firstCol] = temp;
+            in.read(buffer, 4096);
+            vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    vector<string> temp;
+                    splitAtComma(secondCol, temp);
+                    nameMap[firstCol] = temp;
+                    pairDone = false;  
+                } 
+            }
 		}
 		in.close();
-		
+        
 		return nameMap.size();
-		
 	}
 	catch(exception& e) {
 		errorOut(e, "MothurOut", "readNames");
@@ -1519,18 +1569,30 @@ map<string, int> MothurOut::readNames(string namefile) {
 		ifstream in;
 		openInputFile(namefile, in);
 		
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+        string firstCol, secondCol;
+        
 		while (!in.eof()) {
 			if (control_pressed) { break; }
 			
-			string firstCol, secondCol;
-			in >> firstCol;  gobble(in);
-            in >> secondCol; gobble(in);
-			
-			int num = getNumNames(secondCol);
-			
-			nameMap[firstCol] = num;
+            in.read(buffer, 4096);
+            vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    int num = getNumNames(secondCol);
+                    nameMap[firstCol] = num;
+                    pairDone = false;  
+                } 
+            }
 		}
-		in.close();
+        in.close();
 		
 		return nameMap;
 		
@@ -1549,27 +1611,41 @@ int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, m
 		ifstream in;
 		openInputFile(namefile, in);
 		
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+        string firstCol, secondCol;
+        
 		while (!in.eof()) {
 			if (control_pressed) { break; }
 			
-			string firstCol, secondCol;
-			in >> firstCol >> secondCol; gobble(in);
-			
-			int num = getNumNames(secondCol);
-			
-			map<string, string>::iterator it = fastamap.find(firstCol);
-			if (it == fastamap.end()) {
-				error = 1;
-				mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
-			}else {
-				seqPriorityNode temp(num, it->second, firstCol);
-				nameVector.push_back(temp);
-			}
+            in.read(buffer, 4096);
+            vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    int num = getNumNames(secondCol);
+                    
+                    map<string, string>::iterator it = fastamap.find(firstCol);
+                    if (it == fastamap.end()) {
+                        error = 1;
+                        mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
+                    }else {
+                        seqPriorityNode temp(num, it->second, firstCol);
+                        nameVector.push_back(temp);
+                    }
+                    
+                    pairDone = false;  
+                } 
+            }
 		}
-		in.close();
-		
+        in.close();
+        
 		return error;
-		
 	}
 	catch(exception& e) {
 		errorOut(e, "MothurOut", "readNames");
diff --git a/mothurout.h b/mothurout.h
index cc8bfb6..b19c05a 100644
--- a/mothurout.h
+++ b/mothurout.h
@@ -97,6 +97,7 @@ class MothurOut {
 		string getline(istringstream&);
 		void gobble(istream&);
 		void gobble(istringstream&);
+        vector<string> splitWhiteSpace(string& rest, char[], int);
 		map<string, int> readNames(string);
 		int readNames(string, map<string, string>&);
 		int readNames(string, map<string, vector<string> >&);
diff --git a/screenseqscommand.h b/screenseqscommand.h
index 291d8e6..54c8fbb 100644
--- a/screenseqscommand.h
+++ b/screenseqscommand.h
@@ -60,7 +60,6 @@ private:
 	vector<string> outputNames;
 	vector<string> optimize;
 	map<string, int> nameMap;
-	int readNames();
 	
 	int getSummary(vector<unsigned long long>&);
 	int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string);
diff --git a/subsamplecommand.cpp b/subsamplecommand.cpp
index 717b1d3..aebba6b 100644
--- a/subsamplecommand.cpp
+++ b/subsamplecommand.cpp
@@ -639,34 +639,13 @@ int SubSampleCommand::getNames() {
 int SubSampleCommand::readNames() {
 	try {
 		
-		ifstream in;
-		m->openInputFile(namefile, in);
-		
-		string thisname, repnames;
-		map<string, vector<string> >::iterator it;
-		
-		while(!in.eof()){
-			
-			if (m->control_pressed) { in.close(); return 0; }
-			
-			in >> thisname;		m->gobble(in);		//read from first column
-			in >> repnames;			//read from second column
-			
-			it = nameMap.find(thisname);
-			if (it == nameMap.end()) {
-				
-				vector<string> splitRepNames;
-				m->splitAtComma(repnames, splitRepNames);
-				
-				nameMap[thisname] = splitRepNames;	
-				for (int i = 0; i < splitRepNames.size(); i++) { names.push_back(splitRepNames[i]); }
-				
-			}else{	m->mothurOut(thisname + " is already in namesfile. I will use first definition."); m->mothurOutEndLine();  }
-			
-			m->gobble(in);
-		}
-		in.close();	
-		
+        nameMap.clear();
+        m->readNames(namefile, nameMap);
+        
+        //save names of all sequences
+        map<string, vector<string> >::iterator it;
+        for (it = nameMap.begin(); it != nameMap.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { names.push_back((it->second)[i]); } }
+        
 		return 0;
 		
 	}
diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp
index c019a70..6f5bb97 100644
--- a/trimseqscommand.cpp
+++ b/trimseqscommand.cpp
@@ -687,6 +687,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
 						currQual.printQScores(trimQualFile);
 					}
 					
+                    
 					if(nameFile != ""){
 						map<string, string>::iterator itName = nameMap.find(currSeq.getName());
 						if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
@@ -708,11 +709,13 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
 							
 							outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
 							
+                            int numRedundants = 0;
 							if (nameFile != "") {
 								map<string, string>::iterator itName = nameMap.find(currSeq.getName());
 								if (itName != nameMap.end()) { 
 									vector<string> thisSeqsNames; 
 									m->splitAtChar(itName->second, thisSeqsNames, ',');
+                                    numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
 									for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
 										outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
 									}
@@ -720,8 +723,8 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
 							}
 							
 							map<string, int>::iterator it = groupCounts.find(thisGroup);
-							if (it == groupCounts.end()) {	groupCounts[thisGroup] = 1; }
-							else { groupCounts[it->first]++; }
+							if (it == groupCounts.end()) {	groupCounts[thisGroup] = 1 + numRedundants; }
+							else { groupCounts[it->first] += (1 + numRedundants); }
 								
 						}
 					}
diff --git a/trimseqscommand.h b/trimseqscommand.h
index ba4e614..e280c8a 100644
--- a/trimseqscommand.h
+++ b/trimseqscommand.h
@@ -413,11 +413,13 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
 							
 							outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
 							
+                            int numRedundants = 0;
 							if (pDataArray->nameFile != "") {
 								map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
 								if (itName != pDataArray->nameMap.end()) { 
 									vector<string> thisSeqsNames; 
 									pDataArray->m->splitAtChar(itName->second, thisSeqsNames, ',');
+                                    numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
 									for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
 										outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
 									}
@@ -425,8 +427,8 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
 							}
 							
 							map<string, int>::iterator it = pDataArray->groupCounts.find(thisGroup);
-							if (it == pDataArray->groupCounts.end()) {	pDataArray->groupCounts[thisGroup] = 1; }
-							else { pDataArray->groupCounts[it->first]++; }
+							if (it == pDataArray->groupCounts.end()) {	pDataArray->groupCounts[thisGroup] = 1 + numRedundants; }
+							else { pDataArray->groupCounts[it->first] += (1 + numRedundants); }
                             
 						}
 					}
-- 
2.39.5