From afcbef163b4f32d0ff25a834cb9af8eef8d06ffa Mon Sep 17 00:00:00 2001
From: westcott <westcott>
Date: Fri, 27 Aug 2010 13:50:03 +0000
Subject: [PATCH] fixed phylo.diversity

---
 aligncommand.cpp          |   3 +-
 clearcutcommand.cpp       |   5 +-
 makefile                  |   4 +-
 mothurout.cpp             |  67 +++++++++++++++++-----
 phylodiversitycommand.cpp |  31 +++++++---
 phylodiversitycommand.h   |   2 +-
 preclustercommand.cpp     | 117 +++++++++++++++++++++-----------------
 preclustercommand.h       |   7 ++-
 readotu.cpp               |   2 +-
 readtreecommand.cpp       |  82 +++++++++++++-------------
 readtreecommand.h         |   1 +
 sharedcommand.cpp         |   8 +++
 sharedrabundvector.cpp    |   1 +
 summarycommand.cpp        |   6 +-
 summarycommand.h          |   2 +-
 tree.cpp                  |  12 ++--
 treegroupscommand.cpp     |  17 +++---
 17 files changed, 225 insertions(+), 142 deletions(-)
diff --git a/aligncommand.cpp b/aligncommand.cpp
index 0f4c89a..5aabeef 100644
--- a/aligncommand.cpp
+++ b/aligncommand.cpp
@@ -90,6 +90,8 @@ AlignCommand::AlignCommand(string option)  {
 				
 				//go through files and make sure they are good, if not, then disregard them
 				for (int i = 0; i < candidateFileNames.size(); i++) {
+					//candidateFileNames[i] = m->getFullPathName(candidateFileNames[i]);
+					
 					if (inputDir != "") {
 						string path = m->hasPath(candidateFileNames[i]);
 						//if the user has not given a path then, add inputdir. else leave path alone.
@@ -348,7 +350,6 @@ int AlignCommand::execute(){
 #else
 
 		vector<unsigned long int> positions = m->divideFile(candidateFileNames[s], processors);
-				
 		for (int i = 0; i < (positions.size()-1); i++) {
 			lines.push_back(new linePair(positions[i], positions[(i+1)]));
 		}	
diff --git a/clearcutcommand.cpp b/clearcutcommand.cpp
index 7d0d539..eabcc73 100644
--- a/clearcutcommand.cpp
+++ b/clearcutcommand.cpp
@@ -137,7 +137,6 @@ void ClearcutCommand::help(){
 	try {
 		m->mothurOut("The clearcut command interfaces mothur with the clearcut program written by Initiative for Bioinformatics and Evolutionary Studies (IBEST) at the University of Idaho.\n");
 		m->mothurOut("For more information about clearcut refer to http://bioinformatics.hungry.com/clearcut/ \n");
-		m->mothurOut("The clearcut executable must be in a folder called clearcut in the same folder as your mothur executable, similar to mothur's requirements for using blast. \n");
 		m->mothurOut("The clearcut command parameters are phylip, fasta, version, verbose, quiet, seed, norandom, shuffle, neighbor, expblen, expdist, ntrees, matrixout, stdout, kimura, jukes, protein, DNA. \n");
 		m->mothurOut("The phylip parameter allows you to enter your phylip formatted distance matrix. \n");
 		m->mothurOut("The fasta parameter allows you to enter your aligned fasta file, if you enter a fastafile you specify if the sequences are DNA or protein using the DNA or protein parameters. \n");
@@ -255,6 +254,10 @@ int ClearcutCommand::execute() {
 		
 		clearcut_main(numArgs, clearcutParameters); 
 		
+		//free memory
+		for(int i = 0; i < cPara.size(); i++)  {  delete[] cPara[i];  }
+		delete[] clearcutParameters; 
+		
 		if (!stdoutWanted) {	
 			m->mothurOutEndLine();
 			m->mothurOut("Output File Names: "); m->mothurOutEndLine();
diff --git a/makefile b/makefile
index dc5065d..ffee66e 100644
--- a/makefile
+++ b/makefile
@@ -15,8 +15,8 @@ CXXFLAGS += -O3
 
 MOTHUR_FILES = "\"../Release\""
 
-RELEASE_DATE = "\"8/5/2010\""
-VERSION = "\"1.12.3\""
+RELEASE_DATE = "\"8/30/2010\""
+VERSION = "\"1.13.0\""
 
 CXXFLAGS += -DRELEASE_DATE=${RELEASE_DATE} -DVERSION=${VERSION}
 
diff --git a/mothurout.cpp b/mothurout.cpp
index fe3ea09..88a18ed 100644
--- a/mothurout.cpp
+++ b/mothurout.cpp
@@ -257,6 +257,8 @@ int MothurOut::mem_usage(double& vm_usage, double& resident_set) {
 /***********************************************************************/
 int MothurOut::openOutputFileAppend(string fileName, ofstream& fileHandle){
 	try {
+		fileName = getFullPathName(fileName);
+		
 		fileHandle.open(fileName.c_str(), ios::app);
 		if(!fileHandle) {
 			mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
@@ -445,6 +447,9 @@ string MothurOut::getExtension(string longName){
 /***********************************************************************/
 bool MothurOut::isBlank(string fileName){
 	try {
+		
+		fileName = getFullPathName(fileName);
+		
 		ifstream fileHandle;
 		fileHandle.open(fileName.c_str());
 		if(!fileHandle) {
@@ -829,7 +834,8 @@ vector<unsigned long int> MothurOut::setFilePosFasta(string filename, int& num)
 /**************************************************************************************************/
 vector<unsigned long int> MothurOut::setFilePosEachLine(string filename, int& num) {
 	try {
-
+			filename = getFullPathName(filename);
+			
 			vector<unsigned long int> positions;
 			ifstream in;
 			openInputFile(filename, in);
@@ -851,7 +857,7 @@ vector<unsigned long int> MothurOut::setFilePosEachLine(string filename, int& nu
 		
 			FILE * pFile;
 			unsigned long int size;
-		
+			
 			//get num bytes in file
 			pFile = fopen (filename.c_str(),"rb");
 			if (pFile==NULL) perror ("Error opening file");
@@ -881,6 +887,8 @@ vector<unsigned long int> MothurOut::divideFile(string filename, int& proc) {
 		FILE * pFile;
 		unsigned long int size;
 		
+		filename = getFullPathName(filename);
+		
 		//get num bytes in file
 		pFile = fopen (filename.c_str(),"rb");
 		if (pFile==NULL) perror ("Error opening file");
@@ -893,7 +901,7 @@ vector<unsigned long int> MothurOut::divideFile(string filename, int& proc) {
 		//estimate file breaks
 		unsigned long int chunkSize = 0;
 		chunkSize = size / proc;
-		
+	
 		//file to small to divide by processors
 		if (chunkSize == 0)  {  proc = 1;	filePos.push_back(size); return filePos;	}
 	
@@ -914,15 +922,16 @@ vector<unsigned long int> MothurOut::divideFile(string filename, int& proc) {
 			
 			//there was not another sequence before the end of the file
 			unsigned long int sanityPos = in.tellg();
-			if (sanityPos = -1) {	break;  }
-			else {   filePos.push_back(newSpot);  }
+
+			if (sanityPos == -1) {	break;  }
+			else {  filePos.push_back(newSpot);  }
 			
 			in.close();
 		}
 		
 		//save end pos
 		filePos.push_back(size);
-		
+
 		//sanity check filePos
 		for (int i = 0; i < (filePos.size()-1); i++) {
 			if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
@@ -1112,7 +1121,21 @@ void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbo
 //This function parses the estimator options and puts them in a vector
 void MothurOut::splitAtDash(string& estim, vector<string>& container) {
 	try {
-		string individual;
+		string individual = "";
+		int estimLength = estim.size();
+		for(int i=0;i<estimLength;i++){
+			if(estim[i] == '-'){
+				container.push_back(individual);
+				individual = "";				
+			}
+			else{
+				individual += estim[i];
+			}
+		}
+		container.push_back(individual);
+
+	
+	/*	string individual;
 		
 		while (estim.find_first_of('-') != -1) {
 			individual = estim.substr(0,estim.find_first_of('-'));
@@ -1122,7 +1145,7 @@ void MothurOut::splitAtDash(string& estim, vector<string>& container) {
 			}
 		}
 		//get last one
-		container.push_back(estim);
+		container.push_back(estim); */
 	}
 	catch(exception& e) {
 		errorOut(e, "MothurOut", "splitAtDash");
@@ -1134,17 +1157,31 @@ void MothurOut::splitAtDash(string& estim, vector<string>& container) {
 //This function parses the label options and puts them in a set
 void MothurOut::splitAtDash(string& estim, set<string>& container) {
 	try {
-		string individual;
-		
-		while (estim.find_first_of('-') != -1) {
-			individual = estim.substr(0,estim.find_first_of('-'));
-			if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
-				estim = estim.substr(estim.find_first_of('-')+1, estim.length());
+		string individual = "";
+		int estimLength = estim.size();
+		for(int i=0;i<estimLength;i++){
+			if(estim[i] == '-'){
 				container.insert(individual);
+				individual = "";				
+			}
+			else{
+				individual += estim[i];
 			}
 		}
+		container.insert(individual);
+
+	//	string individual;
+		
+	//	while (estim.find_first_of('-') != -1) {
+	//		individual = estim.substr(0,estim.find_first_of('-'));
+	//		if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
+	//			estim = estim.substr(estim.find_first_of('-')+1, estim.length());
+	//			container.insert(individual);
+	//		}
+	//	}
 		//get last one
-		container.insert(estim);
+	//	container.insert(estim);
+	
 	}
 	catch(exception& e) {
 		errorOut(e, "MothurOut", "splitAtDash");
diff --git a/phylodiversitycommand.cpp b/phylodiversitycommand.cpp
index 619a7f7..98002e7 100644
--- a/phylodiversitycommand.cpp
+++ b/phylodiversitycommand.cpp
@@ -183,17 +183,19 @@ int PhyloDiversityCommand::execute(){
 		
 				//initialize counts
 				map<string, int> counts;
-				for (int j = 0; j < globaldata->Groups.size(); j++) {  counts[globaldata->Groups[j]] = 0; }
+				map< string, set<int> > countedBranch;  
+				for (int j = 0; j < globaldata->Groups.size(); j++) {  counts[globaldata->Groups[j]] = 0; countedBranch[globaldata->Groups[j]].insert(-2);  }  //add dummy index to initialize countedBranch sets
 				
 				for(int k = 0; k < numLeafNodes; k++){
 						
 					if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {	remove(outputNames[i].c_str()); 	} return 0; }
 					
 					//calc branch length of randomLeaf k
-					float br = calcBranchLength(trees[i], randomLeaf[k]);
+					float br = calcBranchLength(trees[i], randomLeaf[k], countedBranch);
 			
 					//for each group in the groups update the total branch length accounting for the names file
 					vector<string> groups = trees[i]->tree[randomLeaf[k]].getGroup();
+					
 					for (int j = 0; j < groups.size(); j++) {
 						int numSeqsInGroupJ = 0;
 						map<string, int>::iterator it;
@@ -201,9 +203,11 @@ int PhyloDiversityCommand::execute(){
 						if (it != trees[i]->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
 							numSeqsInGroupJ = it->second;
 						}
-					
-						for (int s = (counts[groups[j]]+1); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
-							diversity[groups[j]][s] = diversity[groups[j]][s-1] + ((float) numSeqsInGroupJ * br);
+						
+						if (numSeqsInGroupJ != 0) {	diversity[groups[j]][(counts[groups[j]]+1)] = diversity[groups[j]][counts[groups[j]]] + br;  }
+						
+						for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
+							diversity[groups[j]][s] = diversity[groups[j]][s-1];  //update counts, but don't add in redundant branch lengths
 						}
 						counts[groups[j]] += numSeqsInGroupJ;
 					}
@@ -268,6 +272,7 @@ void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofst
 					else		{	score = div[globaldata->Groups[j]][numSampled] / (float)numIters;	}
 					
 					out << setprecision(4) << score << '\t';
+	
 				}else { out << "NA" << '\t'; }
 			}
 			out << endl;
@@ -318,26 +323,34 @@ void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float>
 	}
 }
 //**********************************************************************************************************************
-float PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf){
+float PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set<int> >& counted){
 	try {
 
 		//calc the branch length
 		//while you aren't at root
 		float sum = 0.0;
 		int index = leaf;
-
+		
+		vector<string> groups = t->tree[leaf].getGroup();
+					
 		while(t->tree[index].getParent() != -1){
 			
 			//if you have a BL
 			if(t->tree[index].getBranchLength() != -1){
-				sum += abs(t->tree[index].getBranchLength());
+				if (counted[groups[0]].count(index) == 0) { //you have not already counted this branch
+					sum += abs(t->tree[index].getBranchLength());
+					for (int j = 0; j < groups.size(); j++) {  counted[groups[j]].insert(index);  }
+				}
 			}
 			index = t->tree[index].getParent();
 		}
 			
 		//get last breanch length added
 		if(t->tree[index].getBranchLength() != -1){
-			sum += abs(t->tree[index].getBranchLength());
+			if (counted[groups[0]].count(index) == 0) { //you have not already counted this branch
+				sum += abs(t->tree[index].getBranchLength());
+				for (int j = 0; j < groups.size(); j++) {  counted[groups[j]].insert(index);  }
+			}
 		}
 		
 		return sum;
diff --git a/phylodiversitycommand.h b/phylodiversitycommand.h
index 76c996c..a7e0d2e 100644
--- a/phylodiversitycommand.h
+++ b/phylodiversitycommand.h
@@ -33,7 +33,7 @@ class PhyloDiversityCommand : public Command {
 		
 		void printData(set<int>&, map< string, vector<float> >&, ofstream&, int);
 		void printSumData(map< string, vector<float> >&, ofstream&, int);
-		float calcBranchLength(Tree*, int);
+		float calcBranchLength(Tree*, int, map< string, set<int> >&);
 };
 
 #endif
diff --git a/preclustercommand.cpp b/preclustercommand.cpp
index 0bd9132..a7f2113 100644
--- a/preclustercommand.cpp
+++ b/preclustercommand.cpp
@@ -125,69 +125,65 @@ int PreClusterCommand::execute(){
 		if (numSeqs == 0) { m->mothurOut("Error reading fasta file...please correct."); m->mothurOutEndLine(); return 0;  }
 		if (diffs > length) { m->mothurOut("Error: diffs is greater than your sequence length."); m->mothurOutEndLine(); return 0;  }
 		
-		string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
-		string newFastaFile = fileroot + "precluster" + m->getExtension(fastafile);
-		string newNamesFile = fileroot + "precluster.names";
-		ofstream outFasta;
-		ofstream outNames;
-		
-		m->openOutputFile(newFastaFile, outFasta);
-		m->openOutputFile(newNamesFile, outNames);
-
+		//clear sizes since you only needed this info to build the alignSeqs seqPNode structs
+//		sizes.clear();
+	
 		//sort seqs by number of identical seqs
-		alignSeqs.sort(comparePriority);
-
+		sort(alignSeqs.begin(), alignSeqs.end(), comparePriority);
+	
 		int count = 0;
-		int i = 0;
+
 		//think about running through twice...
-		list<seqPNode>::iterator itList;
-		list<seqPNode>::iterator itList2;
-		for (itList = alignSeqs.begin(); itList != alignSeqs.end();) {
+		for (int i = 0; i < numSeqs; i++) {
 			
-			//try to merge it with all smaller seqs
-			for (itList2 = alignSeqs.begin(); itList2 != alignSeqs.end();) {
-				
-				if (m->control_pressed) { outFasta.close(); outNames.close(); remove(newFastaFile.c_str()); remove(newNamesFile.c_str());  return 0; }
-				
+			//are you active
+			//			itActive = active.find(alignSeqs[i].seq.getName());
 			
-				if (itList->seq.getName() != itList2->seq.getName()) { //you don't want to merge with yourself
-					//are you within "diff" bases
+			if (alignSeqs[i].active) {  //this sequence has not been merged yet
+				
+				//try to merge it with all smaller seqs
+				for (int j = i+1; j < numSeqs; j++) {
 					
-					int mismatch = calcMisMatches((*itList).seq.getAligned(), (*itList2).seq.getAligned());
-
-					if (mismatch <= diffs) {
-						//merge
-						(*itList).names += ',' + (*itList2).names;
-						(*itList).numIdentical += (*itList2).numIdentical;
+					if (m->control_pressed) { return 0; }
+					
+					if (alignSeqs[j].active) {  //this sequence has not been merged yet
+						//are you within "diff" bases
+						int mismatch = calcMisMatches(alignSeqs[i].seq.getAligned(), alignSeqs[j].seq.getAligned());
 						
-						itList2 = alignSeqs.erase(itList2); //itList2--;
-						count++;
-					}else{ itList2++; }
-				}else{ itList2++; }
-
-			}
+						if (mismatch <= diffs) {
+							//merge
+							alignSeqs[i].names += ',' + alignSeqs[j].names;
+							alignSeqs[i].numIdentical += alignSeqs[j].numIdentical;
 
-			//ouptut this sequence
-			printData(outFasta, outNames, (*itList));
-			
-			//remove sequence
-			itList = alignSeqs.erase(itList); 
-			
-			i++;
+							alignSeqs[j].active = 0;
+							alignSeqs[j].numIdentical = 0;
+							count++;
+						}
+					}//end if j active
+				}//end if i != j
 			
+			//remove from active list 
+				alignSeqs[i].active = 0;
+				
+			}//end if active i
 			if(i % 100 == 0)	{ m->mothurOut(toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine();	}
 		}
-				
-		if(i % 100 != 0)	{ m->mothurOut(toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine();	}
 		
-		outFasta.close();
-		outNames.close();
+		if(numSeqs % 100 != 0)	{ m->mothurOut(toString(numSeqs) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine();	}
+	
 		
-		if (m->control_pressed) {  remove(newFastaFile.c_str()); remove(newNamesFile.c_str());  return 0; }
-
-		m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences.");
-		m->mothurOut("Total number of sequences before precluster was " + toString(numSeqs) + "."); m->mothurOutEndLine();
-		m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); 
+		string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
+		
+		string newFastaFile = fileroot + "precluster" + m->getExtension(fastafile);
+		string newNamesFile = fileroot + "precluster.names";
+		
+		if (m->control_pressed) { return 0; }
+		
+		m->mothurOut("Total number of sequences before precluster was " + toString(alignSeqs.size()) + "."); m->mothurOutEndLine();
+		m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); 
+		printData(newFastaFile, newNamesFile);
+		
+		m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); 
 		
 		if (m->control_pressed) { remove(newFastaFile.c_str()); remove(newNamesFile.c_str()); return 0; }
 		
@@ -287,10 +283,25 @@ int PreClusterCommand::calcMisMatches(string seq1, string seq2){
 
 /**************************************************************************************************/
 
-void PreClusterCommand::printData(ofstream& outFasta, ofstream& outNames, seqPNode thisSeq){
+void PreClusterCommand::printData(string newfasta, string newname){
 	try {
-		thisSeq.seq.printSequence(outFasta); 
-		outNames << thisSeq.seq.getName() << '\t' << thisSeq.names << endl;
+		ofstream outFasta;
+		ofstream outNames;
+		
+		m->openOutputFile(newfasta, outFasta);
+		m->openOutputFile(newname, outNames);
+				
+		
+		for (int i = 0; i < alignSeqs.size(); i++) {
+			if (alignSeqs[i].numIdentical != 0) {
+				alignSeqs[i].seq.printSequence(outFasta); 
+				outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl;
+			}
+		}
+		
+		outFasta.close();
+		outNames.close();
+		
 	}
 	catch(exception& e) {
 		m->errorOut(e, "PreClusterCommand", "printData");
diff --git a/preclustercommand.h b/preclustercommand.h
index a26fda4..de6a572 100644
--- a/preclustercommand.h
+++ b/preclustercommand.h
@@ -20,8 +20,9 @@ struct seqPNode {
 	int numIdentical;
 	Sequence seq;
 	string names;
+	bool active;
 	seqPNode() {}
-	seqPNode(int n, Sequence s, string nm) : numIdentical(n), seq(s), names(nm) {}
+	seqPNode(int n, Sequence s, string nm) : numIdentical(n), seq(s), names(nm), active(1) {}
 	~seqPNode() {}
 };
 /************************************************************/
@@ -38,7 +39,7 @@ private:
 	int diffs, length;
 	bool abort;
 	string fastafile, namefile, outputDir;
-	list<seqPNode> alignSeqs; //maps the number of identical seqs to a sequence
+	vector<seqPNode> alignSeqs; //maps the number of identical seqs to a sequence
 	map<string, string> names; //represents the names file first column maps to second column
 	map<string, int> sizes;  //this map a seq name to the number of identical seqs in the names file
 	map<string, int>::iterator itSize; 
@@ -48,7 +49,7 @@ private:
 	void readNameFile();
 	//int readNamesFASTA();
 	int calcMisMatches(string, string);
-	void printData(ofstream&, ofstream&, seqPNode); //fasta filename, names file name
+	void printData(string, string); //fasta filename, names file name
 };
 
 /************************************************************/
diff --git a/readotu.cpp b/readotu.cpp
index 3952abc..00448f8 100644
--- a/readotu.cpp
+++ b/readotu.cpp
@@ -35,7 +35,7 @@ void ReadOTUFile::read(GlobalData* globaldata){
 		//memory leak prevention
 		//if (globaldata->ginput != NULL) { delete globaldata->ginput;  }
 		globaldata->ginput = input;	//saving to be used by collector and rarefact commands.
-	
+
 		if ((globaldata->getFormat() == "list") || (globaldata->getFormat() == "rabund") || (globaldata->getFormat() == "sabund")) {//you are reading a list, rabund or sabund file for collect, rarefaction or summary.
 
 //cout << input << '\t' << globaldata << endl;
diff --git a/readtreecommand.cpp b/readtreecommand.cpp
index 0a86f36..fdfdb05 100644
--- a/readtreecommand.cpp
+++ b/readtreecommand.cpp
@@ -154,46 +154,47 @@ int ReadTreeCommand::execute(){
 		}
 
 		
-//		Sarah, isn't this checking already done when assigning the sequences to the groups?  it makes read.tree
-//		wicked slow...  For some reason my tree is coming in here eventhough the number of sequences in the tree
-//		agrees with the number of lines in the name file and the number of sequences represented by the name file
-//		agrees with the number of sequences in the group file
-
-		//output any names that are in group file but not in tree
+		//if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
+		int numNamesInTree;
+		if (namefile != "")  {  
+			if (numUniquesInName == globaldata->Treenames.size()) {  numNamesInTree = nameMap.size();  }
+			else {   numNamesInTree = globaldata->Treenames.size();  }
+		}else {  numNamesInTree = globaldata->Treenames.size();  }
 		
-//		if (globaldata->Treenames.size() < treeMap->getNumSeqs()) {
-//			cout << "in here" << endl;
-//			for (int i = 0; i < treeMap->namesOfSeqs.size(); i++) {
-//				//is that name in the tree?
-//				int count = 0;
-//				for (int j = 0; j < globaldata->Treenames.size(); j++) {
-//					if (treeMap->namesOfSeqs[i] == globaldata->Treenames[j]) { break; } //found it
-//					count++;
-//				}
-//				
-//				if (m->control_pressed) {  
-//					for (int i = 0; i < T.size(); i++) {  delete T[i];  }
-//					globaldata->gTree.clear();
-//					delete globaldata->gTreemap;
-//					return 0;
-//				}
-//				
-//				//then you did not find it so report it 
-//				if (count == globaldata->Treenames.size()) { 
-//					//if it is in your namefile then don't remove
-//					map<string, string>::iterator it = nameMap.find(treeMap->namesOfSeqs[i]);
-//					
-//					if (it == nameMap.end()) {
-//						m->mothurOut(treeMap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
-//						treeMap->removeSeq(treeMap->namesOfSeqs[i]);
-//						i--; //need this because removeSeq removes name from namesOfSeqs
-//					}
-//				}
-//			}
-//			
-//			globaldata->gTreemap = treeMap;
-//		}
 		
+		//output any names that are in group file but not in tree
+		if (numNamesInTree < treeMap->getNumSeqs()) {
+			for (int i = 0; i < treeMap->namesOfSeqs.size(); i++) {
+				//is that name in the tree?
+				int count = 0;
+				for (int j = 0; j < globaldata->Treenames.size(); j++) {
+					if (treeMap->namesOfSeqs[i] == globaldata->Treenames[j]) { break; } //found it
+					count++;
+				}
+				
+				if (m->control_pressed) {  
+					for (int i = 0; i < T.size(); i++) {  delete T[i];  }
+					globaldata->gTree.clear();
+					delete globaldata->gTreemap;
+					return 0;
+				}
+				
+				//then you did not find it so report it 
+				if (count == globaldata->Treenames.size()) { 
+					//if it is in your namefile then don't remove
+					map<string, string>::iterator it = nameMap.find(treeMap->namesOfSeqs[i]);
+					
+					if (it == nameMap.end()) {
+						m->mothurOut(treeMap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
+						treeMap->removeSeq(treeMap->namesOfSeqs[i]);
+						i--; //need this because removeSeq removes name from namesOfSeqs
+					}
+				}
+			}
+			
+			globaldata->gTreemap = treeMap;
+		}
+
 		return 0;
 	}
 	catch(exception& e) {
@@ -205,6 +206,7 @@ int ReadTreeCommand::execute(){
 int ReadTreeCommand::readNamesFile() {
 	try {
 		globaldata->names.clear();
+		numUniquesInName = 0;
 		
 		ifstream in;
 		m->openInputFile(namefile, in);
@@ -215,6 +217,8 @@ int ReadTreeCommand::readNamesFile() {
 		while(!in.eof()) {
 			in >> first >> second; m->gobble(in);
 			
+			numUniquesInName++;
+
 			itNames = globaldata->names.find(first);
 			if (itNames == globaldata->names.end()) {  
 				globaldata->names[first] = second; 
@@ -224,7 +228,7 @@ int ReadTreeCommand::readNamesFile() {
 				m->splitAtComma(second, dupNames);
 				
 				for (int i = 0; i < dupNames.size(); i++) {	nameMap[dupNames[i]] = dupNames[i];  }
-			}else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); globaldata->names.clear(); return 1; }			
+			}else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); globaldata->names.clear(); namefile = ""; return 1; }			
 		}
 		in.close();
 		
diff --git a/readtreecommand.h b/readtreecommand.h
index b0d10f7..5d5b9b5 100644
--- a/readtreecommand.h
+++ b/readtreecommand.h
@@ -32,6 +32,7 @@ private:
 	map<string, string> nameMap;
 	
 	int readNamesFile();
+	int numUniquesInName;
 
 };
 
diff --git a/sharedcommand.cpp b/sharedcommand.cpp
index 2c1b6af..fb3691e 100644
--- a/sharedcommand.cpp
+++ b/sharedcommand.cpp
@@ -259,6 +259,7 @@ int SharedCommand::execute(){
 		globaldata->setGroupFile("");
 		globaldata->setSharedFile(filename);
 		
+		
 		if (m->control_pressed) { 
 				delete input;  globaldata->ginput = NULL; 
 				remove(filename.c_str()); 
@@ -286,11 +287,15 @@ void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
 		if (order.size() == 0) { //user has not specified an order so do aplabetically
 			sort(thislookup.begin(), thislookup.end(), compareSharedRabunds);
 			
+			globaldata->Groups.clear();
+			
 			//initialize bin values
 			for (int i = 0; i < thislookup.size(); i++) {
 				out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
 				thislookup[i]->print(out);
 				
+				globaldata->Groups.push_back(thislookup[i]->getGroup());
+				
 				RAbundVector rav = thislookup[i]->getRAbundVector();
 				m->openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
 				rav.print(*(filehandles[thislookup[i]->getGroup()]));
@@ -305,6 +310,7 @@ void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
 				myMap[thislookup[i]->getGroup()] = thislookup[i];
 			}
 			
+			globaldata->Groups.clear();
 			
 			//loop through ordered list and print the rabund
 			for (int i = 0; i < order.size(); i++) {
@@ -313,6 +319,8 @@ void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
 				if(myIt != myMap.end()) { //we found it
 					out << (myIt->second)->getLabel() << '\t' << (myIt->second)->getGroup() << '\t';
 					(myIt->second)->print(out);
+					
+					globaldata->Groups.push_back((myIt->second)->getGroup());
 				
 					RAbundVector rav = (myIt->second)->getRAbundVector();
 					m->openOutputFileAppend(fileroot + (myIt->second)->getGroup() + ".rabund", *(filehandles[(myIt->second)->getGroup()]));
diff --git a/sharedrabundvector.cpp b/sharedrabundvector.cpp
index c15dfac..6259ba2 100644
--- a/sharedrabundvector.cpp
+++ b/sharedrabundvector.cpp
@@ -112,6 +112,7 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0),
 			
 			if (globaldata->gGroupmap == NULL) { 
 				//save group in groupmap
+	
 				groupmap->namesOfGroups.push_back(groupN);
 				groupmap->groupIndex[groupN] = count;
 			}
diff --git a/summarycommand.cpp b/summarycommand.cpp
index 70470b0..208544a 100644
--- a/summarycommand.cpp
+++ b/summarycommand.cpp
@@ -445,7 +445,7 @@ vector<string> SummaryCommand::parseSharedFile(string filename) {
 	}
 }
 //**********************************************************************************************************************
-string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector<string> outputNames) {
+string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector<string>& outputNames) {
 	try {
 		
 		ofstream out;
@@ -501,7 +501,9 @@ string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector<
 		}	
 		
 		//close each groups summary file
-		for (int i=0; i<outputNames.size(); i++) {  (*(filehandles[outputNames[i]])).close();  }
+		for (int i=0; i<outputNames.size(); i++) {  (*(filehandles[outputNames[i]])).close();  remove(outputNames[i].c_str());  }
+		outputNames.clear();
+		
 		out.close();
 		
 		//return combine file name
diff --git a/summarycommand.h b/summarycommand.h
index 6b3fa69..7c7ee2c 100644
--- a/summarycommand.h
+++ b/summarycommand.h
@@ -43,7 +43,7 @@ private:
 	vector<string> groups;
 	
 	vector<string> parseSharedFile(string);
-	string createGroupSummaryFile(int, int, vector<string>);
+	string createGroupSummaryFile(int, int, vector<string>&);
 
 
 };
diff --git a/tree.cpp b/tree.cpp
index a104477..dbb3f44 100644
--- a/tree.cpp
+++ b/tree.cpp
@@ -70,7 +70,7 @@ void Tree::addNamesToCounts() {
 				
 		//go through each leaf and update its pcounts and pgroups
 		
-		float A = clock();
+		//float A = clock();
 
 		for (int i = 0; i < numLeaves; i++) {
 
@@ -137,8 +137,8 @@ void Tree::addNamesToCounts() {
 			}//end else
 		}//end for		
 		
-		float B = clock();
-		cout << "addNamesToCounts\t" << (B - A) / CLOCKS_PER_SEC << endl;	
+		//float B = clock();
+		//cout << "addNamesToCounts\t" << (B - A) / CLOCKS_PER_SEC << endl;	
 
 	}
 	catch(exception& e) {
@@ -175,7 +175,7 @@ void Tree::setIndex(string searchName, int index) {
 /*****************************************************************/
 int Tree::assembleTree() {
 	try {
-		float A = clock();
+		//float A = clock();
 
 		//if user has given a names file we want to include that info in the pgroups and pcount info.
 		if(globaldata->names.size() != 0) {  addNamesToCounts();  }
@@ -187,8 +187,8 @@ int Tree::assembleTree() {
 			tree[i].pGroups = (mergeGroups(i));
 			tree[i].pcount = (mergeGcounts(i));
 		}
-		float B = clock();
-		cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
+		//float B = clock();
+		//cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
 		return 0;
 	}
 	catch(exception& e) {
diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp
index eb3c334..9594767 100644
--- a/treegroupscommand.cpp
+++ b/treegroupscommand.cpp
@@ -244,7 +244,8 @@ int TreeGroupCommand::execute(){
 		if (format == "sharedfile") {
 			//if the users entered no valid calculators don't execute command
 			if (treeCalculators.size() == 0) { m->mothurOut("You have given no valid calculators."); m->mothurOutEndLine(); return 0; }
-
+			
+			if (globaldata->gGroupmap != NULL) {  delete globaldata->gGroupmap;   globaldata->gGroupmap = NULL;  }
 			//you have groups
 			read = new ReadOTUFile(globaldata->inputFileName);	
 			read->read(&*globaldata); 
@@ -258,17 +259,17 @@ int TreeGroupCommand::execute(){
 			//used in tree constructor 
 			globaldata->runParse = false;
 			
-			//clear globaldatas old tree names if any
-			globaldata->Treenames.clear();
-		
-			//fills globaldatas tree names
-			globaldata->Treenames = globaldata->Groups;
-		
 			//create treemap class from groupmap for tree class to use
 			tmap = new TreeMap();
 			tmap->makeSim(globaldata->gGroupmap);
 			globaldata->gTreemap = tmap;
 			
+			//clear globaldatas old tree names if any
+			globaldata->Treenames.clear();
+			
+			//fills globaldatas tree names
+			globaldata->Treenames = globaldata->Groups;
+		
 			if (m->control_pressed) { return 0; }
 			
 			//create tree file
@@ -606,7 +607,7 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
 								subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
 								
 								data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
-								
+						//cout << thisLookup[k]->getGroup() << '\t' << thisLookup[l]->getGroup() << '\t' << (1.0 - data[0]) << endl;
 								if (m->control_pressed) { return 1; }
 								
 								//save values in similarity matrix
-- 
2.39.5