From: westcott <westcott>
Date: Wed, 13 May 2009 15:19:07 +0000 (+0000)
Subject: fixed bug with getting sharedrabunds for venn, heatmap and tree.shared commands,... 
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=37a229b590342d94cd22bb1f7f4127e85197a27f;p=mothur.git

fixed bug with getting sharedrabunds for venn, heatmap and tree.shared commands, speeding them up a bit.
---

diff --git a/distancecommand.cpp b/distancecommand.cpp
index 0714ab3..90ffb8d 100644
--- a/distancecommand.cpp
+++ b/distancecommand.cpp
@@ -52,8 +52,6 @@ DistanceCommand::DistanceCommand(){
 				
 		//reset calc for next command
 		globaldata->setCalc("");
-
-		
 	}
 	catch(exception& e) {
 		cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
@@ -138,16 +136,12 @@ int DistanceCommand::execute(){
 				int pid2 = fork();
 				if(pid2 > 0){
 					driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", cutoff);	
-					//system(("cat " + distFile + "tempa" + " >> " + distFile).c_str());
 					appendFiles(distFile+"tempa", distFile);
-					//system(("rm " + distFile + "tempa").c_str());	
 					remove((distFile + "tempa").c_str());			
 				}
 				else{
 					driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", cutoff);	
-					//system(("cat " + distFile + "tempb" + " >> " + distFile).c_str());
 					appendFiles(distFile+"tempb", distFile);
-					//system(("rm " + distFile + "tempb").c_str());
 					remove((distFile + "tempb").c_str());				
 				}
 				wait(NULL);
@@ -156,16 +150,12 @@ int DistanceCommand::execute(){
 				int pid3 = fork();
 				if(pid3 > 0){
 					driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", cutoff);	
-					//system(("cat " + distFile + "tempc" + " >> " + distFile).c_str());
 					appendFiles(distFile+"tempc", distFile);
-					//system(("rm " + distFile + "tempc").c_str());
 					remove((distFile + "tempc").c_str());				
 				}
 				else{
 					driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", cutoff);	
-					//system(("cat " + distFile + "tempd" + " >> " + distFile).c_str());
 					appendFiles(distFile+"tempd", distFile);
-					//system(("rm " + distFile + "tempd").c_str());
 					remove((distFile + "tempd").c_str());				
 				}
 				wait(NULL);
@@ -207,7 +197,6 @@ int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLi
 
 				if(dist <= cutoff){
 					distFile << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl;
-//cout << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl;
 				}
 			
 			}
@@ -264,3 +253,4 @@ void DistanceCommand::appendFiles(string temp, string filename) {
 		exit(1);
 	}	
 }
+/**************************************************************************************************/
\ No newline at end of file
diff --git a/helpcommand.cpp b/helpcommand.cpp
index 8060d5f..5ea5158 100644
--- a/helpcommand.cpp
+++ b/helpcommand.cpp
@@ -70,6 +70,18 @@ int HelpCommand::execute(){
 		cout << "The deconvolute command parameter is fasta and it is required." << "\n";
 		cout << "The deconvolute command should be in the following format: " << "\n";
 		cout << "deconvolute(fasta=yourFastaFile) " << "\n";
+	}else if (globaldata->helpRequest == "distance") {
+		cout << "The distance command reads a file containing sequences and creates a distance file." << "\n";
+		cout << "The distance command parameters are fasta, phylip, clustal, nexus, calc, ends, cutoff and processors.  " << "\n";
+		cout << "You must use one of the following parameters for your filename: fasta, phylip, clustal or nexus. " << "\n";
+		cout << "The calc parameter allows you to specify the method of calculating the distances.  Your options are: nogaps, onegap or eachgap. The default is onegap." << "\n";
+		cout << "The ends parameter allows you to specify whether to include terminal gaps in distance.  Your options are: T or F. The default is T." << "\n";
+		cout << "The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0." << "\n";
+		cout << "The processors parameter allows you to specify number of processors to use.  The default is 1, but you can use up to 4 processors." << "\n";
+		cout << "The distance command should be in the following format: " << "\n";
+		cout << "distance(fasta=yourFastaFile, calc=yourCalc, ends=yourEnds, cutoff= yourCutOff, processors=yourProcessors) " << "\n";
+		cout << "Example distance(fasta=amazon.fasta, calc=eachgap, ends=F, cutoff= 2.0, processors=3)." << "\n";
+		cout << "Note: No spaces between parameter labels (i.e. calc), '=' and parameters (i.e.yourCalc)." << "\n" << "\n";
 	}else if (globaldata->helpRequest == "collect.single") {
 		cout << "The collect.single command can only be executed after a successful read.otu command. WITH ONE EXECEPTION. " << "\n";
 		cout << "The collect.single command can be executed after a successful cluster command.  It will use the .list file from the output of the cluster." << "\n";
diff --git a/inputdata.cpp b/inputdata.cpp
index 2c249e6..05eaaca 100644
--- a/inputdata.cpp
+++ b/inputdata.cpp
@@ -183,14 +183,14 @@ OrderVector* InputData::getOrderVector(){
 	}	
 }
 /***********************************************************************/
-
+//this is used when you don't need the order vector
 vector<SharedRAbundVector*> InputData::getSharedRAbundVectors(){
 	try {
 		if(fileHandle){
 			if (format == "sharedfile")  {
-				SharedOrder = new SharedOrderVector(fileHandle);
-				if (SharedOrder != NULL) {
-					return SharedOrder->getSharedRAbundVector();
+				SharedRAbundVector* SharedRAbund = new SharedRAbundVector(fileHandle);
+				if (SharedRAbund != NULL) {
+					return SharedRAbund->getSharedRAbundVectors();
 				}
 			}else if (format == "shared") {
 				SharedList = new SharedListVector(fileHandle);
@@ -216,6 +216,7 @@ vector<SharedRAbundVector*> InputData::getSharedRAbundVectors(){
 	}	
 }
 
+
 /***********************************************************************/
 
 SAbundVector* InputData::getSAbundVector(){
diff --git a/readdistcommand.cpp b/readdistcommand.cpp
index 0e45ad5..8de0c34 100644
--- a/readdistcommand.cpp
+++ b/readdistcommand.cpp
@@ -78,7 +78,10 @@ int ReadDistCommand::execute(){
 			globaldata->gMatrix = matrix; //save matrix for coverage commands
 		}else {
 			read->read(nameMap);
+			//to prevent memory leak
+			if (globaldata->gListVector != NULL) { delete globaldata->gListVector;  }
 			globaldata->gListVector = read->getListVector();
+			if (globaldata->gSparseMatrix != NULL) { delete globaldata->gSparseMatrix;  }
 			globaldata->gSparseMatrix = read->getMatrix();
 		}
 		return 0;
diff --git a/readotucommand.cpp b/readotucommand.cpp
index 9f27057..1b2ec9e 100644
--- a/readotucommand.cpp
+++ b/readotucommand.cpp
@@ -45,10 +45,10 @@ int ReadOtuCommand::execute(){
 			groupMap->readMap();
 			
 			//if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap;  }
-			globaldata->gGroupmap = groupMap;
-		
+			globaldata->gGroupmap = groupMap;		
 			shared = new SharedCommand();
 			shared->execute();
+
 			parselist = new ParseListCommand();
 			parselist->execute();
 		}
diff --git a/sharedlistvector.cpp b/sharedlistvector.cpp
index b562450..02c012e 100644
--- a/sharedlistvector.cpp
+++ b/sharedlistvector.cpp
@@ -234,6 +234,7 @@ SharedOrderVector* SharedListVector::getSharedOrderVector(){
 			groupName = groupmap->getGroup(names);
 			order->push_back(i, binSize, groupName);
 		}
+
 		random_shuffle(order->begin(), order->end());
 		return order;
 	}
@@ -293,8 +294,13 @@ vector<SharedRAbundVector*> SharedListVector::getSharedRAbundVector() {
 		vector<SharedRAbundVector*> lookup;
 		
 		util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups);
-		util->getSharedVectors(globaldata->Groups, lookup, this->getSharedOrderVector());
-		
+
+		for (int i = 0; i < globaldata->Groups.size(); i++) {
+			SharedRAbundVector* temp = new SharedRAbundVector();
+			*temp = getSharedRAbundVector(globaldata->Groups[i]);
+			lookup.push_back(temp);
+		}
+
 		return lookup;
 	}
 	catch(exception& e) {
diff --git a/sharedordervector.cpp b/sharedordervector.cpp
index 2830cec..973f695 100644
--- a/sharedordervector.cpp
+++ b/sharedordervector.cpp
@@ -355,18 +355,18 @@ void SharedOrderVector::updateStats(){
 		maxRank = 0;
 	
 		for(int i=0;i<data.size();i++){
+
 			if(data[i].bin != -1){
 				numSeqs++;
 			}
 		}
+		//vector<individual> hold(numSeqs, 0);
 	
-		vector<individual> hold(numSeqs);
-	
-		for(int i=0;i<numSeqs;i++){
-			if(data[i].bin != -1){
-				hold[data[i].bin].bin = hold[data[i].bin].bin+1;
-			}
-		}	
+		//for(int i=0;i<numSeqs;i++){
+			//if(data[i].bin != -1){
+				//hold[data[i].bin].bin = hold[data[i].bin].bin+1;
+			//}
+		//}	
 
 		for(int i=0;i<numSeqs;i++){
 			if(data[i].bin > numBins) { numBins = data[i].bin;	} 
diff --git a/sharedrabundvector.cpp b/sharedrabundvector.cpp
index 9634613..1b8023d 100644
--- a/sharedrabundvector.cpp
+++ b/sharedrabundvector.cpp
@@ -13,6 +13,7 @@ using namespace std;
 #include "sharedrabundvector.h" 
 #include "sabundvector.hpp"
 #include "ordervector.hpp"
+#include "sharedutilities.h"
 
 
 /***********************************************************************/
@@ -56,30 +57,87 @@ SharedRAbundVector::SharedRAbundVector(string id, vector<individual> rav) : Data
 }
 
 
-/***********************************************************************
-
-
+/***********************************************************************/
+//reads a shared file
 SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
 	try {
-		int i, num;
-		string holdLabel, group
-		individual newGuy;
+		globaldata = GlobalData::getInstance();
 		
-		f >> label >> group >> num;
+		if (globaldata->gGroupmap == NULL) {  groupmap = new GroupMap(); }
 		
-		//initialize data
-		for (i=0; i<hold; i++) {
-			newGuy = new individual;
-			newGuy.abundance = 0;
-			newGuy.bin = i;
-			data.push_back(newGuy);
+		int num, inputData, pos, count;
+		count = 0;  
+		string holdLabel, nextLabel, groupN;
+		individual newguy;
+		
+		for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+		lookup.clear();
+		
+		//read in first row since you know there is at least 1 group.
+		f >> label >> groupN >> num;
+		holdLabel = label;
+		
+		//add new vector to lookup
+		lookup.push_back(new SharedRAbundVector(num));
+		lookup[0]->setLabel(label);
+		lookup[0]->setGroup(groupN);
+		
+		if (globaldata->gGroupmap == NULL) { 
+			//save group in groupmap
+			groupmap->namesOfGroups.push_back(groupN);
+			groupmap->groupIndex[groupN] = 0;
 		}
-		int inputData;
-	
-		for(int i=0;i<hold;i++){
+		
+		//fill vector.  data = first sharedrabund in file
+		for(int i=0;i<num;i++){
 			f >> inputData;
-			set(i, inputData);
+			
+			lookup[0]->push_back(inputData, i, groupN); //abundance, bin, group
+			push_back(inputData, i, groupN);
+			numSeqs += inputData;
+			numBins++;
+			if (inputData > maxRank) { maxRank = inputData; }
+			
 		}
+		
+		//save position in file in case next line is a new label.
+		pos = f.tellg();
+		
+		if (f.eof() != true) { f >> nextLabel; }
+		
+		//read the rest of the groups info in
+		while ((nextLabel == holdLabel) && (f.eof() != true)) {
+			f >> groupN >> num;
+			count++;
+			
+			if (globaldata->gGroupmap == NULL) { 
+				//save group in groupmap
+				groupmap->namesOfGroups.push_back(groupN);
+				groupmap->groupIndex[groupN] = count;
+			}
+			
+			//add new vector to lookup
+			lookup.push_back(new SharedRAbundVector(num));
+			lookup[count]->setLabel(label);
+			lookup[count]->setGroup(groupN);
+
+			//fill vector.  
+			for(int i=0;i<num;i++){
+				f >> inputData;
+				lookup[count]->push_back(inputData, i, groupN); //abundance, bin, group
+			}
+			
+			//save position in file in case next line is a new label.
+			pos = f.tellg();
+	
+			if (f.eof() != true) { f >> nextLabel; }
+		}
+		
+		//put file pointer back since you are now at a new distance label
+		f.seekg(pos, ios::beg);
+	
+		if (globaldata->gGroupmap == NULL) { globaldata->gGroupmap = groupmap; }
+		
 	}
 	catch(exception& e) {
 		cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function SharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
@@ -325,6 +383,34 @@ SharedRAbundVector SharedRAbundVector::getSharedRAbundVector(){
 	return *this;			
 }
 /***********************************************************************/
+vector<SharedRAbundVector*> SharedRAbundVector::getSharedRAbundVectors(){
+	try {
+		SharedUtil* util;
+		util = new SharedUtil();
+		
+		util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups);
+
+		for (int i = 0; i < lookup.size(); i++) {
+			//if this sharedrabund is not from a group the user wants then delete it.
+			if (util->isValidGroup(lookup[i]->getGroup(), globaldata->Groups) == false) { 
+				delete lookup[i]; 
+				lookup.erase(lookup.begin()+i); 
+				i--; 
+			}
+		}
+
+		return lookup;
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function getSharedRAbundVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the SharedRAbundVector class function getSharedRAbundVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+}
+/***********************************************************************/
 
 RAbundVector SharedRAbundVector::getRAbundVector() {
 	try {
diff --git a/sharedrabundvector.h b/sharedrabundvector.h
index 5cc92a3..0c71394 100644
--- a/sharedrabundvector.h
+++ b/sharedrabundvector.h
@@ -14,6 +14,7 @@
 #include "sharedordervector.h"
 #include "sharedsabundvector.h"
 #include "rabundvector.hpp"
+#include "groupmap.h"
 
 /* This class is a child to datavector.  It represents OTU information at a certain distance. 
 	It is similiar to an rabundvector except each member of data knows which group it belongs to.
@@ -21,7 +22,7 @@
 	An individual which knows the OTU from which it came, 
 	the group it is in and its abundance.  */
 
-
+class GlobalData;
 
 class SharedRAbundVector : public DataVector {
 	
@@ -30,7 +31,7 @@ public:
 	SharedRAbundVector(int);
 	//SharedRAbundVector(string, vector<int>);
 	SharedRAbundVector(const SharedRAbundVector& bv) : DataVector(bv), data(bv.data), maxRank(bv.maxRank), numBins(bv.numBins), numSeqs(bv.numSeqs){};
-    //SharedRAbundVector(ifstream&);
+    SharedRAbundVector(ifstream&);
 	~SharedRAbundVector();
 
 	int getNumBins();		
@@ -66,9 +67,13 @@ public:
 	SharedOrderVector getSharedOrderVector();
 	SharedSAbundVector getSharedSAbundVector();
 	SharedRAbundVector getSharedRAbundVector();
+	vector<SharedRAbundVector*> getSharedRAbundVectors();
 	
 private:
 	vector<individual>  data; 
+	vector<SharedRAbundVector*> lookup;
+	GlobalData* globaldata;
+	GroupMap* groupmap;
 	int maxRank;
 	int numBins;
 	int numSeqs;
diff --git a/sharedutilities.cpp b/sharedutilities.cpp
index fc1b335..fd0595d 100644
--- a/sharedutilities.cpp
+++ b/sharedutilities.cpp
@@ -125,7 +125,7 @@ void SharedUtil::setGroups(vector<string>& userGroups, vector<string>& allGroups
 				
 				//if the user only entered invalid groups
 				if (userGroups.size() == 0) { 
-					cout << "When using the groups parameter you must have at least 1 valid groups. I will run the command using all the groups in your groupfile." << endl; 
+					cout << "You provided no valid groups. I will run the command using all the groups in your groupfile." << endl; 
 					for (int i = 0; i < allGroups.size(); i++) {
 						userGroups.push_back(allGroups[i]);
 					}
diff --git a/sharedutilities.h b/sharedutilities.h
index fa00933..75ecf23 100644
--- a/sharedutilities.h
+++ b/sharedutilities.h
@@ -28,10 +28,11 @@ class SharedUtil {
 		void setGroups(vector<string>&, vector<string>&, string&, int&, string);  //globaldata->Groups, your tree or group map, allgroups, numGroups, mode
 		void getCombos(vector<string>&, vector<string>, int&); //groupcomb, globaldata->Groups, numcomb
 		void updateGroupIndex(vector<string>&, map<string, int>&); //globaldata->Groups, groupmap->groupIndex
+		bool isValidGroup(string, vector<string>);
 		
 	private:
 	
-		bool isValidGroup(string, vector<string>);
+		
 };
 
 /**************************************************************************************************/
diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp
index de30d3c..047ea61 100644
--- a/treegroupscommand.cpp
+++ b/treegroupscommand.cpp
@@ -89,11 +89,12 @@ int TreeGroupCommand::execute(){
 		if (treeCalculators.size() == 0) { return 0; }
 
 		if (format == "sharedfile") {
+			//you have groups
 			read = new ReadOTUFile(globaldata->inputFileName);	
 			read->read(&*globaldata); 
 			
 			input = globaldata->ginput;
-			order = input->getSharedOrderVector();
+			lookup = input->getSharedRAbundVectors();
 		}else {
 			//you are using a list and a groupfile
 			read = new ReadOTUFile(globaldata->inputFileName);	
@@ -101,11 +102,11 @@ int TreeGroupCommand::execute(){
 		
 			input = globaldata->ginput;
 			SharedList = globaldata->gSharedList;
-			order = SharedList->getSharedOrderVector();
+			lookup = SharedList->getSharedRAbundVector();
 		}
 		
-		//set users groups
-		util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "treegroup");
+		if (lookup.size() < 2) { cout << "You have not provided enough valid groups.  I cannot run the command." << endl; }
+		
 		numGroups = globaldata->Groups.size();
 		groupNames = "";
 		for (int i = 0; i < numGroups; i++) { groupNames += globaldata->Groups[i]; }
@@ -121,12 +122,11 @@ int TreeGroupCommand::execute(){
 		tmap->makeSim(globaldata->gGroupmap);
 		globaldata->gTreemap = tmap;
 			
-		while(order != NULL){
+		while(lookup[0] != NULL){
 		
-			if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(order->getLabel()) == 1){			
+			if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){			
 				
-				cout << order->getLabel() << '\t' << count << endl;
-				util->getSharedVectors(globaldata->Groups, lookup, order);  //fills group vectors from order vector.
+				cout << lookup[0]->getLabel() << '\t' << count << endl;
 				
 				//for each calculator												
 				for(int i = 0 ; i < treeCalculators.size(); i++) {
@@ -145,7 +145,7 @@ int TreeGroupCommand::execute(){
 					for (int g = 0; g < numGroups; g++) {	index[g] = g;	}
 		
 					//create a new filename
-					outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + order->getLabel() + ".tre";
+					outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + lookup[0]->getLabel() + ".tre";
 							
 					for (int k = 0; k < lookup.size(); k++) { 
 						for (int l = k; l < lookup.size(); l++) {
@@ -170,17 +170,7 @@ int TreeGroupCommand::execute(){
 			}
 		
 			//get next line to process
-			if (format == "sharedfile") {
-				order = input->getSharedOrderVector();
-			}else {
-				//you are using a list and a groupfile
-				SharedList = input->getSharedListVector(); //get new list vector to process
-				if (SharedList != NULL) {
-					order = SharedList->getSharedOrderVector(); //gets new order vector with group info.
-				}else {
-					break;
-				}
-			}
+			lookup = input->getSharedRAbundVectors();
 			count++;
 		}