X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bayesian.cpp;h=b007c006dc741c2fd1755d60dcee7d3f5942aa39;hb=e189982e0a9b7352ad57cc38ccee675f128be22e;hp=01e4139fc877946e8a491d6c5b1e5beafed577b8;hpb=ef3f6d42fe720cd6d91419e5e32f8c04d8765010;p=mothur.git

diff --git a/bayesian.cpp b/bayesian.cpp
index 01e4139..b007c00 100644
--- a/bayesian.cpp
+++ b/bayesian.cpp
@@ -11,8 +11,8 @@
 #include "kmer.hpp"
 
 /**************************************************************************************************/
-Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff) : 
-Classify(tfile, tempFile, method, ksize, 0.0, 0.0, 0.0, 0.0), kmerSize(ksize), confidenceThreshold(cutoff)  {
+Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i) : 
+Classify(tfile, tempFile, method, ksize, 0.0, 0.0, 0.0, 0.0), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
 	try {
 					
 		numKmers = database->getMaxKmer() + 1;
@@ -33,20 +33,20 @@ Classify(tfile, tempFile, method, ksize, 0.0, 0.0, 0.0, 0.0), kmerSize(ksize), c
 		
 		/************calculate the probablity that each word will be in a specific taxonomy*************/
 		ofstream out;
-		string probFileName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer.prob";
+		string probFileName = tfile.substr(0,tfile.find_last_of(".")+1) + tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer.prob";
 		ifstream probFileTest(probFileName.c_str());
 		
 		ofstream out2;
-		string probFileName2 = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer.numNonZero";
+		string probFileName2 = tfile.substr(0,tfile.find_last_of(".")+1) + tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer.numNonZero";
 		ifstream probFileTest2(probFileName2.c_str());
 		
 		int start = time(NULL);
 		
 		if(probFileTest && probFileTest2){	
-			mothurOut("Reading template probabilities...     "); cout.flush();
+			m->mothurOut("Reading template probabilities...     "); cout.flush();
 			readProbFile(probFileTest, probFileTest2);	
 		}else{
-			mothurOut("Calculating template probabilities...     "); cout.flush();
+			m->mothurOut("Calculating template probabilities...     "); cout.flush();
 
 			ofstream out;
 			openOutputFile(probFileName, out);
@@ -56,6 +56,7 @@ Classify(tfile, tempFile, method, ksize, 0.0, 0.0, 0.0, 0.0), kmerSize(ksize), c
 			
 			//for each word
 			for (int i = 0; i < numKmers; i++) {
+				if (m->control_pressed) { break; }
 				
 				out << i << '\t';
 				
@@ -88,41 +89,49 @@ Classify(tfile, tempFile, method, ksize, 0.0, 0.0, 0.0, 0.0), kmerSize(ksize), c
 		}
 		
 		
-		mothurOut("DONE."); mothurOutEndLine();
-		mothurOut("It took " + toString(time(NULL) - start) + " seconds get probabilities. "); mothurOutEndLine();
+		m->mothurOut("DONE."); m->mothurOutEndLine();
+		m->mothurOut("It took " + toString(time(NULL) - start) + " seconds get probabilities. "); m->mothurOutEndLine();
 	}
 	catch(exception& e) {
-		errorOut(e, "Bayesian", "getTaxonomy");
+		m->errorOut(e, "Bayesian", "Bayesian");
 		exit(1);
 	}
 }
 /**************************************************************************************************/
 string Bayesian::getTaxonomy(Sequence* seq) {
 	try {
-		string tax;
+		string tax = "";
 		Kmer kmer(kmerSize);
 		
 		//get words contained in query
 		//getKmerString returns a string where the index in the string is hte kmer number 
 		//and the character at that index can be converted to be the number of times that kmer was seen
+
 		string queryKmerString = kmer.getKmerString(seq->getUnaligned()); 
+
 		vector<int> queryKmers;
 		for (int i = 0; i < queryKmerString.length(); i++) {
 			if (queryKmerString[i] != '!') { //this kmer is in the query
 				queryKmers.push_back(i);
+ 
 			}
 		}
-	
+		
+		if (queryKmers.size() == 0) {  m->mothurOut(seq->getName() + "is bad."); m->mothurOutEndLine(); return "bad seq"; }
+		
 		int index = getMostProbableTaxonomy(queryKmers);
+
+		
+		if (m->control_pressed) { return tax; }
 					
 		//bootstrap - to set confidenceScore
 		int numToSelect = queryKmers.size() / 8;
 		tax = bootstrapResults(queryKmers, index, numToSelect);
-				
+						
 		return tax;	
 	}
 	catch(exception& e) {
-		errorOut(e, "Bayesian", "getTaxonomy");
+		m->errorOut(e, "Bayesian", "getTaxonomy");
 		exit(1);
 	}
 }
@@ -142,7 +151,9 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
 		map<string, int>::iterator itBoot2;
 		map<int, int>::iterator itConvert;
 		
-		for (int i = 0; i < 100; i++) {
+		for (int i = 0; i < iters; i++) {
+			if (m->control_pressed) { return "control"; }
+			
 			vector<int> temp;
 						
 			for (int j = 0; j < numToSelect; j++) {
@@ -166,9 +177,10 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
 				}else{
 					confidenceScores[taxonomy.level][taxonomy.name]++;
 				}
-			
+		
 				taxonomy = phyloTree->get(taxonomy.parent);
 			}
+	
 		}
 		
 		string confidenceTax = "";
@@ -176,7 +188,7 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
 		TaxNode seqTax = phyloTree->get(tax);
 		
 		while (seqTax.level != 0) { //while you are not at the root
-				
+					
 				itBoot2 = confidenceScores[seqTax.level].find(seqTax.name); //is this a classification we already have a count on
 				
 				int confidence = 0;
@@ -185,30 +197,30 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
 				}
 				
 				if (confidence >= confidenceThreshold) {
-					confidenceTax = seqTax.name + "(" + toString(confidence) + ");" + confidenceTax;
+					confidenceTax = seqTax.name + "(" + toString(((confidence/(float)iters) * 100)) + ");" + confidenceTax;
 					simpleTax = seqTax.name + ";" + simpleTax;
 				}
 				
 				seqTax = phyloTree->get(seqTax.parent);
 		}
 		
+		if (confidenceTax == "") { confidenceTax = "unclassified;"; simpleTax = "unclassified;"; }
 		return confidenceTax;
 		
 	}
 	catch(exception& e) {
-		errorOut(e, "Bayesian", "bootstrapResults");
+		m->errorOut(e, "Bayesian", "bootstrapResults");
 		exit(1);
 	}
 }
 /**************************************************************************************************/
 int Bayesian::getMostProbableTaxonomy(vector<int> queryKmer) {
 	try {
-		int indexofGenus;
+		int indexofGenus = 0;
 		
 		double maxProbability = -1000000.0;
 		//find taxonomy with highest probability that this sequence is from it
 		for (int k = 0; k < genusNodes.size(); k++) {
-		
 			//for each taxonomy calc its probability
 			double prob = 1.0;
 			for (int i = 0; i < queryKmer.size(); i++) {
@@ -225,7 +237,7 @@ int Bayesian::getMostProbableTaxonomy(vector<int> queryKmer) {
 		return indexofGenus;
 	}
 	catch(exception& e) {
-		errorOut(e, "Bayesian", "getMostProbableTaxonomy");
+		m->errorOut(e, "Bayesian", "getMostProbableTaxonomy");
 		exit(1);
 	}
 }
@@ -252,7 +264,7 @@ map<string, int> Bayesian::parseTaxMap(string newTax) {
 		
 	}
 	catch(exception& e) {
-		errorOut(e, "Bayesian", "parseTax");
+		m->errorOut(e, "Bayesian", "parseTax");
 		exit(1);
 	}
 }
@@ -291,7 +303,7 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum) {
 		in.close();
 	}
 	catch(exception& e) {
-		errorOut(e, "Bayesian", "readProbFile");
+		m->errorOut(e, "Bayesian", "readProbFile");
 		exit(1);
 	}
 }