X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=blastdb.cpp;h=b1a7b48d1108d747c1423eb0f8bad19e3ceede42;hb=1e8d08e96f4fe99604a6b3502568de464bf60891;hp=979d507c23d61b4537d62a1a4736721624616fb2;hpb=260ae19c36cb11a53ddc5a75b5e507f8dd8b31d6;p=mothur.git

diff --git a/blastdb.cpp b/blastdb.cpp
index 979d507..b1a7b48 100644
--- a/blastdb.cpp
+++ b/blastdb.cpp
@@ -14,23 +14,21 @@
 
 /**************************************************************************************************/
 
-BlastDB::BlastDB(float gO, float gE, float m, float mM) : Database(), 
+BlastDB::BlastDB(string tag, float gO, float gE, float m, float mM) : Database(), 
 gapOpen(gO), gapExtend(gE), match(m), misMatch(mM) {
 	
-	globaldata = GlobalData::getInstance();
 	count = 0;
 
 	int randNumber = rand();
-	dbFileName = toString(randNumber) + ".template.unaligned.fasta";
-	queryFileName = toString(randNumber) + ".candidate.unaligned.fasta";
-	blastFileName = toString(randNumber) + ".blast";
+	dbFileName = tag + toString(randNumber) + ".template.unaligned.fasta";
+	queryFileName = tag + toString(randNumber) + ".candidate.unaligned.fasta";
+	blastFileName = tag + toString(randNumber) + ".blast";
 
 }
 /**************************************************************************************************/
 
 BlastDB::BlastDB() : Database() {
 	try {
-		globaldata = GlobalData::getInstance();
 		count = 0;
 
 		int randNumber = rand();
@@ -79,8 +77,17 @@ vector<int> BlastDB::findClosestSequences(Sequence* seq, int n) {
 		//	wordsize used in megablast.  I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
 		//	long.  With this setting, it seems comparable in speed to the suffix tree approach.
 		
-		string blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n);;
-		blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName());
+		string blastCommand;
+		#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+		
+			blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n);;
+			blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName());
+		#else
+			blastCommand =  "\"" + path + "blast\\bin\\blastall\" -p blastn -d " + "\"" + dbFileName + "\"" + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n);
+			blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName());
+			//wrap entire string in ""
+			blastCommand = "\"" + blastCommand + "\"";
+		#endif
 		system(blastCommand.c_str());
 		
 		ifstream m8FileHandle;
@@ -113,49 +120,66 @@ vector<int> BlastDB::findClosestSequences(Sequence* seq, int n) {
 }
 /**************************************************************************************************/
 //assumes you have added all the template sequences using the addSequence function and run generateDB.
-vector<int> BlastDB::findClosestMegaBlast(Sequence* seq, int n) {
+vector<int> BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) {
 	try{
 		vector<int> topMatches;
+		float numBases, mismatch, gap, startQuery, endQuery, startRef, endRef, score;
+		Scores.clear();
 		
 		ofstream queryFile;
+
 		m->openOutputFile((queryFileName+seq->getName()), queryFile);
 		queryFile << '>' << seq->getName() << endl;
 		queryFile << seq->getUnaligned() << endl;
 		queryFile.close();
-				
+//		cout << seq->getUnaligned() << endl;
 		//	the goal here is to quickly survey the database to find the closest match.  To do this we are using the default
 		//	wordsize used in megablast.  I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
 		//	long.  With this setting, it seems comparable in speed to the suffix tree approach.
-	
-		string blastCommand = path + "blast/bin/megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
-		blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName());
+//7000004128189528left	0	100		66	0	0	1	66	61	126	1e-31	 131	
+		string blastCommand;
+		#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+			blastCommand = path + "blast/bin/megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
+			blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName());
+		#else
+			blastCommand =  "\"" + path + "blast\\bin\\megablast\" -e 1e-10 -d " + "\"" + dbFileName + "\"" + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
+			blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName());
+			//wrap entire string in ""
+			blastCommand = "\"" + blastCommand + "\"";
+
+		#endif
+		
 		system(blastCommand.c_str());
 
 		ifstream m8FileHandle;
 		m->openInputFile(blastFileName+seq->getName(), m8FileHandle, "no error");
 	
-		string dummy;
+		string dummy, eScore;
 		int templateAccession;
 		m->gobble(m8FileHandle);
 		
 		while(!m8FileHandle.eof()){
-			m8FileHandle >> dummy >> templateAccession >> searchScore;
+			m8FileHandle >> dummy >> templateAccession >> searchScore >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score;
+//			cout << dummy << '\t' << templateAccession << '\t' << searchScore << '\t' << numBases << '\t' << mismatch << '\t' << gap << '\t' << startQuery << '\t' << endQuery << '\t' << startRef << '\t' << endRef << '\t' << eScore << '\t' << score << endl; 
 			
 			//get rest of junk in line
-			while (!m8FileHandle.eof())	{	char c = m8FileHandle.get(); if (c == 10 || c == 13){	break;	}	} 
-			
+			//while (!m8FileHandle.eof())	{	char c = m8FileHandle.get(); if (c == 10 || c == 13){	break;	}else{ cout << c; }	} //
+				//cout << endl;
 			m->gobble(m8FileHandle);
-			topMatches.push_back(templateAccession);
+			if (searchScore >= minPerID) { 
+				topMatches.push_back(templateAccession);
+				Scores.push_back(searchScore);
+			}
 //cout << templateAccession << endl;
 		}
 		m8FileHandle.close();
 		remove((queryFileName+seq->getName()).c_str());
 		remove((blastFileName+seq->getName()).c_str());
-//cout << "\n\n" ;		
+//cout << "\n" ;		
 		return topMatches;
 	}
 	catch(exception& e) {
-		m->errorOut(e, "BlastDB", "findClosest");
+		m->errorOut(e, "BlastDB", "findClosestMegaBlast");
 		exit(1);
 	}
 }
@@ -184,10 +208,20 @@ void BlastDB::generateDB() {
 	
 		//m->mothurOut("Generating the temporary BLAST database...\t");	cout.flush();
 		
-		path = globaldata->argv;
-		path = path.substr(0, (path.find_last_of('m')));
+		path = m->argv;
+		string tempPath = path;
+		for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
+		path = path.substr(0, (tempPath.find_last_of('m')));
 	
-		string formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName;	//	format the database, -o option gives us the ability
+		string formatdbCommand;
+		
+		#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+			formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName;	//	format the database, -o option gives us the ability
+		#else
+			formatdbCommand = "\"" + path + "blast\\bin\\formatdb\" -p F -o T -i " + "\"" +  dbFileName + "\"";
+			//wrap entire string in ""
+			formatdbCommand = "\"" + formatdbCommand + "\"";
+		#endif
 		system(formatdbCommand.c_str());								//	to get the right sequence names, i think. -p F
 																	//	option tells formatdb that seqs are DNA, not prot
 		//m->mothurOut("DONE."); m->mothurOutEndLine();	m->mothurOutEndLine(); cout.flush();