X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=blastdb.cpp;h=2eced7e14dc6fa42e0147747fc622bfa088250f7;hp=96770dcd5334afc6ed3d067ea6c7230b7c3edc73;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=19fcbbdba99658f5eca244803280f9ee7f9f6607 diff --git a/blastdb.cpp b/blastdb.cpp index 96770dc..2eced7e 100644 --- a/blastdb.cpp +++ b/blastdb.cpp @@ -14,48 +14,60 @@ /**************************************************************************************************/ -BlastDB::BlastDB(string tag, float gO, float gE, float mm, float mM) : Database(), +BlastDB::BlastDB(string tag, float gO, float gE, float mm, float mM, string b, int tid) : Database(), gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) { try { count = 0; + path = b; + threadID = tid; int randNumber = rand(); //int randNumber = 12345; string pid = ""; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - pid += toString(getpid()); +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + pid += getpid(); +#else + pid += toString(threadID); #endif + dbFileName = tag + pid + toString(randNumber) + ".template.unaligned.fasta"; queryFileName = tag + pid + toString(randNumber) + ".candidate.unaligned.fasta"; blastFileName = tag + pid + toString(randNumber) + ".blast"; //make sure blast exists in the write place - path = m->argv; - string tempPath = path; - for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); } - path = path.substr(0, (tempPath.find_last_of('m'))); + if (path == "") { + path = m->argv; + string tempPath = path; + for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); } + path = path.substr(0, (tempPath.find_last_of('m'))); + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + path += "blast/bin/"; +#else + path += "blast\\bin\\"; +#endif + } + string formatdbCommand; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - formatdbCommand = path + "blast/bin/formatdb"; // format the database, -o option gives us the ability +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + formatdbCommand = path + "formatdb"; // format the database, -o option gives us the ability #else - formatdbCommand = path + "blast\\bin\\formatdb.exe"; - //wrap entire string in "" - //formatdbCommand = "\"" + formatdbCommand + "\""; + formatdbCommand = path + "formatdb.exe"; #endif //test to make sure formatdb exists ifstream in; formatdbCommand = m->getFullPathName(formatdbCommand); int ableToOpen = m->openInputFile(formatdbCommand, in, "no error"); in.close(); - if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + formatdbCommand + " file does not exist. mothur requires formatdb.exe to be in the ./blast/bin folder relative to the mothur.exe location."); m->mothurOutEndLine(); m->control_pressed = true; } + if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + formatdbCommand + " file does not exist. mothur requires formatdb.exe."); m->mothurOutEndLine(); m->control_pressed = true; } string blastCommand; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - blastCommand = path + "blast/bin/blastall"; // format the database, -o option gives us the ability +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + blastCommand = path + "blastall"; // format the database, -o option gives us the ability #else - blastCommand = path + "blast\\bin\\blastall.exe"; + blastCommand = path + "blastall.exe"; //wrap entire string in "" //blastCommand = "\"" + blastCommand + "\""; #endif @@ -64,23 +76,21 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) { ifstream in2; blastCommand = m->getFullPathName(blastCommand); ableToOpen = m->openInputFile(blastCommand, in2, "no error"); in2.close(); - if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + blastCommand + " file does not exist. mothur requires blastall.exe to be in the ./blast/bin folder relative to the mothur.exe location."); m->mothurOutEndLine(); m->control_pressed = true; } + if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + blastCommand + " file does not exist. mothur requires blastall.exe."); m->mothurOutEndLine(); m->control_pressed = true; } string megablastCommand; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - megablastCommand = path + "blast/bin/megablast"; // format the database, -o option gives us the ability +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + megablastCommand = path + "megablast"; // format the database, -o option gives us the ability #else - megablastCommand = path + "blast\\bin\\megablast.exe"; - //wrap entire string in "" - //megablastCommand = "\"" + megablastCommand + "\""; + megablastCommand = path + "megablast.exe"; #endif //test to make sure formatdb exists ifstream in3; megablastCommand = m->getFullPathName(megablastCommand); ableToOpen = m->openInputFile(megablastCommand, in3, "no error"); in3.close(); - if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + megablastCommand + " file does not exist. mothur requires megablast.exe to be in the ./blast/bin folder relative to the mothur.exe location."); m->mothurOutEndLine(); m->control_pressed = true; } + if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + megablastCommand + " file does not exist. mothur requires megablast.exe."); m->mothurOutEndLine(); m->control_pressed = true; } } catch(exception& e) { @@ -90,31 +100,44 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) { } /**************************************************************************************************/ -BlastDB::BlastDB() : Database() { +BlastDB::BlastDB(string b, int tid) : Database() { try { count = 0; - + + path = b; + threadID = tid; + + //make sure blast exists in the write place + if (path == "") { + path = m->argv; + string tempPath = path; + for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); } + path = path.substr(0, (tempPath.find_last_of('m'))); + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + path += "blast/bin/"; +#else + path += "blast\\bin\\"; +#endif + } + int randNumber = rand(); string pid = ""; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - pid += toString(getpid()); +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + pid += getpid(); +#else + pid += toString(threadID); #endif dbFileName = pid + toString(randNumber) + ".template.unaligned.fasta"; queryFileName = pid + toString(randNumber) + ".candidate.unaligned.fasta"; blastFileName = pid + toString(randNumber) + ".blast"; - //make sure blast exists in the write place - path = m->argv; - string tempPath = path; - for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); } - path = path.substr(0, (tempPath.find_last_of('m'))); - string formatdbCommand; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - formatdbCommand = path + "blast/bin/formatdb"; // format the database, -o option gives us the ability +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + formatdbCommand = path + "formatdb"; // format the database, -o option gives us the ability #else - formatdbCommand = path + "blast\\bin\\formatdb.exe"; + formatdbCommand = path + "formatdb.exe"; //wrap entire string in "" //formatdbCommand = "\"" + formatdbCommand + "\""; #endif @@ -123,13 +146,13 @@ BlastDB::BlastDB() : Database() { ifstream in; formatdbCommand = m->getFullPathName(formatdbCommand); int ableToOpen = m->openInputFile(formatdbCommand, in, "no error"); in.close(); - if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + formatdbCommand + " file does not exist. mothur requires formatdb.exe to be in the ./blast/bin folder relative to the mothur.exe location."); m->mothurOutEndLine(); m->control_pressed = true; } + if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + formatdbCommand + " file does not exist. mothur requires formatdb.exe."); m->mothurOutEndLine(); m->control_pressed = true; } string blastCommand; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - blastCommand = path + "blast/bin/blastall"; // format the database, -o option gives us the ability +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + blastCommand = path + "blastall"; // format the database, -o option gives us the ability #else - blastCommand = path + "blast\\bin\\blastall.exe"; + blastCommand = path + "blastall.exe"; //wrap entire string in "" //blastCommand = "\"" + blastCommand + "\""; #endif @@ -138,14 +161,14 @@ BlastDB::BlastDB() : Database() { ifstream in2; blastCommand = m->getFullPathName(blastCommand); ableToOpen = m->openInputFile(blastCommand, in2, "no error"); in2.close(); - if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + blastCommand + " file does not exist. mothur requires blastall.exe to be in the ./blast/bin folder relative to the mothur.exe location."); m->mothurOutEndLine(); m->control_pressed = true; } + if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + blastCommand + " file does not exist. mothur requires blastall.exe."); m->mothurOutEndLine(); m->control_pressed = true; } string megablastCommand; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - megablastCommand = path + "blast/bin/megablast"; // format the database, -o option gives us the ability +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + megablastCommand = path + "megablast"; // format the database, -o option gives us the ability #else - megablastCommand = path + "blast\\bin\\megablast.exe"; + megablastCommand = path + "megablast.exe"; //wrap entire string in "" //megablastCommand = "\"" + megablastCommand + "\""; #endif @@ -154,7 +177,7 @@ BlastDB::BlastDB() : Database() { ifstream in3; megablastCommand = m->getFullPathName(megablastCommand); ableToOpen = m->openInputFile(megablastCommand, in3, "no error"); in3.close(); - if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + megablastCommand + " file does not exist. mothur requires megablast.exe to be in the ./blast/bin folder relative to the mothur.exe location."); m->mothurOutEndLine(); m->control_pressed = true; } + if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + megablastCommand + " file does not exist. mothur requires megablast.exe."); m->mothurOutEndLine(); m->control_pressed = true; } } @@ -190,10 +213,7 @@ vector BlastDB::findClosestSequences(Sequence* seq, int n) { ofstream queryFile; int randNumber = rand(); - string pid = ""; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - pid += toString(getpid()); -#endif + string pid = scrubName(seq->getName()); m->openOutputFile((queryFileName+pid+toString(randNumber)), queryFile); queryFile << '>' << seq->getName() << endl; @@ -206,12 +226,12 @@ vector BlastDB::findClosestSequences(Sequence* seq, int n) { // long. With this setting, it seems comparable in speed to the suffix tree approach. string blastCommand; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) - blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n); + blastCommand = path + "blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n); blastCommand += (" -i " + (queryFileName+pid+toString(randNumber)) + " -o " + blastFileName+pid+toString(randNumber)); #else - blastCommand = "\"" + path + "blast\\bin\\blastall\" -p blastn -d " + "\"" + dbFileName + "\"" + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n); + blastCommand = "\"" + path + "blastall\" -p blastn -d " + "\"" + dbFileName + "\"" + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n); blastCommand += (" -i " + (queryFileName+pid+toString(randNumber)) + " -o " + blastFileName+pid+toString(randNumber)); //wrap entire string in "" blastCommand = "\"" + blastCommand + "\""; @@ -257,10 +277,7 @@ vector BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) { ofstream queryFile; int randNumber = rand(); - string pid = ""; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - pid += toString(getpid()); -#endif + string pid = scrubName(seq->getName()); m->openOutputFile((queryFileName+pid+toString(randNumber)), queryFile); queryFile << '>' << seq->getName() << endl; @@ -272,14 +289,14 @@ vector BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) { // long. With this setting, it seems comparable in speed to the suffix tree approach. //7000004128189528left 0 100 66 0 0 1 66 61 126 1e-31 131 string blastCommand; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - blastCommand = path + "blast/bin/megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + blastCommand = path + "megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn blastCommand += (" -i " + (queryFileName+pid+toString(randNumber)) + " -o " + blastFileName+pid+toString(randNumber)); #else //blastCommand = path + "blast\\bin\\megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn //blastCommand += (" -i " + (queryFileName+toString(randNumber)) + " -o " + blastFileName+toString(randNumber)); - blastCommand = "\"" + path + "blast\\bin\\megablast\" -e 1e-10 -d " + "\"" + dbFileName + "\"" + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn + blastCommand = "\"" + path + "megablast\" -e 1e-10 -d " + "\"" + dbFileName + "\"" + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn blastCommand += (" -i " + (queryFileName+pid+toString(randNumber)) + " -o " + blastFileName+pid+toString(randNumber)); //wrap entire string in "" blastCommand = "\"" + blastCommand + "\""; @@ -343,20 +360,15 @@ void BlastDB::generateDB() { try { //m->mothurOut("Generating the temporary BLAST database...\t"); cout.flush(); - - path = m->argv; - string tempPath = path; - for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); } - path = path.substr(0, (tempPath.find_last_of('m'))); - + string formatdbCommand; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName; // format the database, -o option gives us the ability + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + formatdbCommand = path + "formatdb -p F -o T -i " + dbFileName; // format the database, -o option gives us the ability #else //formatdbCommand = path + "blast\\bin\\formatdb -p F -o T -i " + dbFileName; // format the database, -o option gives us the ability - formatdbCommand = "\"" + path + "blast\\bin\\formatdb\" -p F -o T -i " + "\"" + dbFileName + "\""; + formatdbCommand = "\"" + path + "formatdb\" -p F -o T -i " + "\"" + dbFileName + "\""; //wrap entire string in "" formatdbCommand = "\"" + formatdbCommand + "\""; #endif @@ -371,6 +383,24 @@ void BlastDB::generateDB() { } } /**************************************************************************************************/ +string BlastDB::scrubName(string seqName) { + try { + + string cleanName = ""; + + for (int i = 0; i < seqName.length(); i++) { + if (isalnum(seqName[i])) { cleanName += seqName[i]; } + else { cleanName += "_"; } + } + + return cleanName; + } + catch(exception& e) { + m->errorOut(e, "BlastDB", "scrubName"); + exit(1); + } +} +/**************************************************************************************************/ /**************************************************************************************************/