X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=blastdb.cpp;h=2eced7e14dc6fa42e0147747fc622bfa088250f7;hb=1a968f34ae2d2680eaf189a197d1a21b8dfd6c03;hp=af157203630c68bc8e8cfd0e09a2bcf846e4d33c;hpb=4e4d3cb3315fe25b40383c6f65cd6be23f58903d;p=mothur.git diff --git a/blastdb.cpp b/blastdb.cpp index af15720..2eced7e 100644 --- a/blastdb.cpp +++ b/blastdb.cpp @@ -14,19 +14,23 @@ /**************************************************************************************************/ -BlastDB::BlastDB(string tag, float gO, float gE, float mm, float mM, string b) : Database(), +BlastDB::BlastDB(string tag, float gO, float gE, float mm, float mM, string b, int tid) : Database(), gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) { try { count = 0; path = b; + threadID = tid; int randNumber = rand(); //int randNumber = 12345; string pid = ""; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - pid += toString(getpid()); +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + pid += getpid(); +#else + pid += toString(threadID); #endif + dbFileName = tag + pid + toString(randNumber) + ".template.unaligned.fasta"; queryFileName = tag + pid + toString(randNumber) + ".candidate.unaligned.fasta"; blastFileName = tag + pid + toString(randNumber) + ".blast"; @@ -38,7 +42,7 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) { for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); } path = path.substr(0, (tempPath.find_last_of('m'))); -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) path += "blast/bin/"; #else path += "blast\\bin\\"; @@ -47,7 +51,7 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) { string formatdbCommand; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) formatdbCommand = path + "formatdb"; // format the database, -o option gives us the ability #else formatdbCommand = path + "formatdb.exe"; @@ -60,7 +64,7 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) { if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + formatdbCommand + " file does not exist. mothur requires formatdb.exe."); m->mothurOutEndLine(); m->control_pressed = true; } string blastCommand; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) blastCommand = path + "blastall"; // format the database, -o option gives us the ability #else blastCommand = path + "blastall.exe"; @@ -76,7 +80,7 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) { string megablastCommand; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) megablastCommand = path + "megablast"; // format the database, -o option gives us the ability #else megablastCommand = path + "megablast.exe"; @@ -96,11 +100,12 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) { } /**************************************************************************************************/ -BlastDB::BlastDB(string b) : Database() { +BlastDB::BlastDB(string b, int tid) : Database() { try { count = 0; path = b; + threadID = tid; //make sure blast exists in the write place if (path == "") { @@ -109,7 +114,7 @@ BlastDB::BlastDB(string b) : Database() { for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); } path = path.substr(0, (tempPath.find_last_of('m'))); -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) path += "blast/bin/"; #else path += "blast\\bin\\"; @@ -118,8 +123,10 @@ BlastDB::BlastDB(string b) : Database() { int randNumber = rand(); string pid = ""; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - pid += toString(getpid()); +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + pid += getpid(); +#else + pid += toString(threadID); #endif dbFileName = pid + toString(randNumber) + ".template.unaligned.fasta"; @@ -127,7 +134,7 @@ BlastDB::BlastDB(string b) : Database() { blastFileName = pid + toString(randNumber) + ".blast"; string formatdbCommand; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) formatdbCommand = path + "formatdb"; // format the database, -o option gives us the ability #else formatdbCommand = path + "formatdb.exe"; @@ -142,7 +149,7 @@ BlastDB::BlastDB(string b) : Database() { if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + formatdbCommand + " file does not exist. mothur requires formatdb.exe."); m->mothurOutEndLine(); m->control_pressed = true; } string blastCommand; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) blastCommand = path + "blastall"; // format the database, -o option gives us the ability #else blastCommand = path + "blastall.exe"; @@ -158,7 +165,7 @@ BlastDB::BlastDB(string b) : Database() { string megablastCommand; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) megablastCommand = path + "megablast"; // format the database, -o option gives us the ability #else megablastCommand = path + "megablast.exe"; @@ -206,10 +213,7 @@ vector BlastDB::findClosestSequences(Sequence* seq, int n) { ofstream queryFile; int randNumber = rand(); - string pid = ""; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - pid += toString(getpid()); -#endif + string pid = scrubName(seq->getName()); m->openOutputFile((queryFileName+pid+toString(randNumber)), queryFile); queryFile << '>' << seq->getName() << endl; @@ -222,7 +226,7 @@ vector BlastDB::findClosestSequences(Sequence* seq, int n) { // long. With this setting, it seems comparable in speed to the suffix tree approach. string blastCommand; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) blastCommand = path + "blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n); blastCommand += (" -i " + (queryFileName+pid+toString(randNumber)) + " -o " + blastFileName+pid+toString(randNumber)); @@ -273,10 +277,7 @@ vector BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) { ofstream queryFile; int randNumber = rand(); - string pid = ""; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - pid += toString(getpid()); -#endif + string pid = scrubName(seq->getName()); m->openOutputFile((queryFileName+pid+toString(randNumber)), queryFile); queryFile << '>' << seq->getName() << endl; @@ -288,7 +289,7 @@ vector BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) { // long. With this setting, it seems comparable in speed to the suffix tree approach. //7000004128189528left 0 100 66 0 0 1 66 61 126 1e-31 131 string blastCommand; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) blastCommand = path + "megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn blastCommand += (" -i " + (queryFileName+pid+toString(randNumber)) + " -o " + blastFileName+pid+toString(randNumber)); #else @@ -362,7 +363,7 @@ void BlastDB::generateDB() { string formatdbCommand; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) formatdbCommand = path + "formatdb -p F -o T -i " + dbFileName; // format the database, -o option gives us the ability #else //formatdbCommand = path + "blast\\bin\\formatdb -p F -o T -i " + dbFileName; // format the database, -o option gives us the ability @@ -382,6 +383,24 @@ void BlastDB::generateDB() { } } /**************************************************************************************************/ +string BlastDB::scrubName(string seqName) { + try { + + string cleanName = ""; + + for (int i = 0; i < seqName.length(); i++) { + if (isalnum(seqName[i])) { cleanName += seqName[i]; } + else { cleanName += "_"; } + } + + return cleanName; + } + catch(exception& e) { + m->errorOut(e, "BlastDB", "scrubName"); + exit(1); + } +} +/**************************************************************************************************/ /**************************************************************************************************/