X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=chimeraslayer.cpp;h=3497579167210514496ac904f6c9f19d17140b16;hb=c568e44459665aca4d8233d45514ab7e0152feeb;hp=769032856bb5ca0801b21b3457ea5fa36bc90a20;hpb=81276c241b984898f8d30ad123c00592ee6db7b8;p=mothur.git diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index 7690328..3497579 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -10,29 +10,112 @@ #include "chimeraslayer.h" #include "chimerarealigner.h" #include "kmerdb.hpp" +#include "blastdb.hpp" //*************************************************************************************************************** -ChimeraSlayer::ChimeraSlayer(string mode, bool r, string f) : searchMethod(mode), realign(r), fastafile(f) { - decalc = new DeCalculator(); +ChimeraSlayer::ChimeraSlayer(string file, string temp, string mode, int k, int ms, int mms, int win, float div, +int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() { + try { + fastafile = file; + templateFileName = temp; templateSeqs = readSeqs(temp); + searchMethod = mode; + kmerSize = k; + match = ms; + misMatch = mms; + window = win; + divR = div; + minSim = minsim; + minCov = mincov; + minBS = minbs; + minSNP = minsnp; + parents = par; + iters = it; + increment = inc; + numWanted = numw; + realign = r; + + decalc = new DeCalculator(); + + doPrep(); + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer"); + exit(1); + } } //*************************************************************************************************************** -void ChimeraSlayer::doPrep() { +int ChimeraSlayer::doPrep() { try { - + + //read in all query seqs + vector tempQuerySeqs = readSeqs(fastafile); + + vector temp = templateSeqs; + for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); } + + createFilter(temp, 0.0); //just removed columns where all seqs have a gap + + for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; } + + if (m->control_pressed) { return 0; } + + //run filter on template + for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { return 0; } runFilter(templateSeqs[i]); } + string kmerDBNameLeft; string kmerDBNameRight; - + //generate the kmerdb to pass to maligner if (searchMethod == "kmer") { + string templatePath = m->hasPath(templateFileName); + string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName)); + databaseRight = new KmerDB(rightTemplateFileName, kmerSize); + + string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName)); + databaseLeft = new KmerDB(leftTemplateFileName, kmerSize); + #ifdef USE_MPI + for (int i = 0; i < templateSeqs.size(); i++) { + + if (m->control_pressed) { return 0; } + + string leftFrag = templateSeqs[i]->getUnaligned(); + leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33)); + + Sequence leftTemp(templateSeqs[i]->getName(), leftFrag); + databaseLeft->addSequence(leftTemp); + } + databaseLeft->generateDB(); + databaseLeft->setNumSeqs(templateSeqs.size()); + + for (int i = 0; i < templateSeqs.size(); i++) { + if (m->control_pressed) { return 0; } + + string rightFrag = templateSeqs[i]->getUnaligned(); + rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66)); + + Sequence rightTemp(templateSeqs[i]->getName(), rightFrag); + databaseRight->addSequence(rightTemp); + } + databaseRight->generateDB(); + databaseRight->setNumSeqs(templateSeqs.size()); + + #else //leftside - string leftTemplateFileName = "left." + templateFileName; - databaseLeft = new KmerDB(leftTemplateFileName, kmerSize); kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; ifstream kmerFileTestLeft(kmerDBNameLeft.c_str()); + bool needToGenerateLeft = true; + + if(kmerFileTestLeft){ + bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion()); + if (GoodFile) { needToGenerateLeft = false; } + } - if(!kmerFileTestLeft){ + if(needToGenerateLeft){ for (int i = 0; i < templateSeqs.size(); i++) { + + if (m->control_pressed) { return 0; } + string leftFrag = templateSeqs[i]->getUnaligned(); leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33)); @@ -49,14 +132,20 @@ void ChimeraSlayer::doPrep() { databaseLeft->setNumSeqs(templateSeqs.size()); //rightside - string rightTemplateFileName = "right." + templateFileName; - databaseRight = new KmerDB(rightTemplateFileName, kmerSize); kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; ifstream kmerFileTestRight(kmerDBNameRight.c_str()); + bool needToGenerateRight = true; + + if(kmerFileTestRight){ + bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion()); + if (GoodFile) { needToGenerateRight = false; } + } - if(!kmerFileTestRight){ + if(needToGenerateRight){ for (int i = 0; i < templateSeqs.size(); i++) { + if (m->control_pressed) { return 0; } + string rightFrag = templateSeqs[i]->getUnaligned(); rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66)); @@ -71,54 +160,40 @@ void ChimeraSlayer::doPrep() { kmerFileTestRight.close(); databaseRight->setNumSeqs(templateSeqs.size()); - - } + #endif + }else if (searchMethod == "blast") { - int start = time(NULL); - //filter the sequences - //read in all query seqs - ifstream in; - openInputFile(fastafile, in); - - vector tempQuerySeqs; - while(!in.eof()){ - Sequence* s = new Sequence(in); - gobble(in); - - if (s->getName() != "") { tempQuerySeqs.push_back(s); } + //generate blastdb + databaseLeft = new BlastDB(-2.0, -1.0, match, misMatch); + for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); } + databaseLeft->generateDB(); + databaseLeft->setNumSeqs(templateSeqs.size()); } - in.close(); - vector temp = templateSeqs; - for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); } - - createFilter(temp, 0.0); //just removed columns where all seqs have a gap - - for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; } - - //run filter on template - for (int i = 0; i < templateSeqs.size(); i++) { runFilter(templateSeqs[i]); } - - mothurOutEndLine(); mothurOut("It took " + toString(time(NULL) - start) + " secs to filter."); mothurOutEndLine(); + return 0; } catch(exception& e) { - errorOut(e, "ChimeraSlayer", "doprep"); + m->errorOut(e, "ChimeraSlayer", "doprep"); exit(1); } } //*************************************************************************************************************** -ChimeraSlayer::~ChimeraSlayer() { delete decalc; if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; } } +ChimeraSlayer::~ChimeraSlayer() { + delete decalc; + if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; } + else if (searchMethod == "blast") { delete databaseLeft; } +} //*************************************************************************************************************** void ChimeraSlayer::printHeader(ostream& out) { - mothurOutEndLine(); - mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results."); - mothurOutEndLine(); + m->mothurOutEndLine(); + m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results."); + m->mothurOutEndLine(); out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n"; } //*************************************************************************************************************** -void ChimeraSlayer::print(ostream& out) { +int ChimeraSlayer::print(ostream& out, ostream& outAcc) { try { if (chimeraFlags == "yes") { string chimeraFlag = "no"; @@ -129,37 +204,108 @@ void ChimeraSlayer::print(ostream& out) { if (chimeraFlag == "yes") { if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) { - mothurOut(querySeq->getName() + "\tyes"); mothurOutEndLine(); + m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine(); + outAcc << querySeq->getName() << endl; } } - printBlock(chimeraResults[0], out); + printBlock(chimeraResults[0], chimeraFlag, out); out << endl; }else { out << querySeq->getName() << "\tno" << endl; } + return 0; + } catch(exception& e) { - errorOut(e, "ChimeraSlayer", "print"); + m->errorOut(e, "ChimeraSlayer", "print"); exit(1); } } +#ifdef USE_MPI +//*************************************************************************************************************** +int ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { + try { + MPI_Status status; + bool results = false; + string outAccString = ""; + string outputString = ""; + + if (chimeraFlags == "yes") { + string chimeraFlag = "no"; + if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR) + || + (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; } + + + if (chimeraFlag == "yes") { + if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) { + cout << querySeq->getName() << "\tyes" << endl; + outAccString += querySeq->getName() + "\n"; + results = true; + + //write to accnos file + int length = outAccString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outAccString.c_str(), length); + + MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status); + delete buf2; + } + } + + outputString = getBlock(chimeraResults[0], chimeraFlag); + outputString += "\n"; + //cout << outputString << endl; + //write to output file + int length = outputString.length(); + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); + + MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); + delete buf; + + }else { + outputString += querySeq->getName() + "\tno\n"; + //cout << outputString << endl; + //write to output file + int length = outputString.length(); + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); + + MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); + delete buf; + } + + + return results; + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "print"); + exit(1); + } +} +#endif + //*************************************************************************************************************** int ChimeraSlayer::getChimeras(Sequence* query) { try { chimeraFlags = "no"; - + //filter query - spotMap = runFilter(query); + spotMap = runFilter(query); querySeq = query; //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity maligner = new Maligner(templateSeqs, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight); slayer = new Slayer(window, increment, minSim, divR, iters, minSNP); + + if (m->control_pressed) { return 0; } string chimeraFlag = maligner->getResults(query, decalc); + if (m->control_pressed) { return 0; } vector Results = maligner->getOutput(); - + //found in testing realigning only made things worse if (realign) { ChimeraReAligner realigner(templateSeqs, match, misMatch); @@ -232,8 +378,11 @@ int ChimeraSlayer::getChimeras(Sequence* query) { spotMap = decalc->getMaskMap(); } + if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; } + //send to slayer chimeraFlags = slayer->getResults(query, seqsForSlayer); + if (m->control_pressed) { return 0; } chimeraResults = slayer->getOutput(); //free memory @@ -246,14 +395,14 @@ int ChimeraSlayer::getChimeras(Sequence* query) { return 0; } catch(exception& e) { - errorOut(e, "ChimeraSlayer", "getChimeras"); + m->errorOut(e, "ChimeraSlayer", "getChimeras"); exit(1); } } //*************************************************************************************************************** -void ChimeraSlayer::printBlock(data_struct data, ostream& out){ +void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){ try { - //out << "Name\tParentA\tParentB\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n"; + //out << ":)\n"; out << querySeq->getName() << '\t'; out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t'; @@ -263,7 +412,7 @@ void ChimeraSlayer::printBlock(data_struct data, ostream& out){ out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t'; out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t'; - out << "yes\t" << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t'; + out << flag << '\t' << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t'; //out << "Similarity of parents: " << data.ab << endl; //out << "Similarity of query to parentA: " << data.qa << endl; @@ -281,9 +430,30 @@ void ChimeraSlayer::printBlock(data_struct data, ostream& out){ } catch(exception& e) { - errorOut(e, "ChimeraSlayer", "printBlock"); + m->errorOut(e, "ChimeraSlayer", "printBlock"); exit(1); } } //*************************************************************************************************************** +string ChimeraSlayer::getBlock(data_struct data, string flag){ + try { + + string outputString = ""; + + outputString += querySeq->getName() + "\t"; + outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t"; + + outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t"; + outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t"; + + outputString += flag + "\t" + toString(spotMap[data.winLStart]) + "-" + toString(spotMap[data.winLEnd]) + "\t" + toString(spotMap[data.winRStart]) + "-" + toString(spotMap[data.winREnd]) + "\t"; + + return outputString; + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "getBlock"); + exit(1); + } +} +//***************************************************************************************************************/