X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=chimeraslayer.cpp;h=102db7478223d0027e676304288552a2148966fb;hp=bd1908dd035f6a123ee1f749dfb47579ef5b831d;hb=050a3ff02473a3d4c0980964e1a9ebe52e55d6b8;hpb=58cc09a375d1e1afceef3b036574ff21394ccc4d diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index bd1908d..102db74 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -8,294 +8,1284 @@ */ #include "chimeraslayer.h" +#include "chimerarealigner.h" +#include "kmerdb.hpp" +#include "blastdb.hpp" //*************************************************************************************************************** -ChimeraSlayer::ChimeraSlayer(string filename, string temp) { fastafile = filename; templateFile = temp; } +ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div, +int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() { + try { + fastafile = file; + templateFileName = temp; templateSeqs = readSeqs(temp); + searchMethod = mode; + kmerSize = k; + match = ms; + misMatch = mms; + window = win; + divR = div; + minSim = minsim; + minCov = mincov; + minBS = minbs; + minSNP = minsnp; + parents = par; + iters = it; + increment = inc; + numWanted = numw; + realign = r; + trimChimera = trim; + numNoParents = 0; + blastlocation = blas; + threadID = tid; + + doPrep(); + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer"); + exit(1); + } +} //*************************************************************************************************************** - -ChimeraSlayer::~ChimeraSlayer() { +//template=self, byGroup parameter used for mpienabled version to read the template as MPI_COMM_SELF instead of MPI_COMM_WORLD +ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map& prior, string mode, int k, int ms, int mms, int win, float div, + int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid, bool bg) : Chimera() { try { - for (int i = 0; i < querySeqs.size(); i++) { delete querySeqs[i]; } - for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; } + byGroup = bg; + fastafile = file; templateSeqs = readSeqs(fastafile); + templateFileName = temp; + searchMethod = mode; + kmerSize = k; + match = ms; + misMatch = mms; + window = win; + divR = div; + minSim = minsim; + minCov = mincov; + minBS = minbs; + minSNP = minsnp; + parents = par; + iters = it; + increment = inc; + numWanted = numw; + realign = r; + trimChimera = trim; + priority = prior; + numNoParents = 0; + blastlocation = blas; + threadID = tid; + + + createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap + + if (searchMethod == "distance") { + //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap + + //run filter on template copying templateSeqs into filteredTemplateSeqs + for (int i = 0; i < templateSeqs.size(); i++) { + if (m->control_pressed) { break; } + + Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned()); + runFilter(newSeq); + filteredTemplateSeqs.push_back(newSeq); + } + } } catch(exception& e) { - errorOut(e, "ChimeraSlayer", "~ChimeraSlayer"); + m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer"); exit(1); } -} +} //*************************************************************************************************************** -void ChimeraSlayer::print(ostream& out) { +//template=self +ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map& prior, string mode, int k, int ms, int mms, int win, float div, + int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() { try { - mothurOutEndLine(); + fastafile = file; templateSeqs = readSeqs(fastafile); + templateFileName = temp; + searchMethod = mode; + kmerSize = k; + match = ms; + misMatch = mms; + window = win; + divR = div; + minSim = minsim; + minCov = mincov; + minBS = minbs; + minSNP = minsnp; + parents = par; + iters = it; + increment = inc; + numWanted = numw; + realign = r; + trimChimera = trim; + priority = prior; + numNoParents = 0; + blastlocation = blas; + threadID = tid; + - for (int i = 0; i < querySeqs.size(); i++) { + createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap - if (chimeraFlags[i] == "yes") { - mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine(); + if (searchMethod == "distance") { + //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap - }else{ - out << querySeqs[i]->getName() << "\tno" << endl; - mothurOut("no"); + //run filter on template copying templateSeqs into filteredTemplateSeqs + for (int i = 0; i < templateSeqs.size(); i++) { + if (m->control_pressed) { break; } + + Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned()); + runFilter(newSeq); + filteredTemplateSeqs.push_back(newSeq); } } -/* - - my $div_ratio_QLA_QRB = $data_struct->{div_ratio_QLA_QRB}; - my $div_ratio_QRA_QLB = $data_struct->{div_ratio_QLB_QRA}; - - my $per_id_QLA = $data_struct->{per_id_QLA}; - my $per_id_QRB = $data_struct->{per_id_QRB}; - my $per_id_AB = $data_struct->{per_id_AB}; - my $per_id_QA = $data_struct->{per_id_QA}; - my $per_id_QB = $data_struct->{per_id_QB}; - my $per_id_LAB = $data_struct->{per_id_LAB}; - my $per_id_RAB = $data_struct->{per_id_RAB}; - my $per_id_QRA = $data_struct->{per_id_QRA}; - my $per_id_QLB = $data_struct->{per_id_QLB}; - my $per_id_QLB_QRA = $data_struct->{per_id_QLB_QRA}; - my $per_id_QLA_QRB = $data_struct->{per_id_QLA_QRB}; - - my $win_left_end5 = $data_struct->{win_left_end5}; - my $win_left_end3 = $data_struct->{win_left_end3}; - my $win_right_end5 = $data_struct->{win_right_end5}; - my $win_right_end3 = $data_struct->{win_right_end3}; - my $Q = $data_struct->{query_alignment}; - my $A = $data_struct->{parent_A_alignment}; - my $B = $data_struct->{parent_B_alignment}; - my $BS_A = $data_struct->{BS_A}; - my $BS_B = $data_struct->{BS_B}; - - my @Q_chars = @{$Q->{align}}; - my @A_chars = @{$A->{align}}; - my @B_chars = @{$B->{align}}; - - my $query_acc = $Q->{acc}; - my $A_acc = $A->{acc}; - my $B_acc = $B->{acc}; - - my $break_left = $Q->{seqPos}->[$win_left_end3]; - my $break_right = $Q->{seqPos}->[$win_right_end5]; - - - cout << "//\n## CHIMERA\t" << querySeqs[i]->getName() << "\t" << $break_left-$break_right" << endl - << "\tDIV_QLARB: ". sprintf("%.3f", $div_ratio_QLA_QRB) - << "\tBS_QLARB: " . sprintf("%.2f", $BS_A) - << "\tDIV_QRALB: " . sprintf("%.3f", $div_ratio_QRA_QLB) - << "\tBS_QRALB: " . sprintf("%.2f", $BS_B) - << "\t$A_acc\t$B_acc" - << "\tbreakpoint: $break_left-$break_right\n\n"; - - ## draw illustration: - - print " Per_id parents: " . sprintf("%.2f", $per_id_AB) . "\n\n"; - print " Per_id(Q,A): " . sprintf("%.2f", $per_id_QA) . "\n"; - print "--------------------------------------------------- A: $A_acc\n" - . " " . sprintf("%.2f", $per_id_QLA) . " " . sprintf("%.2f", $per_id_QRA) . "\n" - . "~~~~~~~~~~~~~~~~~~~~~~~~\\ /~~~~~~~~~~~~~~~~~~~~~~~~ Q: $query_acc\n" - . "DivR: " . sprintf("%.3f", $div_ratio_QLA_QRB) . " BS: " . sprintf("%.2f", $BS_A) . " |\n" - . "Per_id(QLA,QRB): " . sprintf("%.2f", $per_id_QLA_QRB) . " |\n" - . " |\n" - . " (L-AB: " . sprintf("%.2f", $per_id_LAB) . ") | (R-AB: " . sprintf("%.2f", $per_id_RAB) . ")\n" - . " WinL:$win_left_end5-$win_left_end3 | WinR:$win_right_end5-$win_right_end3\n" - . " |\n" - . "Per_id(QLB,QRA): " . sprintf("%.2f", $per_id_QLB_QRA) . " |\n" - . "DivR: " . sprintf("%.3f", $div_ratio_QRA_QLB) . " BS: " . sprintf("%.2f", $BS_B) . " |\n" - . "~~~~~~~~~~~~~~~~~~~~~~~~/ \\~~~~~~~~~~~~~~~~~~~~~~~~~ Q: $query_acc\n" - . " " . sprintf("%.2f", $per_id_QLB) . " " . sprintf("%.2f", $per_id_QRB) . "\n" - . "---------------------------------------------------- B: $B_acc\n"; - print " Per_id(Q,B): ". sprintf("%.2f", $per_id_QB) . "\n\n"; + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer"); + exit(1); + } +} +//*************************************************************************************************************** +int ChimeraSlayer::doPrep() { + try { + if (searchMethod == "distance") { + //read in all query seqs + vector tempQuerySeqs = readSeqs(fastafile); + + vector temp = templateSeqs; + for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); } + + createFilter(temp, 0.0); //just removed columns where all seqs have a gap + + for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; } + + if (m->control_pressed) { return 0; } + + //run filter on template copying templateSeqs into filteredTemplateSeqs + for (int i = 0; i < templateSeqs.size(); i++) { + if (m->control_pressed) { return 0; } + + Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned()); + runFilter(newSeq); + filteredTemplateSeqs.push_back(newSeq); + } + } + string kmerDBNameLeft; + string kmerDBNameRight; - my $deltaL = $per_id_QLA - $per_id_QLB; - my $deltaR = $per_id_QRA - $per_id_QRB; + //generate the kmerdb to pass to maligner + if (searchMethod == "kmer") { + string templatePath = m->hasPath(templateFileName); + string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName)); + databaseRight = new KmerDB(rightTemplateFileName, kmerSize); + + string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName)); + databaseLeft = new KmerDB(leftTemplateFileName, kmerSize); + #ifdef USE_MPI + for (int i = 0; i < templateSeqs.size(); i++) { + + if (m->control_pressed) { return 0; } + + string leftFrag = templateSeqs[i]->getUnaligned(); + leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33)); + + Sequence leftTemp(templateSeqs[i]->getName(), leftFrag); + databaseLeft->addSequence(leftTemp); + } + databaseLeft->generateDB(); + databaseLeft->setNumSeqs(templateSeqs.size()); + + for (int i = 0; i < templateSeqs.size(); i++) { + if (m->control_pressed) { return 0; } + + string rightFrag = templateSeqs[i]->getUnaligned(); + rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66)); + + Sequence rightTemp(templateSeqs[i]->getName(), rightFrag); + databaseRight->addSequence(rightTemp); + } + databaseRight->generateDB(); + databaseRight->setNumSeqs(templateSeqs.size()); - print "DeltaL: " . sprintf("%.2f", $deltaL) . " DeltaR: " . sprintf("%.2f", $deltaR) . "\n\n"; - - unless ($printAlignmentsFlag) { return; } - - - ## build the left windows: - my @Q_left_win = @Q_chars[$win_left_end5..$win_left_end3]; - my @A_left_win = @A_chars[$win_left_end5..$win_left_end3]; - my @B_left_win = @B_chars[$win_left_end5..$win_left_end3]; - - &print_alignment($A_acc, \@A_left_win, - $query_acc, \@Q_left_win, - $B_acc, \@B_left_win); - - print "\t\t** Breakpoint **\n\n"; - - my @Q_right_win = @Q_chars[$win_right_end5..$win_right_end3]; - my @A_right_win = @A_chars[$win_right_end5..$win_right_end3]; - my @B_right_win = @B_chars[$win_right_end5..$win_right_end3]; - - &print_alignment($A_acc, \@A_right_win, - $query_acc, \@Q_right_win, - $B_acc, \@B_right_win); - - return; -} + #else + //leftside + kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; + ifstream kmerFileTestLeft(kmerDBNameLeft.c_str()); + bool needToGenerateLeft = true; + + if(kmerFileTestLeft){ + bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion()); + if (GoodFile) { needToGenerateLeft = false; } + } + + if(needToGenerateLeft){ + + for (int i = 0; i < templateSeqs.size(); i++) { + + if (m->control_pressed) { return 0; } + + string leftFrag = templateSeqs[i]->getUnaligned(); + leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33)); + + Sequence leftTemp(templateSeqs[i]->getName(), leftFrag); + databaseLeft->addSequence(leftTemp); + } + databaseLeft->generateDB(); + + }else { + databaseLeft->readKmerDB(kmerFileTestLeft); + } + kmerFileTestLeft.close(); + + databaseLeft->setNumSeqs(templateSeqs.size()); + + //rightside + kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; + ifstream kmerFileTestRight(kmerDBNameRight.c_str()); + bool needToGenerateRight = true; + + if(kmerFileTestRight){ + bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion()); + if (GoodFile) { needToGenerateRight = false; } + } + + if(needToGenerateRight){ + + for (int i = 0; i < templateSeqs.size(); i++) { + if (m->control_pressed) { return 0; } + + string rightFrag = templateSeqs[i]->getUnaligned(); + rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66)); + + Sequence rightTemp(templateSeqs[i]->getName(), rightFrag); + databaseRight->addSequence(rightTemp); + } + databaseRight->generateDB(); + + }else { + databaseRight->readKmerDB(kmerFileTestRight); + } + kmerFileTestRight.close(); + + databaseRight->setNumSeqs(templateSeqs.size()); + #endif + }else if (searchMethod == "blast") { + + //generate blastdb + databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3, blastlocation, threadID); + + if (m->control_pressed) { return 0; } + for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); } + databaseLeft->generateDB(); + databaseLeft->setNumSeqs(templateSeqs.size()); + } + + return 0; -#### + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "doprep"); + exit(1); + } +} +//*************************************************************************************************************** +vector ChimeraSlayer::getTemplate(Sequence q, vector& userTemplateFiltered) { + try { + + //when template=self, the query file is sorted from most abundance to least abundant + //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant + vector userTemplate; - */ + int myAbund = priority[q.getName()]; + + for (int i = 0; i < templateSeqs.size(); i++) { + + if (m->control_pressed) { return userTemplate; } + + //have I reached a sequence with the same abundance as myself? + if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; } + + //if its am not chimeric add it + if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { + userTemplate.push_back(templateSeqs[i]); + if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); } + } + } + + //avoids nuisance error from formatdb for making blank blast database + if (userTemplate.size() == 0) { + return userTemplate; + } + + string kmerDBNameLeft; + string kmerDBNameRight; + + //generate the kmerdb to pass to maligner + if (searchMethod == "kmer") { + string templatePath = m->hasPath(templateFileName); + string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName)); + databaseRight = new KmerDB(rightTemplateFileName, kmerSize); + + string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName)); + databaseLeft = new KmerDB(leftTemplateFileName, kmerSize); +#ifdef USE_MPI + for (int i = 0; i < userTemplate.size(); i++) { + + if (m->control_pressed) { return userTemplate; } + string leftFrag = userTemplate[i]->getUnaligned(); + leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33)); + + Sequence leftTemp(userTemplate[i]->getName(), leftFrag); + databaseLeft->addSequence(leftTemp); + } + databaseLeft->generateDB(); + databaseLeft->setNumSeqs(userTemplate.size()); + + for (int i = 0; i < userTemplate.size(); i++) { + if (m->control_pressed) { return userTemplate; } + + string rightFrag = userTemplate[i]->getUnaligned(); + rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66)); + + Sequence rightTemp(userTemplate[i]->getName(), rightFrag); + databaseRight->addSequence(rightTemp); + } + databaseRight->generateDB(); + databaseRight->setNumSeqs(userTemplate.size()); + +#else + + + for (int i = 0; i < userTemplate.size(); i++) { + + if (m->control_pressed) { return userTemplate; } + + string leftFrag = userTemplate[i]->getUnaligned(); + leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33)); + + Sequence leftTemp(userTemplate[i]->getName(), leftFrag); + databaseLeft->addSequence(leftTemp); + } + databaseLeft->generateDB(); + databaseLeft->setNumSeqs(userTemplate.size()); + + for (int i = 0; i < userTemplate.size(); i++) { + if (m->control_pressed) { return userTemplate; } + + string rightFrag = userTemplate[i]->getUnaligned(); + rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66)); + + Sequence rightTemp(userTemplate[i]->getName(), rightFrag); + databaseRight->addSequence(rightTemp); + } + databaseRight->generateDB(); + databaseRight->setNumSeqs(userTemplate.size()); +#endif + }else if (searchMethod == "blast") { + + //generate blastdb + databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3, blastlocation, threadID); + + if (m->control_pressed) { return userTemplate; } + + for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); } + databaseLeft->generateDB(); + databaseLeft->setNumSeqs(userTemplate.size()); + } + + return userTemplate; + } catch(exception& e) { - errorOut(e, "ChimeraSlayer", "print"); + m->errorOut(e, "ChimeraSlayer", "getTemplate"); exit(1); } } //*************************************************************************************************************** -void ChimeraSlayer::getChimeras() { +ChimeraSlayer::~ChimeraSlayer() { + if (templateFileName != "self") { + if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; } + else if (searchMethod == "blast") { delete databaseLeft; } + } +} +//*************************************************************************************************************** +void ChimeraSlayer::printHeader(ostream& out) { + m->mothurOutEndLine(); + m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results."); + m->mothurOutEndLine(); + + out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n"; +} +//*************************************************************************************************************** +Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) { try { + Sequence trim; + if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); } - //read in query sequences and subject sequences - mothurOut("Reading sequences and template file... "); cout.flush(); - querySeqs = readSeqs(fastafile); - templateSeqs = readSeqs(templateFile); - mothurOut("Done."); mothurOutEndLine(); - - int numSeqs = querySeqs.size(); + if (chimeraFlags == "yes") { + string chimeraFlag = "no"; + if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR) + || + (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; } + + + if (chimeraFlag == "yes") { + if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) { + m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine(); + outAcc << querySeq.getName() << endl; + + if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); } + + if (trimChimera) { + int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart; + int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart; + + string newAligned = trim.getAligned(); + + if (lengthLeft > lengthRight) { //trim right + for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else { //trim left + for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; } + } + trim.setAligned(newAligned); + } + } + } + + printBlock(chimeraResults[0], chimeraFlag, out); + out << endl; + }else { + out << querySeq.getName() << "\tno" << endl; + } - chimeraResults.resize(numSeqs); - chimeraFlags.resize(numSeqs, "no"); + return trim; - //break up file if needed - int linesPerProcess = numSeqs / processors ; + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "print"); + exit(1); + } +} +//*************************************************************************************************************** +Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) { + try { + Sequence trim; + + if (trimChimera) { + string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned(); + trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned); + } - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - //find breakup of sequences for all times we will Parallelize - if (processors == 1) { lines.push_back(new linePair(0, numSeqs)); } - else { - //fill line pairs - for (int i = 0; i < (processors-1); i++) { - lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess))); + if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) { + + string chimeraFlag = "no"; + if (leftPiece.flag == "yes") { + + if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR) + || + (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; } + } + + if (rightPiece.flag == "yes") { + if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR) + || + (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; } + } + + bool rightChimeric = false; + bool leftChimeric = false; + + if (chimeraFlag == "yes") { + //which peice is chimeric or are both + if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } } + if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } } + + if (rightChimeric || leftChimeric) { + m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine(); + outAcc << querySeq.getName() << endl; + + if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); } + + if (trimChimera) { + string newAligned = trim.getAligned(); + + //right side is fine so keep that + if ((leftChimeric) && (!rightChimeric)) { + for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } + }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that + for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else { //both sides are chimeric, keep longest piece + + int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart; + int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart; + + int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4 + int length = lengthLeftLeft; + if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; } + + int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart; + int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart; + + if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; } + if (lengthRightRight > length) { longest = 4; } + + if (longest == 1) { //leftleft + for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else if (longest == 2) { //leftright + //get rid of leftleft + for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; } + //get rid of right + for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else if (longest == 3) { //rightleft + //get rid of left + for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } + //get rid of rightright + for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else { //rightright + //get rid of left + for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } + //get rid of rightleft + for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; } + } + } + + trim.setAligned(newAligned); + } + } - //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end - int i = processors - 1; - lines.push_back(new linePair((i*linesPerProcess), numSeqs)); } - #else - lines.push_back(new linePair(0, numSeqs)); - #endif + + printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out); + out << endl; + }else { + out << querySeq.getName() << "\tno" << endl; + } + + return trim; + + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "print"); + exit(1); + } +} + +#ifdef USE_MPI +//*************************************************************************************************************** +Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece, bool& chimFlag) { + try { + MPI_Status status; + bool results = false; + string outAccString = ""; + string outputString = ""; + chimFlag = false; - if (seqMask != "") { decalc = new DeCalculator(); } //to use below + Sequence trim; - //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity - maligner = new Maligner(templateSeqs, numWanted, match, misMatch, 1.01, minSim); - slayer = new Slayer(window, increment, minSim, divR); + if (trimChimera) { + string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned(); + trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned); + } - for (int i = 0; i < querySeqs.size(); i++) { - string chimeraFlag = maligner->getResults(querySeqs[i]); - float percentIdentical = maligner->getPercentID(); - vector Results = maligner->getOutput(); + if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) { - //cout << querySeqs[i]->getName() << '\t' << chimeraFlag << '\t' << percentIdentical << endl; + string chimeraFlag = "no"; + if (leftPiece.flag == "yes") { + + if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR) + || + (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; } + } - for (int j = 0; j < Results.size(); j++) { - //cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl; + if (rightPiece.flag == "yes") { + if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR) + || + (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; } } - if (chimeraFlag == "yes") { + bool rightChimeric = false; + bool leftChimeric = false; + + cout << endl; - //get sequence that were given from maligner results - vector seqs; - for (int j = 0; j < Results.size(); j++) { - Sequence* seq = getSequence(Results[j].parent); //makes copy so you can filter and mask and not effect template + if (chimeraFlag == "yes") { + //which peice is chimeric or are both + if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } } + if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } } + + if (rightChimeric || leftChimeric) { + cout << querySeq.getName() << "\tyes" << endl; + outAccString += querySeq.getName() + "\n"; + results = true; + + if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); } - //seq = NULL if error occurred in getSequence - if (seq == NULL) { break; } - else { - SeqDist member; - member.seq = seq; - member.dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal; - seqs.push_back(member); + //write to accnos file + int length = outAccString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outAccString.c_str(), length); + + MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status); + chimFlag = true; + delete buf2; + + if (trimChimera) { + string newAligned = trim.getAligned(); + + //right side is fine so keep that + if ((leftChimeric) && (!rightChimeric)) { + for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } + }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that + for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else { //both sides are chimeric, keep longest piece + + int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart; + int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart; + + int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4 + int length = lengthLeftLeft; + if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; } + + int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart; + int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart; + + if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; } + if (lengthRightRight > length) { longest = 4; } + + if (longest == 1) { //leftleft + for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else if (longest == 2) { //leftright + //get rid of leftleft + for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; } + //get rid of right + for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else if (longest == 3) { //rightleft + //get rid of left + for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } + //get rid of rightright + for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else { //rightright + //get rid of left + for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } + //get rid of rightleft + for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; } + } + } + + trim.setAligned(newAligned); } + } + } + + outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag); + outputString += "\n"; + + //write to output file + int length = outputString.length(); + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); + + MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); + delete buf; + + }else { + outputString += querySeq.getName() + "\tno\n"; + + //write to output file + int length = outputString.length(); + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); + + MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); + delete buf; + } + + + return trim; + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "print"); + exit(1); + } +} +//*************************************************************************************************************** +Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { + try { + MPI_Status status; + bool results = false; + string outAccString = ""; + string outputString = ""; + + Sequence trim; + if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); } + + if (chimeraFlags == "yes") { + string chimeraFlag = "no"; + if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR) + || + (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; } + - //limit number of parents to explore - default 5 - if (Results.size() > parents) { - //sort by distance - sort(seqs.begin(), seqs.end(), compareSeqDist); - //prioritize larger more similiar sequence fragments - reverse(seqs.begin(), seqs.end()); + if (chimeraFlag == "yes") { + if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) { + cout << querySeq.getName() << "\tyes" << endl; + outAccString += querySeq.getName() + "\n"; + results = true; + + if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); } + + //write to accnos file + int length = outAccString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outAccString.c_str(), length); + + MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status); + delete buf2; - for (int k = seqs.size()-1; k > (parents-1); k--) { - delete seqs[k].seq; - seqs.pop_back(); + if (trimChimera) { + int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart; + int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart; + + string newAligned = trim.getAligned(); + if (lengthLeft > lengthRight) { //trim right + for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else { //trim left + for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; } + } + trim.setAligned(newAligned); } } + } + + outputString = getBlock(chimeraResults[0], chimeraFlag); + outputString += "\n"; + + //write to output file + int length = outputString.length(); + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); + + MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); + delete buf; + + }else { + outputString += querySeq.getName() + "\tno\n"; + + //write to output file + int length = outputString.length(); + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); + + MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); + delete buf; + } - //put seqs into vector to send to slayer - vector seqsForSlayer; - for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); } + return trim; + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "print"); + exit(1); + } +} +#endif + +//*************************************************************************************************************** +int ChimeraSlayer::getChimeras(Sequence* query) { + try { + + trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned()); + printResults.trimQuery = trimQuery; + + chimeraFlags = "no"; + printResults.flag = "no"; + + querySeq = *query; + + //you must create a template + vector thisTemplate; + vector thisFilteredTemplate; + if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; } + else { thisTemplate = getTemplate(*query, thisFilteredTemplate); } //fills this template and creates the databases + + if (m->control_pressed) { return 0; } + if (thisTemplate.size() == 0) { return 0; } //not chimeric + + //moved this out of maligner - 4/29/11 + vector refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate); + + Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov); + Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS); + + if (templateFileName == "self") { + if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; } + else if (searchMethod == "blast") { delete databaseLeft; } + } + + if (m->control_pressed) { return 0; } + + string chimeraFlag = maligner.getResults(*query, decalc); + + if (m->control_pressed) { return 0; } + + vector Results = maligner.getOutput(); + + //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } + + if (chimeraFlag == "yes") { + + if (realign) { + vector parents; + for (int i = 0; i < Results.size(); i++) { + parents.push_back(Results[i].parentAligned); + } + + ChimeraReAligner realigner; + realigner.reAlign(query, parents); + + } - //mask then send to slayer... - if (seqMask != "") { - decalc->setMask(seqMask); +// cout << query->getAligned() << endl; + //get sequence that were given from maligner results + vector seqs; + map removeDups; + map::iterator itDup; + map parentNameSeq; + map::iterator itSeq; + for (int j = 0; j < Results.size(); j++) { - //mask querys - decalc->runMask(querySeqs[i]); - - //mask parents - for (int k = 0; k < seqsForSlayer.size(); k++) { - decalc->runMask(seqsForSlayer[k]); + float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal; + //only add if you are not a duplicate +// cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl; + + + if(Results[j].queryToParentLocal >= 90){ //local match has to be over 90% similarity + + itDup = removeDups.find(Results[j].parent); + if (itDup == removeDups.end()) { //this is not duplicate + removeDups[Results[j].parent] = dist; + parentNameSeq[Results[j].parent] = Results[j].parentAligned; + }else if (dist > itDup->second) { //is this a stronger number for this parent + removeDups[Results[j].parent] = dist; + parentNameSeq[Results[j].parent] = Results[j].parentAligned; } - + } - //send to slayer - chimeraFlags[i] = slayer->getResults(querySeqs[i], seqsForSlayer); - chimeraResults[i] = slayer->getOutput(); + } - //free memory - for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } + for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) { + itSeq = parentNameSeq.find(itDup->first); + Sequence seq(itDup->first, itSeq->second); + + SeqCompare member; + member.seq = seq; + member.dist = itDup->second; + seqs.push_back(member); } - } - //free memory - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } + //limit number of parents to explore - default 3 + if (Results.size() > parents) { + //sort by distance + sort(seqs.begin(), seqs.end(), compareSeqCompare); + //prioritize larger more similiar sequence fragments + reverse(seqs.begin(), seqs.end()); + + //for (int k = seqs.size()-1; k > (parents-1); k--) { + // delete seqs[k].seq; + //seqs.pop_back(); + //} + } - if (seqMask != "") { - delete decalc; - } + //put seqs into vector to send to slayer + +// cout << query->getAligned() << endl; + vector seqsForSlayer; + for (int k = 0; k < seqs.size(); k++) { +// cout << seqs[k].seq->getAligned() << endl; + seqsForSlayer.push_back(seqs[k].seq); +// cout << seqs[k].seq->getName() << endl; + } + + if (m->control_pressed) { return 0; } + //send to slayer + chimeraFlags = slayer.getResults(*query, seqsForSlayer); + if (m->control_pressed) { return 0; } + chimeraResults = slayer.getOutput(); + printResults.flag = chimeraFlags; + printResults.results = chimeraResults; + + //free memory + //for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } + } + //cout << endl << endl; + return 0; } catch(exception& e) { - errorOut(e, "ChimeraSlayer", "getChimeras"); + m->errorOut(e, "ChimeraSlayer", "getChimeras"); exit(1); } } //*************************************************************************************************************** -Sequence* ChimeraSlayer::getSequence(string name) { - try{ - Sequence* temp; +void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){ + try { + out << querySeq.getName() << '\t'; + out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t'; + + out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t'; + out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t'; + + out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t'; + + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "printBlock"); + exit(1); + } +} +//*************************************************************************************************************** +void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){ + try { + + if ((leftChimeric) && (!rightChimeric)) { //print left + out << querySeq.getName() << '\t'; + out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t'; + + out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t'; + out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t'; + + out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t'; + + }else if ((!leftChimeric) && (rightChimeric)) { //print right + out << querySeq.getName() << '\t'; + out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t'; + + out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t'; + out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t'; + + out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t'; + + }else { //print both results + if (leftdata.flag == "yes") { + out << querySeq.getName() + "_LEFT" << '\t'; + out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t'; + + out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t'; + out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t'; + + out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t'; + } + + if (rightdata.flag == "yes") { + if (leftdata.flag == "yes") { out << endl; } + + out << querySeq.getName() + "_RIGHT"<< '\t'; + out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t'; + + out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t'; + out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t'; + + out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t'; - //look through templateSeqs til you find it - int spot = -1; - for (int i = 0; i < templateSeqs.size(); i++) { - if (name == templateSeqs[i]->getName()) { - spot = i; - break; } } + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "printBlock"); + exit(1); + } +} +//*************************************************************************************************************** +string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){ + try { - if(spot == -1) { mothurOut("Error: Could not find sequence in chimeraSlayer."); mothurOutEndLine(); return NULL; } + string out = ""; - temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned()); + if ((leftChimeric) && (!rightChimeric)) { //get left + out += querySeq.getName() + "\t"; + out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t"; + + out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t"; + out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t"; + + out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t"; + + }else if ((!leftChimeric) && (rightChimeric)) { //print right + out += querySeq.getName() + "\t"; + out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t"; + + out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t"; + out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t"; + + out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t"; + + }else { //print both results + + if (leftdata.flag == "yes") { + out += querySeq.getName() + "_LEFT\t"; + out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t"; + + out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t"; + out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t"; + + out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t"; + } + + if (rightdata.flag == "yes") { + if (leftdata.flag == "yes") { out += "\n"; } + out += querySeq.getName() + "_RIGHT\t"; + out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t"; + + out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t"; + out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t"; + + out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t"; + } + } + + return out; + + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "getBlock"); + exit(1); + } +} +//*************************************************************************************************************** +string ChimeraSlayer::getBlock(data_struct data, string flag){ + try { + + string outputString = ""; + + outputString += querySeq.getName() + "\t"; + outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t"; + + outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t"; + outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t"; + + outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t"; + + return outputString; + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "getBlock"); + exit(1); + } +} +//*************************************************************************************************************** +vector ChimeraSlayer::getRefSeqs(Sequence q, vector& thisTemplate, vector& thisFilteredTemplate){ + try { + + vector refSeqs; + + if (searchMethod == "distance") { + //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate + Sequence* newSeq = new Sequence(q.getName(), q.getAligned()); + runFilter(newSeq); + refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim); + delete newSeq; + }else if (searchMethod == "blast") { + refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes + }else if (searchMethod == "kmer") { + refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes + }else { m->mothurOut("not valid search."); exit(1); } //should never get here + + return refSeqs; + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "getRefSeqs"); + exit(1); + } +} +//***************************************************************************************************************/ +vector ChimeraSlayer::getBlastSeqs(Sequence q, vector& db, int num) { + try { + + vector refResults; + + //get parts of query + string queryUnAligned = q.getUnaligned(); + string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence + string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence +//cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl; + Sequence* queryLeft = new Sequence(q.getName(), leftQuery); + Sequence* queryRight = new Sequence(q.getName(), rightQuery); + + vector tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim); + vector tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim); + + + //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl; +// vector smaller; +// vector larger; +// +// if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; } +// else { smaller = tempIndexesLeft; larger = tempIndexesRight; } + + //merge results + map seen; + map::iterator it; + vector mergedResults; + + int index = 0; +// for (int i = 0; i < smaller.size(); i++) { + while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){ + + if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } + + //add left if you havent already + it = seen.find(tempIndexesLeft[index]); + if (it == seen.end()) { + mergedResults.push_back(tempIndexesLeft[index]); + seen[tempIndexesLeft[index]] = tempIndexesLeft[index]; + } + + //add right if you havent already + it = seen.find(tempIndexesRight[index]); + if (it == seen.end()) { + mergedResults.push_back(tempIndexesRight[index]); + seen[tempIndexesRight[index]] = tempIndexesRight[index]; + } + index++; + } + - return temp; + for (int i = index; i < tempIndexesLeft.size(); i++) { + if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } + + //add right if you havent already + it = seen.find(tempIndexesLeft[i]); + if (it == seen.end()) { + mergedResults.push_back(tempIndexesLeft[i]); + seen[tempIndexesLeft[i]] = tempIndexesLeft[i]; + } + } + + for (int i = index; i < tempIndexesRight.size(); i++) { + if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } + + //add right if you havent already + it = seen.find(tempIndexesRight[i]); + if (it == seen.end()) { + mergedResults.push_back(tempIndexesRight[i]); + seen[tempIndexesRight[i]] = tempIndexesRight[i]; + } + } + //string qname = q->getName().substr(0, q->getName().find_last_of('_')); + //cout << qname << endl; + + if (mergedResults.size() == 0) { numNoParents++; } + + for (int i = 0; i < mergedResults.size(); i++) { + //cout << q->getName() << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl; + if (db[mergedResults[i]]->getName() != q.getName()) { + Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned()); + refResults.push_back(temp); + } + } + //cout << endl << endl; + + delete queryRight; + delete queryLeft; + + return refResults; } catch(exception& e) { - errorOut(e, "ChimeraSlayer", "getSequence"); + m->errorOut(e, "ChimeraSlayer", "getBlastSeqs"); exit(1); } } //*************************************************************************************************************** +vector ChimeraSlayer::getKmerSeqs(Sequence q, vector& db, int num) { + try { + vector refResults; + + //get parts of query + string queryUnAligned = q.getUnaligned(); + string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence + string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence + + Sequence* queryLeft = new Sequence(q.getName(), leftQuery); + Sequence* queryRight = new Sequence(q.getName(), rightQuery); + + vector tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num); + vector tempIndexesRight = databaseRight->findClosestSequences(queryRight, num); + + //merge results + map seen; + map::iterator it; + vector mergedResults; + + int index = 0; + // for (int i = 0; i < smaller.size(); i++) { + while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){ + + if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } + + //add left if you havent already + it = seen.find(tempIndexesLeft[index]); + if (it == seen.end()) { + mergedResults.push_back(tempIndexesLeft[index]); + seen[tempIndexesLeft[index]] = tempIndexesLeft[index]; + } + + //add right if you havent already + it = seen.find(tempIndexesRight[index]); + if (it == seen.end()) { + mergedResults.push_back(tempIndexesRight[index]); + seen[tempIndexesRight[index]] = tempIndexesRight[index]; + } + index++; + } + + + for (int i = index; i < tempIndexesLeft.size(); i++) { + if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } + + //add right if you havent already + it = seen.find(tempIndexesLeft[i]); + if (it == seen.end()) { + mergedResults.push_back(tempIndexesLeft[i]); + seen[tempIndexesLeft[i]] = tempIndexesLeft[i]; + } + } + + for (int i = index; i < tempIndexesRight.size(); i++) { + if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } + + //add right if you havent already + it = seen.find(tempIndexesRight[i]); + if (it == seen.end()) { + mergedResults.push_back(tempIndexesRight[i]); + seen[tempIndexesRight[i]] = tempIndexesRight[i]; + } + } + + for (int i = 0; i < mergedResults.size(); i++) { + //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl; + if (db[mergedResults[i]]->getName() != q.getName()) { + Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned()); + refResults.push_back(temp); + + } + } + //cout << endl; + delete queryRight; + delete queryLeft; + + return refResults; + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "getKmerSeqs"); + exit(1); + } +} +//***************************************************************************************************************