X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=chimeraslayer.cpp;h=826f38ba962681e883809c5dcb2711939d205c99;hb=2405cc589aaaf0c44809a48fe98d3b96863dac0b;hp=8c9417ad82cd0341a88aa216a84add2a701791e3;hpb=64581f6d0e63e67d4e119601bea695ebb3f52a13;p=mothur.git diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index 8c9417a..826f38b 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -45,7 +45,7 @@ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int num } } //*************************************************************************************************************** -ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, string mode, int k, int ms, int mms, int win, float div, +ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map& prior, string mode, int k, int ms, int mms, int win, float div, int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() { try { fastafile = file; templateSeqs = readSeqs(fastafile); @@ -66,14 +66,24 @@ ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, s numWanted = numw; realign = r; trimChimera = trim; + priority = prior; decalc = new DeCalculator(); createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap - //run filter on template - for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; } templateSeqs.clear(); - + if (searchMethod == "distance") { + createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap + + //run filter on template copying templateSeqs into filteredTemplateSeqs + for (int i = 0; i < templateSeqs.size(); i++) { + if (m->control_pressed) { break; } + + Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned()); + runFilter(newSeq); + filteredTemplateSeqs.push_back(newSeq); + } + } } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer"); @@ -83,22 +93,28 @@ ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, s //*************************************************************************************************************** int ChimeraSlayer::doPrep() { try { + if (searchMethod == "distance") { + //read in all query seqs + vector tempQuerySeqs = readSeqs(fastafile); - //read in all query seqs - vector tempQuerySeqs = readSeqs(fastafile); + vector temp = templateSeqs; + for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); } - vector temp = templateSeqs; - for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); } + createFilter(temp, 0.0); //just removed columns where all seqs have a gap - createFilter(temp, 0.0); //just removed columns where all seqs have a gap + for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; } - for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; } - - if (m->control_pressed) { return 0; } - - //run filter on template - for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { return 0; } runFilter(templateSeqs[i]); } + if (m->control_pressed) { return 0; } + //run filter on template copying templateSeqs into filteredTemplateSeqs + for (int i = 0; i < templateSeqs.size(); i++) { + if (m->control_pressed) { return 0; } + + Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned()); + runFilter(newSeq); + filteredTemplateSeqs.push_back(newSeq); + } + } string kmerDBNameLeft; string kmerDBNameRight; @@ -201,7 +217,7 @@ int ChimeraSlayer::doPrep() { }else if (searchMethod == "blast") { //generate blastdb - databaseLeft = new BlastDB(-1.0, -1.0, 1, -3); + databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3); for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); } databaseLeft->generateDB(); @@ -217,9 +233,29 @@ int ChimeraSlayer::doPrep() { } } //*************************************************************************************************************** -int ChimeraSlayer::getTemplate(Sequence* q) { +vector ChimeraSlayer::getTemplate(Sequence* q, vector& userTemplateFiltered) { try { + //when template=self, the query file is sorted from most abundance to least abundant + //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant + vector userTemplate; + + int myAbund = priority[q->getName()]; + + for (int i = 0; i < templateSeqs.size(); i++) { + + if (m->control_pressed) { return userTemplate; } + + //have I reached a sequence with the same abundance as myself? + if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; } + + //if its am not chimeric add it + if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { + userTemplate.push_back(templateSeqs[i]); + if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); } + } + } + string kmerDBNameLeft; string kmerDBNameRight; @@ -234,7 +270,7 @@ int ChimeraSlayer::getTemplate(Sequence* q) { #ifdef USE_MPI for (int i = 0; i < userTemplate.size(); i++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { return userTemplate; } string leftFrag = userTemplate[i]->getUnaligned(); leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33)); @@ -246,7 +282,7 @@ int ChimeraSlayer::getTemplate(Sequence* q) { databaseLeft->setNumSeqs(userTemplate.size()); for (int i = 0; i < userTemplate.size(); i++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { return userTemplate; } string rightFrag = userTemplate[i]->getUnaligned(); rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66)); @@ -262,7 +298,7 @@ int ChimeraSlayer::getTemplate(Sequence* q) { for (int i = 0; i < userTemplate.size(); i++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { return userTemplate; } string leftFrag = userTemplate[i]->getUnaligned(); leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33)); @@ -274,7 +310,7 @@ int ChimeraSlayer::getTemplate(Sequence* q) { databaseLeft->setNumSeqs(userTemplate.size()); for (int i = 0; i < userTemplate.size(); i++) { - if (m->control_pressed) { return 0; } + if (m->control_pressed) { return userTemplate; } string rightFrag = userTemplate[i]->getUnaligned(); rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66)); @@ -288,14 +324,14 @@ int ChimeraSlayer::getTemplate(Sequence* q) { }else if (searchMethod == "blast") { //generate blastdb - databaseLeft = new BlastDB(-1.0, -1.0, 1, -3); + databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3); - for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return 0; } databaseLeft->addSequence(*userTemplate[i]); } + for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); } databaseLeft->generateDB(); databaseLeft->setNumSeqs(userTemplate.size()); } - return 0; + return userTemplate; } catch(exception& e) { @@ -310,12 +346,6 @@ ChimeraSlayer::~ChimeraSlayer() { if (templateFileName != "self") { if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; } else if (searchMethod == "blast") { delete databaseLeft; } - }else { - //delete userTemplate - for (int i = 0; i < userTemplate.size(); i++) { - delete userTemplate[i]; - } - userTemplate.clear(); } } //*************************************************************************************************************** @@ -344,16 +374,18 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) { m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine(); outAcc << querySeq->getName() << endl; + if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); } + if (trimChimera) { - int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart]; - int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart]; + int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart; + int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart; string newAligned = trim->getAligned(); if (lengthLeft > lengthRight) { //trim right - for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } }else { //trim left - for (int i = 0; i < spotMap[chimeraResults[0].winLEnd]; i++) { newAligned[i] = '.'; } + for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; } } trim->setAligned(newAligned); } @@ -364,11 +396,6 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) { out << endl; }else { out << querySeq->getName() << "\tno" << endl; - if (templateFileName == "self") { - Sequence* temp = new Sequence(trimQuery.getName(), trimQuery.getAligned()); - runFilter(temp); - userTemplate.push_back(temp); - } } return trim; @@ -417,46 +444,48 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftP m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine(); outAcc << querySeq->getName() << endl; + if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); } + if (trimChimera) { string newAligned = trim->getAligned(); //right side is fine so keep that if ((leftChimeric) && (!rightChimeric)) { - for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } + for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that - for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } }else { //both sides are chimeric, keep longest piece - int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart]; - int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart]; + int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart; + int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart; int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4 int length = lengthLeftLeft; if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; } - int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart]; - int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart]; + int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart; + int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart; if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; } if (lengthRightRight > length) { longest = 4; } if (longest == 1) { //leftleft - for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } }else if (longest == 2) { //leftright //get rid of leftleft - for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; } + for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; } //get rid of right - for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } }else if (longest == 3) { //rightleft //get rid of left - for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } + for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } //get rid of rightright - for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } }else { //rightright //get rid of left - for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } + for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } //get rid of rightleft - for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; } + for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; } } } @@ -470,11 +499,6 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftP out << endl; }else { out << querySeq->getName() << "\tno" << endl; - if (templateFileName == "self") { - Sequence* temp = new Sequence(trimQuery.getName(), trimQuery.getAligned()); - runFilter(temp); - userTemplate.push_back(temp); - } } return trim; @@ -528,10 +552,12 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } } if (rightChimeric || leftChimeric) { - cout << querySeq->getName() << "\tyes" << endl; +// cout << querySeq->getName() << "\tyes" << endl; outAccString += querySeq->getName() + "\n"; results = true; + if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); } + //write to accnos file int length = outAccString.length(); char* buf2 = new char[length]; @@ -545,41 +571,41 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef //right side is fine so keep that if ((leftChimeric) && (!rightChimeric)) { - for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } + for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that - for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } }else { //both sides are chimeric, keep longest piece - int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart]; - int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart]; + int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart; + int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart; int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4 int length = lengthLeftLeft; if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; } - int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart]; - int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart]; + int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart; + int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart; if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; } if (lengthRightRight > length) { longest = 4; } if (longest == 1) { //leftleft - for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } }else if (longest == 2) { //leftright //get rid of leftleft - for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; } + for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; } //get rid of right - for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } }else if (longest == 3) { //rightleft //get rid of left - for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } + for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } //get rid of rightright - for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } }else { //rightright //get rid of left - for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } + for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } //get rid of rightleft - for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; } + for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; } } } @@ -610,12 +636,6 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); delete buf; - - if (template == "self") { - Sequence temp = new Sequence(trimQuery.getName(), trimQuery.getAligned()); - runFilter(temp); - userTemplate.push_back(temp); - } } @@ -650,6 +670,8 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { outAccString += querySeq->getName() + "\n"; results = true; + if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); } + //write to accnos file int length = outAccString.length(); char* buf2 = new char[length]; @@ -659,14 +681,14 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { delete buf2; if (trimChimera) { - int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart]; - int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart]; + int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart; + int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart; string newAligned = trim->getAligned(); if (lengthLeft > lengthRight) { //trim right - for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } }else { //trim left - for (int i = 0; i < (spotMap[chimeraResults[0].winLEnd]-1); i++) { newAligned[i] = '.'; } + for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; } } trim->setAligned(newAligned); } @@ -694,12 +716,6 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); delete buf; - - if (template == "self") { - Sequence temp = new Sequence(trimQuery.getName(), trimQuery.getAligned()); - runFilter(temp); - userTemplate.push_back(temp); - } } return trim; @@ -720,25 +736,24 @@ int ChimeraSlayer::getChimeras(Sequence* query) { chimeraFlags = "no"; printResults.flag = "no"; - - //filter query - spotMap = runFilter(query); - printResults.spotMap = spotMap; querySeq = query; //you must create a template vector thisTemplate; - if (templateFileName != "self") { thisTemplate = templateSeqs; } - else { getTemplate(query); thisTemplate = userTemplate; } //fills this template and creates the databases + vector thisFilteredTemplate; + if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; } + else { thisTemplate = getTemplate(query, thisFilteredTemplate); } //fills this template and creates the databases if (m->control_pressed) { return 0; } if (thisTemplate.size() == 0) { return 0; } //not chimeric - //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity - Maligner maligner(thisTemplate, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight); - Slayer slayer(window, increment, minSim, divR, iters, minSNP); + //moved this out of maligner - 4/29/11 + vector refSeqs = getRefSeqs(query, thisTemplate, thisFilteredTemplate); + + Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov); + Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS); if (templateFileName == "self") { if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; } @@ -748,18 +763,26 @@ int ChimeraSlayer::getChimeras(Sequence* query) { if (m->control_pressed) { return 0; } string chimeraFlag = maligner.getResults(query, decalc); - + if (m->control_pressed) { return 0; } vector Results = maligner.getOutput(); - - if (realign) { - ChimeraReAligner realigner(thisTemplate, match, misMatch); - realigner.reAlign(query, Results); - } - + + for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } + if (chimeraFlag == "yes") { - + + if (realign) { + vector parents; + for (int i = 0; i < Results.size(); i++) { + parents.push_back(Results[i].parentAligned); + } + + ChimeraReAligner realigner; + realigner.reAlign(query, parents); + + } + //get sequence that were given from maligner results vector seqs; map removeDups; @@ -802,25 +825,15 @@ int ChimeraSlayer::getChimeras(Sequence* query) { seqs.pop_back(); } } - + //put seqs into vector to send to slayer - vector seqsForSlayer; - for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); } +// cout << query->getAligned() << endl; + vector seqsForSlayer; + for (int k = 0; k < seqs.size(); k++) { +// cout << seqs[k].seq->getAligned() << endl; + seqsForSlayer.push_back(seqs[k].seq); - //mask then send to slayer... - if (seqMask != "") { - decalc->setMask(seqMask); - - //mask querys - decalc->runMask(query); - - //mask parents - for (int k = 0; k < seqsForSlayer.size(); k++) { - decalc->runMask(seqsForSlayer[k]); - } - - spotMap = decalc->getMaskMap(); } if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; } @@ -830,14 +843,13 @@ int ChimeraSlayer::getChimeras(Sequence* query) { if (m->control_pressed) { return 0; } chimeraResults = slayer.getOutput(); - //free memory - for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } - - printResults.spotMap = spotMap; printResults.flag = chimeraFlags; printResults.results = chimeraResults; + + //free memory + for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } } - + //cout << endl << endl; return 0; } catch(exception& e) { @@ -854,7 +866,7 @@ void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){ out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t'; out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t'; - out << flag << '\t' << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t'; + out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t'; } catch(exception& e) { @@ -873,7 +885,7 @@ void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bo out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t'; out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t'; - out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t'; + out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t'; }else if ((!leftChimeric) && (rightChimeric)) { //print right out << querySeq->getName() << '\t'; @@ -882,7 +894,7 @@ void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bo out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t'; out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t'; - out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t'; + out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t'; }else { //print both results if (leftdata.flag == "yes") { @@ -892,7 +904,7 @@ void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bo out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t'; out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t'; - out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t'; + out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t'; } if (rightdata.flag == "yes") { @@ -904,7 +916,7 @@ void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bo out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t'; out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t'; - out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t'; + out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t'; } } @@ -927,7 +939,7 @@ string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bo out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t"; out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t"; - out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t"; + out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t"; }else if ((!leftChimeric) && (rightChimeric)) { //print right out += querySeq->getName() + "\t"; @@ -936,7 +948,7 @@ string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bo out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t"; out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t"; - out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t"; + out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t"; }else { //print both results @@ -947,7 +959,7 @@ string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bo out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t"; out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t"; - out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t"; + out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t"; } if (rightdata.flag == "yes") { @@ -958,7 +970,7 @@ string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bo out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t"; out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t"; - out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t"; + out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t"; } } @@ -982,7 +994,7 @@ string ChimeraSlayer::getBlock(data_struct data, string flag){ outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t"; outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t"; - outputString += flag + "\t" + toString(spotMap[data.winLStart]) + "-" + toString(spotMap[data.winLEnd]) + "\t" + toString(spotMap[data.winRStart]) + "-" + toString(spotMap[data.winREnd]) + "\t"; + outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t"; return outputString; } @@ -991,5 +1003,172 @@ string ChimeraSlayer::getBlock(data_struct data, string flag){ exit(1); } } +//*************************************************************************************************************** +vector ChimeraSlayer::getRefSeqs(Sequence* q, vector& thisTemplate, vector& thisFilteredTemplate){ + try { + + vector refSeqs; + + if (searchMethod == "distance") { + //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate + Sequence* newSeq = new Sequence(q->getName(), q->getAligned()); + runFilter(newSeq); + refSeqs = decalc->findClosest(newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim); + delete newSeq; + }else if (searchMethod == "blast") { + refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes + }else if (searchMethod == "kmer") { + refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes + }else { m->mothurOut("not valid search."); exit(1); } //should never get here + + return refSeqs; + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "getRefSeqs"); + exit(1); + } +} //***************************************************************************************************************/ +vector ChimeraSlayer::getBlastSeqs(Sequence* q, vector& db, int num) { + try { + + vector refResults; + + //get parts of query + string queryUnAligned = q->getUnaligned(); + string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence + string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence +//cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl; + Sequence* queryLeft = new Sequence(q->getName(), leftQuery); + Sequence* queryRight = new Sequence(q->getName(), rightQuery); + + vector tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim); + vector tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim); + //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl; + vector smaller; + vector larger; + + if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; } + else { smaller = tempIndexesLeft; larger = tempIndexesRight; } + + //merge results + map seen; + map::iterator it; + vector mergedResults; + for (int i = 0; i < smaller.size(); i++) { + if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } + + //add left if you havent already + it = seen.find(smaller[i]); + if (it == seen.end()) { + mergedResults.push_back(smaller[i]); + seen[smaller[i]] = smaller[i]; + } + + //add right if you havent already + it = seen.find(larger[i]); + if (it == seen.end()) { + mergedResults.push_back(larger[i]); + seen[larger[i]] = larger[i]; + } + } + + for (int i = smaller.size(); i < larger.size(); i++) { + if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } + + //add right if you havent already + it = seen.find(larger[i]); + if (it == seen.end()) { + mergedResults.push_back(larger[i]); + seen[larger[i]] = larger[i]; + } + } + + for (int i = 0; i < mergedResults.size(); i++) { + //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl; + if (db[mergedResults[i]]->getName() != q->getName()) { + Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned()); + refResults.push_back(temp); + + } + } + + +// for(int i=0;igetName() << endl; +// } + + delete queryRight; + delete queryLeft; + + return refResults; + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "getBlastSeqs"); + exit(1); + } +} +//*************************************************************************************************************** +vector ChimeraSlayer::getKmerSeqs(Sequence* q, vector& db, int num) { + try { + vector refResults; + + //get parts of query + string queryUnAligned = q->getUnaligned(); + string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence + string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence + + Sequence* queryLeft = new Sequence(q->getName(), leftQuery); + Sequence* queryRight = new Sequence(q->getName(), rightQuery); + + vector tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num); + vector tempIndexesRight = databaseRight->findClosestSequences(queryRight, num); + + //merge results + map seen; + map::iterator it; + vector mergedResults; + for (int i = 0; i < tempIndexesLeft.size(); i++) { + + if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } + + //add left if you havent already + it = seen.find(tempIndexesLeft[i]); + if (it == seen.end()) { + mergedResults.push_back(tempIndexesLeft[i]); + seen[tempIndexesLeft[i]] = tempIndexesLeft[i]; + } + + //add right if you havent already + it = seen.find(tempIndexesRight[i]); + if (it == seen.end()) { + mergedResults.push_back(tempIndexesRight[i]); + seen[tempIndexesRight[i]] = tempIndexesRight[i]; + } + } + + //numWanted = mergedResults.size(); + + //cout << q->getName() << endl; + + for (int i = 0; i < mergedResults.size(); i++) { + //cout << db[mergedResults[i]]->getName() << endl; + if (db[mergedResults[i]]->getName() != q->getName()) { + Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned()); + refResults.push_back(temp); + } + } + //cout << endl; + delete queryRight; + delete queryLeft; + + return refResults; + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "getKmerSeqs"); + exit(1); + } +} +//*************************************************************************************************************** +