X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=chimeraslayer.cpp;h=102db7478223d0027e676304288552a2148966fb;hp=1f7160aa0d91e8f049fbca4c8b5a85c0bed349d9;hb=df7e3ff9f68ef157b0328a2d353c3258c5d45d89;hpb=4c16a1dac0538d5ba2ac925674747ab174612ab8 diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index 1f7160a..102db74 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -14,7 +14,7 @@ //*************************************************************************************************************** ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div, -int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() { +int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() { try { fastafile = file; templateFileName = temp; templateSeqs = readSeqs(temp); @@ -34,9 +34,10 @@ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int num numWanted = numw; realign = r; trimChimera = trim; + numNoParents = 0; + blastlocation = blas; + threadID = tid; - decalc = new DeCalculator(); - doPrep(); } catch(exception& e) { @@ -45,9 +46,11 @@ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int num } } //*************************************************************************************************************** +//template=self, byGroup parameter used for mpienabled version to read the template as MPI_COMM_SELF instead of MPI_COMM_WORLD ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map& prior, string mode, int k, int ms, int mms, int win, float div, - int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() { + int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid, bool bg) : Chimera() { try { + byGroup = bg; fastafile = file; templateSeqs = readSeqs(fastafile); templateFileName = temp; searchMethod = mode; @@ -67,15 +70,74 @@ ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, mapcontrol_pressed) { break; } runFilter(templateSeqs[i]); } - + if (searchMethod == "distance") { + //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap + + //run filter on template copying templateSeqs into filteredTemplateSeqs + for (int i = 0; i < templateSeqs.size(); i++) { + if (m->control_pressed) { break; } + + Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned()); + runFilter(newSeq); + filteredTemplateSeqs.push_back(newSeq); + } + } + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer"); + exit(1); + } +} +//*************************************************************************************************************** +//template=self +ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map& prior, string mode, int k, int ms, int mms, int win, float div, + int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() { + try { + fastafile = file; templateSeqs = readSeqs(fastafile); + templateFileName = temp; + searchMethod = mode; + kmerSize = k; + match = ms; + misMatch = mms; + window = win; + divR = div; + minSim = minsim; + minCov = mincov; + minBS = minbs; + minSNP = minsnp; + parents = par; + iters = it; + increment = inc; + numWanted = numw; + realign = r; + trimChimera = trim; + priority = prior; + numNoParents = 0; + blastlocation = blas; + threadID = tid; + + + createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap + if (searchMethod == "distance") { + //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap + + //run filter on template copying templateSeqs into filteredTemplateSeqs + for (int i = 0; i < templateSeqs.size(); i++) { + if (m->control_pressed) { break; } + + Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned()); + runFilter(newSeq); + filteredTemplateSeqs.push_back(newSeq); + } + } } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer"); @@ -209,7 +271,9 @@ int ChimeraSlayer::doPrep() { }else if (searchMethod == "blast") { //generate blastdb - databaseLeft = new BlastDB(-1.0, -1.0, 1, -3); + databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3, blastlocation, threadID); + + if (m->control_pressed) { return 0; } for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); } databaseLeft->generateDB(); @@ -225,14 +289,14 @@ int ChimeraSlayer::doPrep() { } } //*************************************************************************************************************** -vector ChimeraSlayer::getTemplate(Sequence* q, vector& userTemplateFiltered) { +vector ChimeraSlayer::getTemplate(Sequence q, vector& userTemplateFiltered) { try { //when template=self, the query file is sorted from most abundance to least abundant //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant vector userTemplate; - int myAbund = priority[q->getName()]; + int myAbund = priority[q.getName()]; for (int i = 0; i < templateSeqs.size(); i++) { @@ -248,6 +312,11 @@ vector ChimeraSlayer::getTemplate(Sequence* q, vector& use } } + //avoids nuisance error from formatdb for making blank blast database + if (userTemplate.size() == 0) { + return userTemplate; + } + string kmerDBNameLeft; string kmerDBNameRight; @@ -316,7 +385,9 @@ vector ChimeraSlayer::getTemplate(Sequence* q, vector& use }else if (searchMethod == "blast") { //generate blastdb - databaseLeft = new BlastDB(-1.0, -1.0, 1, -3); + databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3, blastlocation, threadID); + + if (m->control_pressed) { return userTemplate; } for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); } databaseLeft->generateDB(); @@ -334,7 +405,6 @@ vector ChimeraSlayer::getTemplate(Sequence* q, vector& use //*************************************************************************************************************** ChimeraSlayer::~ChimeraSlayer() { - delete decalc; if (templateFileName != "self") { if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; } else if (searchMethod == "blast") { delete databaseLeft; } @@ -349,10 +419,10 @@ void ChimeraSlayer::printHeader(ostream& out) { out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n"; } //*************************************************************************************************************** -Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) { +Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) { try { - Sequence* trim = NULL; - if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); } + Sequence trim; + if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); } if (chimeraFlags == "yes") { string chimeraFlag = "no"; @@ -363,23 +433,23 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) { if (chimeraFlag == "yes") { if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) { - m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine(); - outAcc << querySeq->getName() << endl; + m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine(); + outAcc << querySeq.getName() << endl; - if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); } + if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); } if (trimChimera) { int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart; int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart; - string newAligned = trim->getAligned(); + string newAligned = trim.getAligned(); if (lengthLeft > lengthRight) { //trim right for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } }else { //trim left for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; } } - trim->setAligned(newAligned); + trim.setAligned(newAligned); } } } @@ -387,7 +457,7 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) { printBlock(chimeraResults[0], chimeraFlag, out); out << endl; }else { - out << querySeq->getName() << "\tno" << endl; + out << querySeq.getName() << "\tno" << endl; } return trim; @@ -399,13 +469,13 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) { } } //*************************************************************************************************************** -Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) { +Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) { try { - Sequence* trim = NULL; + Sequence trim; if (trimChimera) { string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned(); - trim = new Sequence(leftPiece.trimQuery.getName(), aligned); + trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned); } if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) { @@ -433,13 +503,13 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftP if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } } if (rightChimeric || leftChimeric) { - m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine(); - outAcc << querySeq->getName() << endl; + m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine(); + outAcc << querySeq.getName() << endl; - if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); } + if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); } if (trimChimera) { - string newAligned = trim->getAligned(); + string newAligned = trim.getAligned(); //right side is fine so keep that if ((leftChimeric) && (!rightChimeric)) { @@ -481,7 +551,7 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftP } } - trim->setAligned(newAligned); + trim.setAligned(newAligned); } } @@ -490,7 +560,7 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftP printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out); out << endl; }else { - out << querySeq->getName() << "\tno" << endl; + out << querySeq.getName() << "\tno" << endl; } return trim; @@ -504,18 +574,19 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftP #ifdef USE_MPI //*************************************************************************************************************** -Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) { +Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece, bool& chimFlag) { try { MPI_Status status; bool results = false; string outAccString = ""; string outputString = ""; + chimFlag = false; - Sequence* trim = NULL; + Sequence trim; if (trimChimera) { string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned(); - trim = new Sequence(leftPiece.trimQuery.getName(), aligned); + trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned); } @@ -537,6 +608,8 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef bool rightChimeric = false; bool leftChimeric = false; + + cout << endl; if (chimeraFlag == "yes") { //which peice is chimeric or are both @@ -544,11 +617,11 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } } if (rightChimeric || leftChimeric) { - cout << querySeq->getName() << "\tyes" << endl; - outAccString += querySeq->getName() + "\n"; + cout << querySeq.getName() << "\tyes" << endl; + outAccString += querySeq.getName() + "\n"; results = true; - if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); } + if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); } //write to accnos file int length = outAccString.length(); @@ -556,10 +629,11 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef memcpy(buf2, outAccString.c_str(), length); MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status); + chimFlag = true; delete buf2; if (trimChimera) { - string newAligned = trim->getAligned(); + string newAligned = trim.getAligned(); //right side is fine so keep that if ((leftChimeric) && (!rightChimeric)) { @@ -601,7 +675,7 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef } } - trim->setAligned(newAligned); + trim.setAligned(newAligned); } } @@ -619,7 +693,7 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef delete buf; }else { - outputString += querySeq->getName() + "\tno\n"; + outputString += querySeq.getName() + "\tno\n"; //write to output file int length = outputString.length(); @@ -639,15 +713,15 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef } } //*************************************************************************************************************** -Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { +Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { try { MPI_Status status; bool results = false; string outAccString = ""; string outputString = ""; - Sequence* trim = NULL; - if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); } + Sequence trim; + if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); } if (chimeraFlags == "yes") { string chimeraFlag = "no"; @@ -658,11 +732,11 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { if (chimeraFlag == "yes") { if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) { - cout << querySeq->getName() << "\tyes" << endl; - outAccString += querySeq->getName() + "\n"; + cout << querySeq.getName() << "\tyes" << endl; + outAccString += querySeq.getName() + "\n"; results = true; - if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); } + if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); } //write to accnos file int length = outAccString.length(); @@ -676,13 +750,13 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart; int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart; - string newAligned = trim->getAligned(); + string newAligned = trim.getAligned(); if (lengthLeft > lengthRight) { //trim right for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } }else { //trim left for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; } } - trim->setAligned(newAligned); + trim.setAligned(newAligned); } } } @@ -699,7 +773,7 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { delete buf; }else { - outputString += querySeq->getName() + "\tno\n"; + outputString += querySeq.getName() + "\tno\n"; //write to output file int length = outputString.length(); @@ -729,20 +803,19 @@ int ChimeraSlayer::getChimeras(Sequence* query) { chimeraFlags = "no"; printResults.flag = "no"; - querySeq = query; + querySeq = *query; //you must create a template vector thisTemplate; vector thisFilteredTemplate; if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; } - else { thisTemplate = getTemplate(query, thisFilteredTemplate); } //fills this template and creates the databases + else { thisTemplate = getTemplate(*query, thisFilteredTemplate); } //fills this template and creates the databases if (m->control_pressed) { return 0; } - if (thisTemplate.size() == 0) { return 0; } //not chimeric //moved this out of maligner - 4/29/11 - vector refSeqs = getRefSeqs(query, thisTemplate, thisFilteredTemplate); + vector refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate); Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov); Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS); @@ -754,82 +827,93 @@ int ChimeraSlayer::getChimeras(Sequence* query) { if (m->control_pressed) { return 0; } - string chimeraFlag = maligner.getResults(query, decalc); + string chimeraFlag = maligner.getResults(*query, decalc); if (m->control_pressed) { return 0; } vector Results = maligner.getOutput(); - for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } + //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } if (chimeraFlag == "yes") { - + if (realign) { - vector parents; + vector parents; for (int i = 0; i < Results.size(); i++) { -cout << Results[i].parent << '\t' << Results[i].nastRegionStart << '\t' << Results[i].nastRegionEnd << endl; - Sequence* parent = new Sequence(Results[i].parent, Results[i].parentAligned); - - parents.push_back(parent); + parents.push_back(Results[i].parentAligned); } - ChimeraReAligner realigner; - //realigner.reAlign(query, parents); - - for (int i = 0; i < parents.size(); i++) { delete parents[i]; } + ChimeraReAligner realigner; + realigner.reAlign(query, parents); + } - //query->printSequence(cout); + +// cout << query->getAligned() << endl; //get sequence that were given from maligner results - vector seqs; + vector seqs; map removeDups; map::iterator itDup; map parentNameSeq; map::iterator itSeq; for (int j = 0; j < Results.size(); j++) { + float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal; //only add if you are not a duplicate - itDup = removeDups.find(Results[j].parent); - if (itDup == removeDups.end()) { //this is not duplicate - removeDups[Results[j].parent] = dist; - parentNameSeq[Results[j].parent] = Results[j].parentAligned; - }else if (dist > itDup->second) { //is this a stronger number for this parent - removeDups[Results[j].parent] = dist; - parentNameSeq[Results[j].parent] = Results[j].parentAligned; +// cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl; + + + if(Results[j].queryToParentLocal >= 90){ //local match has to be over 90% similarity + + itDup = removeDups.find(Results[j].parent); + if (itDup == removeDups.end()) { //this is not duplicate + removeDups[Results[j].parent] = dist; + parentNameSeq[Results[j].parent] = Results[j].parentAligned; + }else if (dist > itDup->second) { //is this a stronger number for this parent + removeDups[Results[j].parent] = dist; + parentNameSeq[Results[j].parent] = Results[j].parentAligned; + } + } + } for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) { itSeq = parentNameSeq.find(itDup->first); - Sequence* seq = new Sequence(itDup->first, itSeq->second); + Sequence seq(itDup->first, itSeq->second); - SeqDist member; + SeqCompare member; member.seq = seq; member.dist = itDup->second; - seqs.push_back(member); } //limit number of parents to explore - default 3 if (Results.size() > parents) { //sort by distance - sort(seqs.begin(), seqs.end(), compareSeqDist); + sort(seqs.begin(), seqs.end(), compareSeqCompare); //prioritize larger more similiar sequence fragments reverse(seqs.begin(), seqs.end()); - for (int k = seqs.size()-1; k > (parents-1); k--) { - delete seqs[k].seq; - seqs.pop_back(); - } + //for (int k = seqs.size()-1; k > (parents-1); k--) { + // delete seqs[k].seq; + //seqs.pop_back(); + //} } //put seqs into vector to send to slayer - vector seqsForSlayer; - for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); } - if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; } +// cout << query->getAligned() << endl; + vector seqsForSlayer; + for (int k = 0; k < seqs.size(); k++) { +// cout << seqs[k].seq->getAligned() << endl; + seqsForSlayer.push_back(seqs[k].seq); +// cout << seqs[k].seq->getName() << endl; + } + + if (m->control_pressed) { return 0; } //send to slayer - chimeraFlags = slayer.getResults(query, seqsForSlayer); + chimeraFlags = slayer.getResults(*query, seqsForSlayer); if (m->control_pressed) { return 0; } chimeraResults = slayer.getOutput(); @@ -837,9 +921,9 @@ cout << Results[i].parent << '\t' << Results[i].nastRegionStart << '\t' << Resu printResults.results = chimeraResults; //free memory - for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } + //for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } } - + //cout << endl << endl; return 0; } catch(exception& e) { @@ -850,7 +934,7 @@ cout << Results[i].parent << '\t' << Results[i].nastRegionStart << '\t' << Resu //*************************************************************************************************************** void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){ try { - out << querySeq->getName() << '\t'; + out << querySeq.getName() << '\t'; out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t'; out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t'; @@ -869,7 +953,7 @@ void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bo try { if ((leftChimeric) && (!rightChimeric)) { //print left - out << querySeq->getName() << '\t'; + out << querySeq.getName() << '\t'; out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t'; out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t'; @@ -878,7 +962,7 @@ void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bo out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t'; }else if ((!leftChimeric) && (rightChimeric)) { //print right - out << querySeq->getName() << '\t'; + out << querySeq.getName() << '\t'; out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t'; out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t'; @@ -888,7 +972,7 @@ void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bo }else { //print both results if (leftdata.flag == "yes") { - out << querySeq->getName() + "_LEFT" << '\t'; + out << querySeq.getName() + "_LEFT" << '\t'; out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t'; out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t'; @@ -900,7 +984,7 @@ void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bo if (rightdata.flag == "yes") { if (leftdata.flag == "yes") { out << endl; } - out << querySeq->getName() + "_RIGHT"<< '\t'; + out << querySeq.getName() + "_RIGHT"<< '\t'; out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t'; out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t'; @@ -923,7 +1007,7 @@ string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bo string out = ""; if ((leftChimeric) && (!rightChimeric)) { //get left - out += querySeq->getName() + "\t"; + out += querySeq.getName() + "\t"; out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t"; out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t"; @@ -932,7 +1016,7 @@ string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bo out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t"; }else if ((!leftChimeric) && (rightChimeric)) { //print right - out += querySeq->getName() + "\t"; + out += querySeq.getName() + "\t"; out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t"; out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t"; @@ -943,7 +1027,7 @@ string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bo }else { //print both results if (leftdata.flag == "yes") { - out += querySeq->getName() + "_LEFT\t"; + out += querySeq.getName() + "_LEFT\t"; out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t"; out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t"; @@ -954,7 +1038,7 @@ string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bo if (rightdata.flag == "yes") { if (leftdata.flag == "yes") { out += "\n"; } - out += querySeq->getName() + "_RIGHT\t"; + out += querySeq.getName() + "_RIGHT\t"; out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t"; out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t"; @@ -978,7 +1062,7 @@ string ChimeraSlayer::getBlock(data_struct data, string flag){ string outputString = ""; - outputString += querySeq->getName() + "\t"; + outputString += querySeq.getName() + "\t"; outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t"; outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t"; @@ -994,16 +1078,16 @@ string ChimeraSlayer::getBlock(data_struct data, string flag){ } } //*************************************************************************************************************** -vector ChimeraSlayer::getRefSeqs(Sequence* q, vector& thisTemplate, vector& thisFilteredTemplate){ +vector ChimeraSlayer::getRefSeqs(Sequence q, vector& thisTemplate, vector& thisFilteredTemplate){ try { - vector refSeqs; + vector refSeqs; if (searchMethod == "distance") { //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate - Sequence* newSeq = new Sequence(q->getName(), q->getAligned()); + Sequence* newSeq = new Sequence(q.getName(), q.getAligned()); runFilter(newSeq); - refSeqs = decalc->findClosest(newSeq, thisTemplate, thisFilteredTemplate, numWanted); + refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim); delete newSeq; }else if (searchMethod == "blast") { refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes @@ -1019,70 +1103,93 @@ vector ChimeraSlayer::getRefSeqs(Sequence* q, vector& this } } //***************************************************************************************************************/ -vector ChimeraSlayer::getBlastSeqs(Sequence* q, vector& db, int num) { +vector ChimeraSlayer::getBlastSeqs(Sequence q, vector& db, int num) { try { - vector refResults; + vector refResults; //get parts of query - string queryUnAligned = q->getUnaligned(); + string queryUnAligned = q.getUnaligned(); string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence +//cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl; + Sequence* queryLeft = new Sequence(q.getName(), leftQuery); + Sequence* queryRight = new Sequence(q.getName(), rightQuery); - Sequence* queryLeft = new Sequence(q->getName()+"left", leftQuery); - Sequence* queryRight = new Sequence(q->getName()+"right", rightQuery); - - vector tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1); - vector tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1); - - vector smaller; - vector larger; + vector tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim); + vector tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim); + - if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; } - else { smaller = tempIndexesLeft; larger = tempIndexesRight; } + //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl; +// vector smaller; +// vector larger; +// +// if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; } +// else { smaller = tempIndexesLeft; larger = tempIndexesRight; } //merge results map seen; map::iterator it; vector mergedResults; - for (int i = 0; i < smaller.size(); i++) { + + int index = 0; +// for (int i = 0; i < smaller.size(); i++) { + while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){ + if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } //add left if you havent already - it = seen.find(smaller[i]); + it = seen.find(tempIndexesLeft[index]); if (it == seen.end()) { - mergedResults.push_back(smaller[i]); - seen[smaller[i]] = smaller[i]; + mergedResults.push_back(tempIndexesLeft[index]); + seen[tempIndexesLeft[index]] = tempIndexesLeft[index]; } //add right if you havent already - it = seen.find(larger[i]); + it = seen.find(tempIndexesRight[index]); if (it == seen.end()) { - mergedResults.push_back(larger[i]); - seen[larger[i]] = larger[i]; + mergedResults.push_back(tempIndexesRight[index]); + seen[tempIndexesRight[index]] = tempIndexesRight[index]; } + index++; } + - for (int i = smaller.size(); i < larger.size(); i++) { + for (int i = index; i < tempIndexesLeft.size(); i++) { if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } //add right if you havent already - it = seen.find(larger[i]); + it = seen.find(tempIndexesLeft[i]); if (it == seen.end()) { - mergedResults.push_back(larger[i]); - seen[larger[i]] = larger[i]; + mergedResults.push_back(tempIndexesLeft[i]); + seen[tempIndexesLeft[i]] = tempIndexesLeft[i]; } } + for (int i = index; i < tempIndexesRight.size(); i++) { + if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } + + //add right if you havent already + it = seen.find(tempIndexesRight[i]); + if (it == seen.end()) { + mergedResults.push_back(tempIndexesRight[i]); + seen[tempIndexesRight[i]] = tempIndexesRight[i]; + } + } + //string qname = q->getName().substr(0, q->getName().find_last_of('_')); + //cout << qname << endl; + + if (mergedResults.size() == 0) { numNoParents++; } + for (int i = 0; i < mergedResults.size(); i++) { - //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl; - if (db[mergedResults[i]]->getName() != q->getName()) { - Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned()); + //cout << q->getName() << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl; + if (db[mergedResults[i]]->getName() != q.getName()) { + Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned()); refResults.push_back(temp); - } } - + //cout << endl << endl; + delete queryRight; delete queryLeft; @@ -1094,17 +1201,17 @@ vector ChimeraSlayer::getBlastSeqs(Sequence* q, vector& db } } //*************************************************************************************************************** -vector ChimeraSlayer::getKmerSeqs(Sequence* q, vector& db, int num) { +vector ChimeraSlayer::getKmerSeqs(Sequence q, vector& db, int num) { try { - vector refResults; + vector refResults; //get parts of query - string queryUnAligned = q->getUnaligned(); + string queryUnAligned = q.getUnaligned(); string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence - Sequence* queryLeft = new Sequence(q->getName(), leftQuery); - Sequence* queryRight = new Sequence(q->getName(), rightQuery); + Sequence* queryLeft = new Sequence(q.getName(), leftQuery); + Sequence* queryRight = new Sequence(q.getName(), rightQuery); vector tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num); vector tempIndexesRight = databaseRight->findClosestSequences(queryRight, num); @@ -1112,17 +1219,44 @@ vector ChimeraSlayer::getKmerSeqs(Sequence* q, vector& db, //merge results map seen; map::iterator it; - vector mergedResults; - for (int i = 0; i < tempIndexesLeft.size(); i++) { + vector mergedResults; + + int index = 0; + // for (int i = 0; i < smaller.size(); i++) { + while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){ if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } //add left if you havent already + it = seen.find(tempIndexesLeft[index]); + if (it == seen.end()) { + mergedResults.push_back(tempIndexesLeft[index]); + seen[tempIndexesLeft[index]] = tempIndexesLeft[index]; + } + + //add right if you havent already + it = seen.find(tempIndexesRight[index]); + if (it == seen.end()) { + mergedResults.push_back(tempIndexesRight[index]); + seen[tempIndexesRight[index]] = tempIndexesRight[index]; + } + index++; + } + + + for (int i = index; i < tempIndexesLeft.size(); i++) { + if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } + + //add right if you havent already it = seen.find(tempIndexesLeft[i]); if (it == seen.end()) { mergedResults.push_back(tempIndexesLeft[i]); seen[tempIndexesLeft[i]] = tempIndexesLeft[i]; } + } + + for (int i = index; i < tempIndexesRight.size(); i++) { + if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } //add right if you havent already it = seen.find(tempIndexesRight[i]); @@ -1132,17 +1266,15 @@ vector ChimeraSlayer::getKmerSeqs(Sequence* q, vector& db, } } - //numWanted = mergedResults.size(); - - //cout << q->getName() << endl; - for (int i = 0; i < mergedResults.size(); i++) { - //cout << db[mergedResults[i]]->getName() << endl; - if (db[mergedResults[i]]->getName() != q->getName()) { - Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned()); + //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl; + if (db[mergedResults[i]]->getName() != q.getName()) { + Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned()); refResults.push_back(temp); + } } + //cout << endl; delete queryRight; delete queryLeft;