5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
38 decalc = new DeCalculator();
43 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
49 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
51 fastafile = file; templateSeqs = readSeqs(fastafile);
52 templateFileName = temp;
71 decalc = new DeCalculator();
73 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
75 //run filter on template
76 for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { break; } runFilter(templateSeqs[i]); }
81 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
85 //***************************************************************************************************************
86 int ChimeraSlayer::doPrep() {
89 //read in all query seqs
90 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
92 vector<Sequence*> temp = templateSeqs;
93 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
95 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
97 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
99 if (m->control_pressed) { return 0; }
101 //run filter on template
102 for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { return 0; } runFilter(templateSeqs[i]); }
104 string kmerDBNameLeft;
105 string kmerDBNameRight;
107 //generate the kmerdb to pass to maligner
108 if (searchMethod == "kmer") {
109 string templatePath = m->hasPath(templateFileName);
110 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
111 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
113 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
114 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
116 for (int i = 0; i < templateSeqs.size(); i++) {
118 if (m->control_pressed) { return 0; }
120 string leftFrag = templateSeqs[i]->getUnaligned();
121 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
123 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
124 databaseLeft->addSequence(leftTemp);
126 databaseLeft->generateDB();
127 databaseLeft->setNumSeqs(templateSeqs.size());
129 for (int i = 0; i < templateSeqs.size(); i++) {
130 if (m->control_pressed) { return 0; }
132 string rightFrag = templateSeqs[i]->getUnaligned();
133 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
135 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
136 databaseRight->addSequence(rightTemp);
138 databaseRight->generateDB();
139 databaseRight->setNumSeqs(templateSeqs.size());
143 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
144 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
145 bool needToGenerateLeft = true;
147 if(kmerFileTestLeft){
148 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
149 if (GoodFile) { needToGenerateLeft = false; }
152 if(needToGenerateLeft){
154 for (int i = 0; i < templateSeqs.size(); i++) {
156 if (m->control_pressed) { return 0; }
158 string leftFrag = templateSeqs[i]->getUnaligned();
159 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
161 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
162 databaseLeft->addSequence(leftTemp);
164 databaseLeft->generateDB();
167 databaseLeft->readKmerDB(kmerFileTestLeft);
169 kmerFileTestLeft.close();
171 databaseLeft->setNumSeqs(templateSeqs.size());
174 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
175 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
176 bool needToGenerateRight = true;
178 if(kmerFileTestRight){
179 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
180 if (GoodFile) { needToGenerateRight = false; }
183 if(needToGenerateRight){
185 for (int i = 0; i < templateSeqs.size(); i++) {
186 if (m->control_pressed) { return 0; }
188 string rightFrag = templateSeqs[i]->getUnaligned();
189 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
191 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
192 databaseRight->addSequence(rightTemp);
194 databaseRight->generateDB();
197 databaseRight->readKmerDB(kmerFileTestRight);
199 kmerFileTestRight.close();
201 databaseRight->setNumSeqs(templateSeqs.size());
203 }else if (searchMethod == "blast") {
206 databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
208 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
209 databaseLeft->generateDB();
210 databaseLeft->setNumSeqs(templateSeqs.size());
216 catch(exception& e) {
217 m->errorOut(e, "ChimeraSlayer", "doprep");
221 //***************************************************************************************************************
222 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q) {
225 //when template=self, the query file is sorted from most abundance to least abundant
226 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
227 vector<Sequence*> userTemplate;
229 int myAbund = priority[q->getName()];
231 for (int i = 0; i < templateSeqs.size(); i++) {
233 if (m->control_pressed) { return userTemplate; }
235 //have I reached a sequence with the same abundance as myself?
236 if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
238 //if its am not chimeric add it
239 if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { userTemplate.push_back(templateSeqs[i]); }
242 string kmerDBNameLeft;
243 string kmerDBNameRight;
245 //generate the kmerdb to pass to maligner
246 if (searchMethod == "kmer") {
247 string templatePath = m->hasPath(templateFileName);
248 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
249 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
251 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
252 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
254 for (int i = 0; i < userTemplate.size(); i++) {
256 if (m->control_pressed) { return userTemplate; }
258 string leftFrag = userTemplate[i]->getUnaligned();
259 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
261 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
262 databaseLeft->addSequence(leftTemp);
264 databaseLeft->generateDB();
265 databaseLeft->setNumSeqs(userTemplate.size());
267 for (int i = 0; i < userTemplate.size(); i++) {
268 if (m->control_pressed) { return userTemplate; }
270 string rightFrag = userTemplate[i]->getUnaligned();
271 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
273 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
274 databaseRight->addSequence(rightTemp);
276 databaseRight->generateDB();
277 databaseRight->setNumSeqs(userTemplate.size());
282 for (int i = 0; i < userTemplate.size(); i++) {
284 if (m->control_pressed) { return userTemplate; }
286 string leftFrag = userTemplate[i]->getUnaligned();
287 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
289 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
290 databaseLeft->addSequence(leftTemp);
292 databaseLeft->generateDB();
293 databaseLeft->setNumSeqs(userTemplate.size());
295 for (int i = 0; i < userTemplate.size(); i++) {
296 if (m->control_pressed) { return userTemplate; }
298 string rightFrag = userTemplate[i]->getUnaligned();
299 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
301 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
302 databaseRight->addSequence(rightTemp);
304 databaseRight->generateDB();
305 databaseRight->setNumSeqs(userTemplate.size());
307 }else if (searchMethod == "blast") {
310 databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
312 for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
313 databaseLeft->generateDB();
314 databaseLeft->setNumSeqs(userTemplate.size());
320 catch(exception& e) {
321 m->errorOut(e, "ChimeraSlayer", "getTemplate");
326 //***************************************************************************************************************
327 ChimeraSlayer::~ChimeraSlayer() {
329 if (templateFileName != "self") {
330 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
331 else if (searchMethod == "blast") { delete databaseLeft; }
334 //***************************************************************************************************************
335 void ChimeraSlayer::printHeader(ostream& out) {
336 m->mothurOutEndLine();
337 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
338 m->mothurOutEndLine();
340 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
342 //***************************************************************************************************************
343 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
345 Sequence* trim = NULL;
346 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
348 if (chimeraFlags == "yes") {
349 string chimeraFlag = "no";
350 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
352 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
355 if (chimeraFlag == "yes") {
356 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
357 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
358 outAcc << querySeq->getName() << endl;
360 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
363 int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
364 int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
366 string newAligned = trim->getAligned();
368 if (lengthLeft > lengthRight) { //trim right
369 for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
371 for (int i = 0; i < spotMap[chimeraResults[0].winLEnd]; i++) { newAligned[i] = '.'; }
373 trim->setAligned(newAligned);
378 printBlock(chimeraResults[0], chimeraFlag, out);
381 out << querySeq->getName() << "\tno" << endl;
387 catch(exception& e) {
388 m->errorOut(e, "ChimeraSlayer", "print");
392 //***************************************************************************************************************
393 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
395 Sequence* trim = NULL;
398 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
399 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
402 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
404 string chimeraFlag = "no";
405 if (leftPiece.flag == "yes") {
407 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
409 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
412 if (rightPiece.flag == "yes") {
413 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
415 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
418 bool rightChimeric = false;
419 bool leftChimeric = false;
421 if (chimeraFlag == "yes") {
422 //which peice is chimeric or are both
423 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
424 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
426 if (rightChimeric || leftChimeric) {
427 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
428 outAcc << querySeq->getName() << endl;
430 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
433 string newAligned = trim->getAligned();
435 //right side is fine so keep that
436 if ((leftChimeric) && (!rightChimeric)) {
437 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
438 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
439 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
440 }else { //both sides are chimeric, keep longest piece
442 int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
443 int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
445 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
446 int length = lengthLeftLeft;
447 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
449 int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
450 int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
452 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
453 if (lengthRightRight > length) { longest = 4; }
455 if (longest == 1) { //leftleft
456 for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
457 }else if (longest == 2) { //leftright
458 //get rid of leftleft
459 for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
461 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
462 }else if (longest == 3) { //rightleft
464 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
465 //get rid of rightright
466 for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
469 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
470 //get rid of rightleft
471 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
475 trim->setAligned(newAligned);
481 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
484 out << querySeq->getName() << "\tno" << endl;
490 catch(exception& e) {
491 m->errorOut(e, "ChimeraSlayer", "print");
497 //***************************************************************************************************************
498 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
501 bool results = false;
502 string outAccString = "";
503 string outputString = "";
505 Sequence* trim = NULL;
508 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
509 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
513 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
515 string chimeraFlag = "no";
516 if (leftPiece.flag == "yes") {
518 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
520 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
523 if (rightPiece.flag == "yes") {
524 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
526 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
529 bool rightChimeric = false;
530 bool leftChimeric = false;
532 if (chimeraFlag == "yes") {
533 //which peice is chimeric or are both
534 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
535 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
537 if (rightChimeric || leftChimeric) {
538 cout << querySeq->getName() << "\tyes" << endl;
539 outAccString += querySeq->getName() + "\n";
542 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
544 //write to accnos file
545 int length = outAccString.length();
546 char* buf2 = new char[length];
547 memcpy(buf2, outAccString.c_str(), length);
549 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
553 string newAligned = trim->getAligned();
555 //right side is fine so keep that
556 if ((leftChimeric) && (!rightChimeric)) {
557 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
558 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
559 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
560 }else { //both sides are chimeric, keep longest piece
562 int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
563 int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
565 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
566 int length = lengthLeftLeft;
567 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
569 int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
570 int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
572 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
573 if (lengthRightRight > length) { longest = 4; }
575 if (longest == 1) { //leftleft
576 for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
577 }else if (longest == 2) { //leftright
578 //get rid of leftleft
579 for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
581 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
582 }else if (longest == 3) { //rightleft
584 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
585 //get rid of rightright
586 for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
589 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
590 //get rid of rightleft
591 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
595 trim->setAligned(newAligned);
601 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
602 outputString += "\n";
604 //write to output file
605 int length = outputString.length();
606 char* buf = new char[length];
607 memcpy(buf, outputString.c_str(), length);
609 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
613 outputString += querySeq->getName() + "\tno\n";
615 //write to output file
616 int length = outputString.length();
617 char* buf = new char[length];
618 memcpy(buf, outputString.c_str(), length);
620 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
627 catch(exception& e) {
628 m->errorOut(e, "ChimeraSlayer", "print");
632 //***************************************************************************************************************
633 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
636 bool results = false;
637 string outAccString = "";
638 string outputString = "";
640 Sequence* trim = NULL;
641 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
643 if (chimeraFlags == "yes") {
644 string chimeraFlag = "no";
645 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
647 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
650 if (chimeraFlag == "yes") {
651 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
652 cout << querySeq->getName() << "\tyes" << endl;
653 outAccString += querySeq->getName() + "\n";
656 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
658 //write to accnos file
659 int length = outAccString.length();
660 char* buf2 = new char[length];
661 memcpy(buf2, outAccString.c_str(), length);
663 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
667 int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
668 int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
670 string newAligned = trim->getAligned();
671 if (lengthLeft > lengthRight) { //trim right
672 for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
674 for (int i = 0; i < (spotMap[chimeraResults[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
676 trim->setAligned(newAligned);
681 outputString = getBlock(chimeraResults[0], chimeraFlag);
682 outputString += "\n";
684 //write to output file
685 int length = outputString.length();
686 char* buf = new char[length];
687 memcpy(buf, outputString.c_str(), length);
689 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
693 outputString += querySeq->getName() + "\tno\n";
695 //write to output file
696 int length = outputString.length();
697 char* buf = new char[length];
698 memcpy(buf, outputString.c_str(), length);
700 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
706 catch(exception& e) {
707 m->errorOut(e, "ChimeraSlayer", "print");
713 //***************************************************************************************************************
714 int ChimeraSlayer::getChimeras(Sequence* query) {
717 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
718 printResults.trimQuery = trimQuery;
721 printResults.flag = "no";
724 spotMap = runFilter(query);
725 printResults.spotMap = spotMap;
729 //you must create a template
730 vector<Sequence*> thisTemplate;
731 if (templateFileName != "self") { thisTemplate = templateSeqs; }
732 else { thisTemplate = getTemplate(query); } //fills this template and creates the databases
734 if (m->control_pressed) { return 0; }
736 if (thisTemplate.size() == 0) { return 0; } //not chimeric
738 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
739 Maligner maligner(thisTemplate, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
740 Slayer slayer(window, increment, minSim, divR, iters, minSNP);
742 if (templateFileName == "self") {
743 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
744 else if (searchMethod == "blast") { delete databaseLeft; }
747 if (m->control_pressed) { return 0; }
749 string chimeraFlag = maligner.getResults(query, decalc);
751 if (m->control_pressed) { return 0; }
753 vector<results> Results = maligner.getOutput();
755 if (chimeraFlag == "yes") {
758 ChimeraReAligner realigner(thisTemplate, match, misMatch);
759 realigner.reAlign(query, Results);
762 //get sequence that were given from maligner results
763 vector<SeqDist> seqs;
764 map<string, float> removeDups;
765 map<string, float>::iterator itDup;
766 map<string, string> parentNameSeq;
767 map<string, string>::iterator itSeq;
768 for (int j = 0; j < Results.size(); j++) {
769 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
770 //only add if you are not a duplicate
771 itDup = removeDups.find(Results[j].parent);
772 if (itDup == removeDups.end()) { //this is not duplicate
773 removeDups[Results[j].parent] = dist;
774 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
775 }else if (dist > itDup->second) { //is this a stronger number for this parent
776 removeDups[Results[j].parent] = dist;
777 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
781 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
782 itSeq = parentNameSeq.find(itDup->first);
783 Sequence* seq = new Sequence(itDup->first, itSeq->second);
787 member.dist = itDup->second;
789 seqs.push_back(member);
792 //limit number of parents to explore - default 3
793 if (Results.size() > parents) {
795 sort(seqs.begin(), seqs.end(), compareSeqDist);
796 //prioritize larger more similiar sequence fragments
797 reverse(seqs.begin(), seqs.end());
799 for (int k = seqs.size()-1; k > (parents-1); k--) {
805 //put seqs into vector to send to slayer
806 vector<Sequence*> seqsForSlayer;
808 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
810 //mask then send to slayer...
812 decalc->setMask(seqMask);
815 decalc->runMask(query);
818 for (int k = 0; k < seqsForSlayer.size(); k++) {
819 decalc->runMask(seqsForSlayer[k]);
822 spotMap = decalc->getMaskMap();
825 if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
828 chimeraFlags = slayer.getResults(query, seqsForSlayer);
829 if (m->control_pressed) { return 0; }
830 chimeraResults = slayer.getOutput();
833 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
835 printResults.spotMap = spotMap;
836 printResults.flag = chimeraFlags;
837 printResults.results = chimeraResults;
842 catch(exception& e) {
843 m->errorOut(e, "ChimeraSlayer", "getChimeras");
847 //***************************************************************************************************************
848 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
850 out << querySeq->getName() << '\t';
851 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
853 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
854 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
856 out << flag << '\t' << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
859 catch(exception& e) {
860 m->errorOut(e, "ChimeraSlayer", "printBlock");
864 //***************************************************************************************************************
865 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
868 if ((leftChimeric) && (!rightChimeric)) { //print left
869 out << querySeq->getName() << '\t';
870 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
872 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
873 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
875 out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t';
877 }else if ((!leftChimeric) && (rightChimeric)) { //print right
878 out << querySeq->getName() << '\t';
879 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
881 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
882 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
884 out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t';
886 }else { //print both results
887 if (leftdata.flag == "yes") {
888 out << querySeq->getName() + "_LEFT" << '\t';
889 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
891 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
892 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
894 out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t';
897 if (rightdata.flag == "yes") {
898 if (leftdata.flag == "yes") { out << endl; }
900 out << querySeq->getName() + "_RIGHT"<< '\t';
901 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
903 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
904 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
906 out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t';
911 catch(exception& e) {
912 m->errorOut(e, "ChimeraSlayer", "printBlock");
916 //***************************************************************************************************************
917 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
922 if ((leftChimeric) && (!rightChimeric)) { //get left
923 out += querySeq->getName() + "\t";
924 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
926 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
927 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
929 out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t";
931 }else if ((!leftChimeric) && (rightChimeric)) { //print right
932 out += querySeq->getName() + "\t";
933 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
935 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
936 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
938 out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t";
940 }else { //print both results
942 if (leftdata.flag == "yes") {
943 out += querySeq->getName() + "_LEFT\t";
944 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
946 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
947 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
949 out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t";
952 if (rightdata.flag == "yes") {
953 if (leftdata.flag == "yes") { out += "\n"; }
954 out += querySeq->getName() + "_RIGHT\t";
955 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
957 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
958 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
960 out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t";
967 catch(exception& e) {
968 m->errorOut(e, "ChimeraSlayer", "getBlock");
972 //***************************************************************************************************************
973 string ChimeraSlayer::getBlock(data_struct data, string flag){
976 string outputString = "";
978 outputString += querySeq->getName() + "\t";
979 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
981 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
982 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
984 outputString += flag + "\t" + toString(spotMap[data.winLStart]) + "-" + toString(spotMap[data.winLEnd]) + "\t" + toString(spotMap[data.winRStart]) + "-" + toString(spotMap[data.winREnd]) + "\t";
988 catch(exception& e) {
989 m->errorOut(e, "ChimeraSlayer", "getBlock");
993 //***************************************************************************************************************/